npm - @kibhq/core - Versions diffs - 0.1.0 - Mend

@kibhq/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/package.json +40 -0
package/src/compile/backlinks.test.ts +112 -0
package/src/compile/backlinks.ts +80 -0
package/src/compile/cache.test.ts +126 -0
package/src/compile/cache.ts +125 -0
package/src/compile/compiler.test.ts +278 -0
package/src/compile/compiler.ts +305 -0
package/src/compile/diff.test.ts +164 -0
package/src/compile/diff.ts +121 -0
package/src/compile/index-manager.test.ts +227 -0
package/src/compile/index-manager.ts +148 -0
package/src/compile/prompts.ts +124 -0
package/src/constants.ts +40 -0
package/src/errors.ts +66 -0
package/src/hash.test.ts +21 -0
package/src/hash.ts +24 -0
package/src/index.ts +22 -0
package/src/ingest/extractors/file.test.ts +129 -0
package/src/ingest/extractors/file.ts +136 -0
package/src/ingest/extractors/github.test.ts +47 -0
package/src/ingest/extractors/github.ts +135 -0
package/src/ingest/extractors/interface.ts +26 -0
package/src/ingest/extractors/pdf.ts +130 -0
package/src/ingest/extractors/web.test.ts +242 -0
package/src/ingest/extractors/web.ts +163 -0
package/src/ingest/extractors/youtube.test.ts +44 -0
package/src/ingest/extractors/youtube.ts +166 -0
package/src/ingest/ingest.test.ts +187 -0
package/src/ingest/ingest.ts +179 -0
package/src/ingest/normalize.test.ts +120 -0
package/src/ingest/normalize.ts +83 -0
package/src/ingest/router.test.ts +154 -0
package/src/ingest/router.ts +119 -0
package/src/lint/lint.test.ts +253 -0
package/src/lint/lint.ts +43 -0
package/src/lint/rules.ts +178 -0
package/src/providers/anthropic.ts +107 -0
package/src/providers/index.ts +4 -0
package/src/providers/ollama.ts +101 -0
package/src/providers/openai.ts +67 -0
package/src/providers/router.ts +62 -0
package/src/query/query.test.ts +165 -0
package/src/query/query.ts +136 -0
package/src/schemas.ts +193 -0
package/src/search/engine.test.ts +230 -0
package/src/search/engine.ts +390 -0
package/src/skills/loader.ts +163 -0
package/src/skills/runner.ts +139 -0
package/src/skills/schema.ts +28 -0
package/src/skills/skills.test.ts +134 -0
package/src/types.ts +136 -0
package/src/vault.test.ts +141 -0
package/src/vault.ts +251 -0

package/src/ingest/normalize.ts ADDED Viewed

@@ -0,0 +1,83 @@
+import type { SourceType } from "../types.js";
+interface NormalizeInput { // Input accepted by normalizeSource.
+	title: string; // Written to the `title` frontmatter field after escaping.
+	content: string; // Extracted body: counted for `word_count`, then cleaned and appended after the frontmatter.
+	sourceType: SourceType; // Written unquoted to the `source_type` frontmatter field.
+	originalUrl?: string; // When truthy, written to the `url` frontmatter field.
+	metadata?: Record<string, unknown>; // normalizeSource reads only `author` and `date`; other keys are ignored.
+}
+/**
+ * Normalize extracted content into a consistent raw markdown format with frontmatter.
+ *
+ * The result is a YAML frontmatter block (`title`, `source_type`, then the
+ * optional `url`, `author` and `date`, then `ingested` and `word_count`),
+ * a blank line, and the cleaned markdown body.
+ *
+ * @param input - Extracted title, body, source type and optional URL/metadata.
+ * @returns The frontmatter block followed by the cleaned content.
+ */
+export function normalizeSource(input: NormalizeInput): string {
+	// toISOString() is UTC; keep only the date part (YYYY-MM-DD).
+	const now = new Date().toISOString().split("T")[0];
+	// Counted on the raw content, before cleanMarkdown runs.
+	const wordCount = countWords(input.content);
+	const frontmatter = [
+		"---",
+		`title: "${escapeFrontmatter(input.title)}"`,
+		`source_type: ${input.sourceType}`,
+	];
+	// Every double-quoted value goes through escapeFrontmatter: a stray `"` or
+	// newline in a URL or date would otherwise end the scalar early and
+	// corrupt the whole frontmatter block.
+	if (input.originalUrl) {
+		frontmatter.push(`url: "${escapeFrontmatter(input.originalUrl)}"`);
+	}
+	if (input.metadata?.author) {
+		frontmatter.push(`author: "${escapeFrontmatter(String(input.metadata.author))}"`);
+	}
+	if (input.metadata?.date) {
+		// metadata values are `unknown`, so stringify explicitly before escaping.
+		frontmatter.push(`date: "${escapeFrontmatter(String(input.metadata.date))}"`);
+	}
+	frontmatter.push(`ingested: "${now}"`);
+	frontmatter.push(`word_count: ${wordCount}`);
+	frontmatter.push("---");
+	return `${frontmatter.join("\n")}\n\n${cleanMarkdown(input.content)}`;
+}
+/**
+ * Generate a filesystem-safe slug from a title.
+ *
+ * Lowercases the title, drops every character other than `a-z`, `0-9`,
+ * whitespace and `-`, turns whitespace runs into single hyphens, and caps
+ * the result at 80 characters.
+ *
+ * @param title - Arbitrary human-readable title.
+ * @returns A slug of at most 80 characters with no leading or trailing
+ *   hyphen. Empty when the title has no ASCII letters or digits.
+ */
+export function slugify(title: string): string {
+	return title
+		.toLowerCase()
+		.replace(/[^a-z0-9\s-]/g, "")
+		.replace(/\s+/g, "-")
+		.replace(/-+/g, "-")
+		.replace(/^-|-$/g, "")
+		.slice(0, 80)
+		// Truncation can land right after a separator; trim again so the slug
+		// never ends in a hyphen.
+		.replace(/-$/, "");
+}
+/**
+ * Count words in a string.
+ *
+ * Ignores a leading YAML frontmatter block, fenced code blocks, inline code,
+ * thematic-break lines (`---`) and common markdown punctuation, then counts
+ * the remaining whitespace-separated tokens.
+ *
+ * @param text - Markdown or plain text.
+ * @returns Number of whitespace-separated tokens left after stripping.
+ */
+export function countWords(text: string): number {
+	return text
+		// Frontmatter only exists at the very start of a document. Matching
+		// `---…---` anywhere would also delete the prose between two
+		// horizontal rules and undercount the body.
+		.replace(/^\s*---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?=\r?\n|$)/, "")
+		.replace(/```[\s\S]*?```/g, "") // strip code blocks
+		.replace(/`[^`]*`/g, "") // strip inline code
+		.replace(/^[ \t]*-{3,}[ \t]*$/gm, " ") // strip thematic breaks (lines of only dashes)
+		.replace(/[#*_[\]()>|]/g, " ") // strip markdown syntax
+		.split(/\s+/)
+		.filter((w) => w.length > 0).length;
+}
+/**
+ * Escape a value for embedding inside a double-quoted YAML scalar.
+ *
+ * Backslashes are doubled first (a backslash starts an escape sequence in
+ * double-quoted YAML, so `C:\new` or a trailing `\` would otherwise change
+ * or break the value), then double quotes are escaped, and any line break
+ * (`\r\n`, `\r` or `\n`) becomes a single space so the value stays on one line.
+ *
+ * @param str - Raw value.
+ * @returns The value, safe to place between double quotes in frontmatter.
+ */
+function escapeFrontmatter(str: string): string {
+	return str
+		.replace(/\\/g, "\\\\")
+		.replace(/"/g, '\\"')
+		.replace(/\r\n|\r|\n/g, " ");
+}
+/**
+ * Tidy extracted markdown: convert CRLF line endings to LF, collapse runs of
+ * three or more newlines down to two, and trim surrounding whitespace.
+ */
+function cleanMarkdown(content: string): string {
+	const unixNewlines = content.replace(/\r\n/g, "\n");
+	const collapsed = unixNewlines.replace(/\n{3,}/g, "\n\n");
+	return collapsed.trim();
+}

package/src/ingest/router.test.ts ADDED Viewed

@@ -0,0 +1,154 @@
+import { describe, expect, test } from "bun:test";
+import { detectSourceType } from "./router.js";
+describe("detectSourceType", () => { // Input → expected source type cases for the ingest router.
+	describe("web URLs", () => { // http(s) URLs with no special host or extension fall back to "web".
+		test("generic https URL → web", () => {
+			expect(detectSourceType("https://example.com/article")).toBe("web");
+		});
+		test("http URL → web", () => {
+			expect(detectSourceType("http://blog.example.com/post")).toBe("web");
+		});
+		test("URL with query params → web", () => {
+			expect(detectSourceType("https://example.com/page?id=123")).toBe("web");
+		});
+		test("URL with fragment → web", () => {
+			expect(detectSourceType("https://docs.example.com/guide#section")).toBe("web");
+		});
+	});
+	describe("YouTube URLs", () => { // Host-based: youtube.com (bare, www., m.) and youtu.be.
+		test("youtube.com/watch → youtube", () => {
+			expect(detectSourceType("https://www.youtube.com/watch?v=dQw4w9WgXcQ")).toBe("youtube");
+		});
+		test("youtu.be short URL → youtube", () => {
+			expect(detectSourceType("https://youtu.be/dQw4w9WgXcQ")).toBe("youtube");
+		});
+		test("m.youtube.com → youtube", () => {
+			expect(detectSourceType("https://m.youtube.com/watch?v=abc123")).toBe("youtube");
+		});
+		test("youtube.com without www → youtube", () => {
+			expect(detectSourceType("https://youtube.com/watch?v=abc123")).toBe("youtube");
+		});
+		test("youtube playlist → youtube", () => {
+			expect(detectSourceType("https://www.youtube.com/playlist?list=PLrAXtmErZgOe")).toBe(
+				"youtube",
+			);
+		});
+	});
+	describe("GitHub URLs", () => { // Needs at least two path segments (owner/repo); shallower URLs are "web".
+		test("github.com repo → github", () => {
+			expect(detectSourceType("https://github.com/anthropics/claude-code")).toBe("github");
+		});
+		test("github.com repo with path → github", () => {
+			expect(detectSourceType("https://github.com/anthropics/claude-code/tree/main/src")).toBe(
+				"github",
+			);
+		});
+		test("github.com profile only (1 part) → web", () => {
+			expect(detectSourceType("https://github.com/anthropics")).toBe("web");
+		});
+		test("github.com root → web", () => {
+			expect(detectSourceType("https://github.com")).toBe("web");
+		});
+	});
+	describe("PDF URLs", () => { // A ".pdf" path suffix, or arxiv.org under /pdf/.
+		test("URL ending in .pdf → pdf", () => {
+			expect(detectSourceType("https://example.com/paper.pdf")).toBe("pdf");
+		});
+		test("arxiv PDF URL → pdf", () => {
+			expect(detectSourceType("https://arxiv.org/pdf/1706.03762")).toBe("pdf");
+		});
+		test("arxiv abstract (not PDF) → web", () => {
+			expect(detectSourceType("https://arxiv.org/abs/1706.03762")).toBe("web");
+		});
+	});
+	describe("image URLs", () => { // Classified by the extension of the URL path.
+		test("URL ending in .png → image", () => {
+			expect(detectSourceType("https://example.com/diagram.png")).toBe("image");
+		});
+		test("URL ending in .jpg → image", () => {
+			expect(detectSourceType("https://example.com/photo.jpg")).toBe("image");
+		});
+		test("URL ending in .webp → image", () => {
+			expect(detectSourceType("https://example.com/hero.webp")).toBe("image");
+		});
+	});
+	describe("local file paths", () => { // Non-URL input: ".pdf" → pdf, image extensions → image, everything else → file.
+		test(".md → file", () => {
+			expect(detectSourceType("./notes/paper.md")).toBe("file");
+		});
+		test(".txt → file", () => {
+			expect(detectSourceType("/home/user/doc.txt")).toBe("file");
+		});
+		test(".pdf → pdf", () => {
+			expect(detectSourceType("./papers/attention.pdf")).toBe("pdf");
+		});
+		test(".png → image", () => {
+			expect(detectSourceType("/tmp/whiteboard.png")).toBe("image");
+		});
+		test(".jpg → image", () => {
+			expect(detectSourceType("photo.jpg")).toBe("image");
+		});
+		test(".ts → file", () => {
+			expect(detectSourceType("./src/index.ts")).toBe("file");
+		});
+		test(".py → file", () => {
+			expect(detectSourceType("script.py")).toBe("file");
+		});
+		test(".html → file", () => {
+			expect(detectSourceType("page.html")).toBe("file");
+		});
+		test(".json → file", () => {
+			expect(detectSourceType("data.json")).toBe("file");
+		});
+		test("no extension → file", () => {
+			expect(detectSourceType("Makefile")).toBe("file");
+		});
+		test("unknown extension → file", () => {
+			expect(detectSourceType("data.xyz")).toBe("file");
+		});
+	});
+	describe("edge cases", () => { // Input is trimmed first; extension matching is case-insensitive.
+		test("trims whitespace", () => {
+			expect(detectSourceType("  https://example.com  ")).toBe("web");
+		});
+		test("case insensitive for file extensions", () => {
+			expect(detectSourceType("PAPER.PDF")).toBe("pdf");
+		});
+		test("case insensitive for image extensions", () => {
+			expect(detectSourceType("PHOTO.PNG")).toBe("image");
+		});
+	});
+});

package/src/ingest/router.ts ADDED Viewed

@@ -0,0 +1,119 @@
+import type { SourceType } from "../types.js";
+/**
+ * Detect the source type from a URI string (URL or file path).
+ *
+ * Inputs that start with `http://` or `https://` are classified by host and
+ * path, in this order: YouTube hosts → "youtube"; github.com paths with at
+ * least two segments → "github"; paths ending in `.pdf` and arxiv.org
+ * `/pdf/` paths → "pdf"; image extensions → "image"; anything else → "web".
+ * Every other input is treated as a local path and classified by extension:
+ * "pdf", "image", or otherwise "file".
+ *
+ * @param uri - URL or file path; surrounding whitespace is ignored.
+ * @returns The detected source type. An `http(s)://` string that cannot be
+ *   parsed as a URL is classified as "web" instead of throwing.
+ */
+export function detectSourceType(uri: string): SourceType {
+	// Normalize
+	const trimmed = uri.trim();
+	// URL-based detection
+	if (isUrl(trimmed)) {
+		let url: URL;
+		try {
+			url = new URL(trimmed);
+		} catch {
+			// `new URL` throws a TypeError on malformed input such as "http://".
+			// The string still looks like a web address, so classify it as one
+			// rather than crashing the caller during detection.
+			return "web";
+		}
+		const hostname = url.hostname.toLowerCase();
+		const pathname = url.pathname.toLowerCase();
+		// YouTube
+		if (
+			hostname === "youtube.com" ||
+			hostname === "www.youtube.com" ||
+			hostname === "m.youtube.com" ||
+			hostname === "youtu.be"
+		) {
+			return "youtube";
+		}
+		// GitHub
+		if (hostname === "github.com" || hostname === "www.github.com") {
+			// Only match repo URLs (owner/repo), not arbitrary github pages.
+			// Shallower paths fall through to the generic checks below.
+			const parts = pathname.split("/").filter(Boolean);
+			if (parts.length >= 2) {
+				return "github";
+			}
+		}
+		// PDF (URL path ending in .pdf)
+		if (pathname.endsWith(".pdf")) {
+			return "pdf";
+		}
+		// ArXiv — these serve PDFs at /pdf/ paths without a .pdf suffix
+		if (hostname === "arxiv.org" && pathname.startsWith("/pdf/")) {
+			return "pdf";
+		}
+		// Image URLs
+		if (isImagePath(pathname)) {
+			return "image";
+		}
+		// Default: web page
+		return "web";
+	}
+	// Local file-based detection. Only PDFs and images are special-cased; every
+	// other path (text, markup, data, source code, unknown or missing
+	// extension) is a plain file.
+	const lower = trimmed.toLowerCase();
+	if (lower.endsWith(".pdf")) {
+		return "pdf";
+	}
+	if (isImagePath(lower)) {
+		return "image";
+	}
+	return "file";
+}
+/**
+ * Report whether a string starts with an `http://` or `https://` scheme.
+ *
+ * URI schemes are case-insensitive, so `HTTP://…` and `Https://…` count too;
+ * a case-sensitive prefix check would misroute them as local file paths.
+ *
+ * @param str - Candidate string (already trimmed by the caller).
+ * @returns True when the string begins with an http(s) scheme.
+ */
+function isUrl(str: string): boolean {
+	return /^https?:\/\//i.test(str);
+}
+/**
+ * Report whether a path ends with one of the recognized image extensions.
+ * The comparison is case-sensitive.
+ */
+function isImagePath(path: string): boolean {
+	const imageSuffixes = [".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg", ".bmp", ".tiff"];
+	return imageSuffixes.some((suffix) => path.endsWith(suffix));
+}

package/src/lint/lint.test.ts ADDED Viewed

@@ -0,0 +1,253 @@
+import { afterEach, describe, expect, test } from "bun:test";
+import { writeFile as fsWriteFile, mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { ingestSource } from "../ingest/ingest.js";
+import type { Manifest } from "../types.js";
+import { initVault, loadManifest, saveManifest, writeWiki } from "../vault.js";
+import { lintVault } from "./lint.js";
+// Directory created by the most recent makeTempVault() call.
+let tempDir: string;
+// Remove that directory after every test; nothing to do if none was created.
+afterEach(async () => {
+	if (!tempDir) return;
+	await rm(tempDir, { recursive: true, force: true });
+});
+/**
+ * Create a fresh temp directory, initialize a vault named "test" in it,
+ * remember it for cleanup, and return its path.
+ */
+async function makeTempVault() {
+	const prefix = join(tmpdir(), "kib-lint-test-");
+	tempDir = await mkdtemp(prefix);
+	await initVault(tempDir, { name: "test" });
+	return tempDir;
+}
+/**
+ * Build the markdown text of a wiki article fixture: frontmatter with title,
+ * slug, category, empty tags and empty summary, then an H1 with the title
+ * and the body (default "Some content.").
+ */
+function articleMd(opts: { title: string; slug: string; category: string; body?: string }) {
+	const { title, slug, category, body } = opts;
+	const lines = [
+		"---",
+		`title: ${title}`,
+		`slug: ${slug}`,
+		`category: ${category}`,
+		"tags: []",
+		'summary: ""',
+		"---",
+		`# ${title}`,
+		`${body ?? "Some content."}`,
+	];
+	return lines.join("\n");
+}
+describe("lint engine", () => { // Each test builds its own temp vault via makeTempVault().
+	test("reports no issues for healthy vault", async () => {
+		const root = await makeTempVault();
+		// Create two articles that link to each other
+		await writeWiki(
+			root,
+			"concepts/alpha.md",
+			articleMd({
+				title: "Alpha",
+				slug: "alpha",
+				category: "concept",
+				body: "See [[beta]].",
+			}),
+		);
+		await writeWiki(
+			root,
+			"concepts/beta.md",
+			articleMd({
+				title: "Beta",
+				slug: "beta",
+				category: "concept",
+				body: "See [[alpha]].",
+			}),
+		);
+		// Register both articles in the manifest with matching backlinks / forward links
+		const manifest = await loadManifest(root);
+		manifest.articles["alpha"] = {
+			hash: "a",
+			createdAt: new Date().toISOString(),
+			lastUpdated: new Date().toISOString(),
+			derivedFrom: [],
+			backlinks: ["beta"],
+			forwardLinks: ["beta"],
+			tags: [],
+			summary: "",
+			wordCount: 10,
+			category: "concept",
+		};
+		manifest.articles["beta"] = {
+			hash: "b",
+			createdAt: new Date().toISOString(),
+			lastUpdated: new Date().toISOString(),
+			derivedFrom: [],
+			backlinks: ["alpha"],
+			forwardLinks: ["alpha"],
+			tags: [],
+			summary: "",
+			wordCount: 10,
+			category: "concept",
+		};
+		await saveManifest(root, manifest);
+		const result = await lintVault(root);
+		expect(result.errors).toBe(0);
+		// Only errors are asserted: warnings may still appear (orphan detection depends on exact backlink setup)
+	});
+	test("detects orphan articles", async () => {
+		const root = await makeTempVault();
+		await writeWiki(
+			root,
+			"concepts/orphan.md",
+			articleMd({ title: "Orphan", slug: "orphan", category: "concept" }),
+		);
+		// Add to manifest with no backlinks
+		const manifest = await loadManifest(root);
+		manifest.articles["orphan"] = {
+			hash: "o",
+			createdAt: new Date().toISOString(),
+			lastUpdated: new Date().toISOString(),
+			derivedFrom: [],
+			backlinks: [],
+			forwardLinks: [],
+			tags: [],
+			summary: "",
+			wordCount: 10,
+			category: "concept",
+		};
+		await saveManifest(root, manifest);
+		const result = await lintVault(root, { ruleFilter: "orphan" });
+		expect(result.warnings).toBeGreaterThan(0);
+		expect(result.diagnostics.some((d) => d.rule === "orphan")).toBe(true);
+	});
+	test("detects broken wikilinks", async () => {
+		const root = await makeTempVault();
+		await writeWiki(
+			root,
+			"concepts/test.md",
+			articleMd({
+				title: "Test",
+				slug: "test",
+				category: "concept",
+				body: "See [[nonexistent-article]].",
+			}),
+		);
+		const manifest = await loadManifest(root);
+		manifest.articles["test"] = {
+			hash: "t",
+			createdAt: new Date().toISOString(),
+			lastUpdated: new Date().toISOString(),
+			derivedFrom: [],
+			backlinks: [],
+			forwardLinks: ["nonexistent-article"], // link target has no manifest entry and no file
+			tags: [],
+			summary: "",
+			wordCount: 10,
+			category: "concept",
+		};
+		await saveManifest(root, manifest);
+		const result = await lintVault(root, { ruleFilter: "broken-link" });
+		expect(result.errors).toBeGreaterThan(0);
+		expect(
+			result.diagnostics.some(
+				(d) => d.rule === "broken-link" && d.message.includes("nonexistent-article"),
+			),
+		).toBe(true);
+	});
+	test("detects stale sources", async () => {
+		const root = await makeTempVault();
+		// Ingest a source (it won't be compiled)
+		const testFile = join(root, "article.md");
+		await fsWriteFile(testFile, "# Test\n\nContent.");
+		await ingestSource(root, testFile);
+		const result = await lintVault(root, { ruleFilter: "stale" });
+		expect(result.warnings).toBeGreaterThan(0);
+		expect(result.diagnostics.some((d) => d.rule === "stale")).toBe(true);
+	});
+	test("detects missing frontmatter", async () => {
+		const root = await makeTempVault();
+		// Write article without frontmatter
+		await writeWiki(root, "concepts/nofm.md", "# No Frontmatter\n\nJust content.");
+		const result = await lintVault(root, { ruleFilter: "frontmatter" });
+		expect(result.errors).toBeGreaterThan(0);
+		expect(
+			result.diagnostics.some(
+				(d) => d.rule === "frontmatter" && d.message.includes("Missing YAML"),
+			),
+		).toBe(true);
+	});
+	test("detects missing required frontmatter fields", async () => {
+		const root = await makeTempVault();
+		// Write article with partial frontmatter (missing slug)
+		await writeWiki(root, "concepts/partial.md", "---\ntitle: Partial\n---\n\nContent.");
+		const result = await lintVault(root, { ruleFilter: "frontmatter" });
+		expect(result.errors).toBeGreaterThan(0);
+		expect(
+			result.diagnostics.some((d) => d.rule === "frontmatter" && d.message.includes("slug")),
+		).toBe(true);
+	});
+	test("filters by specific rule", async () => {
+		const root = await makeTempVault();
+		// Set up conditions that would trigger multiple rules
+		await writeWiki(root, "concepts/test.md", "# No Frontmatter");
+		// Ingest but don't compile
+		const testFile = join(root, "source.md");
+		await fsWriteFile(testFile, "# Source");
+		await ingestSource(root, testFile);
+		// Only run frontmatter check
+		const result = await lintVault(root, { ruleFilter: "frontmatter" });
+		expect(result.diagnostics.every((d) => d.rule === "frontmatter")).toBe(true);
+	});
+	test("handles empty vault", async () => {
+		const root = await makeTempVault();
+		const result = await lintVault(root); // freshly initialized vault, nothing written or ingested
+		expect(result.diagnostics).toHaveLength(0);
+	});
+	test("outputs skip orphan for output category", async () => {
+		const root = await makeTempVault();
+		await writeWiki(
+			root,
+			"outputs/query-result.md",
+			articleMd({ title: "Query Result", slug: "query-result", category: "output" }),
+		);
+		const manifest = await loadManifest(root);
+		manifest.articles["query-result"] = {
+			hash: "q",
+			createdAt: new Date().toISOString(),
+			lastUpdated: new Date().toISOString(),
+			derivedFrom: [],
+			backlinks: [], // No backlinks — but it's an output, so should NOT be orphan
+			forwardLinks: [],
+			tags: [],
+			summary: "",
+			wordCount: 10,
+			category: "output",
+		};
+		await saveManifest(root, manifest);
+		const result = await lintVault(root, { ruleFilter: "orphan" });
+		expect(result.diagnostics.filter((d) => d.rule === "orphan")).toHaveLength(0);
+	});
+});

package/src/lint/lint.ts ADDED Viewed

@@ -0,0 +1,43 @@
+import type { LintDiagnostic, Manifest } from "../types.js";
+import { loadManifest } from "../vault.js";
+import { ALL_RULES } from "./rules.js";
+export interface LintOptions { // Optional settings for lintVault.
+	/** Run only the rule whose `name` equals this value; a value matching no rule runs nothing. */
+	ruleFilter?: string;
+	/** Called with a "Running <rule> check..." message before each rule runs. */
+	onProgress?: (msg: string) => void;
+}
+export interface LintResult { // Aggregate outcome returned by lintVault.
+	diagnostics: LintDiagnostic[]; // Every diagnostic from the rules that ran, in rule order.
+	errors: number; // Count of diagnostics whose severity is "error".
+	warnings: number; // Count of diagnostics whose severity is "warning".
+	infos: number; // Count of diagnostics whose severity is "info".
+}
+/**
+ * Run lint checks on the wiki.
+ *
+ * Loads the manifest once, runs the selected rules one after another in
+ * `ALL_RULES` order, and tallies the collected diagnostics by severity.
+ *
+ * @param root - Vault root, passed to `loadManifest` and to every rule.
+ * @param options - Optional rule filter and progress callback. A `ruleFilter`
+ *   that matches no rule name selects nothing, so the result is empty.
+ * @returns All diagnostics plus error / warning / info counts.
+ */
+export async function lintVault(root: string, options: LintOptions = {}): Promise<LintResult> {
+	const manifest = await loadManifest(root);
+	const selected = options.ruleFilter
+		? ALL_RULES.filter((candidate) => candidate.name === options.ruleFilter)
+		: ALL_RULES;
+	const collected: LintDiagnostic[] = [];
+	// Rules run sequentially so progress messages arrive in rule order.
+	for (const current of selected) {
+		options.onProgress?.(`Running ${current.name} check...`);
+		collected.push(...(await current.fn(root, manifest)));
+	}
+	const countOf = (level: string): number =>
+		collected.filter((entry) => entry.severity === level).length;
+	return {
+		diagnostics: collected,
+		errors: countOf("error"),
+		warnings: countOf("warning"),
+		infos: countOf("info"),
+	};
+}