llm-kb 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli.ts ADDED
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { Command } from "commander";
4
+ import { scan, summarize } from "./scan.js";
5
+ import { parsePDF } from "./pdf.js";
6
+ import { buildIndex } from "./indexer.js";
7
+ import { startWatcher } from "./watcher.js";
8
+ import { query } from "./query.js";
9
+ import { resolveKnowledgeBase } from "./resolve-kb.js";
10
+ import { existsSync } from "node:fs";
11
+ import { mkdir } from "node:fs/promises";
12
+ import { resolve, join } from "node:path";
13
+ import chalk from "chalk";
14
+
15
/** Version reported by `--version` and printed in the `run` banner. */
const VERSION = "0.2.0";

/** Turn any thrown value into a printable message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

const program = new Command();

program
  .name("llm-kb")
  .description("Drop files into a folder. Get a knowledge base you can query.")
  .version(VERSION);

// `run`: scan the folder, parse every PDF, build the index, then start the watcher.
program
  .command("run")
  .description("Scan, parse, index, and watch a folder")
  .argument("<folder>", "Path to your documents folder")
  .action(async (folder: string) => {
    console.log(`\n${chalk.bold("llm-kb")} v${VERSION}\n`);

    if (!existsSync(folder)) {
      console.error(chalk.red(`Error: Folder not found: ${folder}`));
      process.exit(1);
    }

    console.log(`Scanning ${folder}...`);

    const files = await scan(folder);

    if (files.length === 0) {
      console.log(chalk.yellow(" No supported files found."));
      return;
    }

    const pdfs = files.filter((f) => f.ext === ".pdf");
    console.log(` Found ${chalk.bold(files.length.toString())} files (${summarize(files)})`);
    // No PDFs: stop here — the index is not built and the watcher is not started.
    if (pdfs.length === 0) return;

    // Set up .llm-kb folder structure
    const root = resolve(folder);
    const sourcesDir = join(root, ".llm-kb", "wiki", "sources");
    await mkdir(sourcesDir, { recursive: true });

    // Parse PDFs with inline progress
    let parsed = 0;
    let skipped = 0;
    let failed = 0;
    const errors: { name: string; message: string }[] = [];

    for (const [i, pdf] of pdfs.entries()) {
      // `pdf.path` is relative to the scanned root.
      const fullPath = join(root, pdf.path);

      // Inline progress — the leading "\r" overwrites the same terminal line.
      const progress = ` Parsing... ${i + 1}/${pdfs.length} — ${pdf.name}`;
      process.stdout.write(`\r${progress.padEnd(80)}`);

      try {
        const result = await parsePDF(fullPath, sourcesDir);
        if (result.skipped) {
          skipped++;
        } else {
          parsed++;
        }
      } catch (err: unknown) {
        // One bad PDF must not abort the whole run; report it in the summary.
        failed++;
        errors.push({ name: pdf.name, message: describeError(err) });
      }
    }

    // Clear progress line
    process.stdout.write(`\r${"".padEnd(80)}\r`);

    // Summary
    const parts: string[] = [];
    if (parsed > 0) parts.push(chalk.green(`${parsed} parsed`));
    if (skipped > 0) parts.push(chalk.dim(`${skipped} skipped (up to date)`));
    if (failed > 0) parts.push(chalk.red(`${failed} failed`));
    console.log(` ${parts.join(", ")}`);

    // Show errors
    for (const err of errors) {
      console.log(chalk.red(` ✗ ${err.name} — ${err.message}`));
    }

    // Build index; a failure is reported but does not stop the watcher.
    console.log(`\n Building index...`);
    try {
      await buildIndex(root, sourcesDir);
      console.log(chalk.green(` Index built: .llm-kb/wiki/index.md`));
    } catch (err: unknown) {
      console.error(chalk.red(` Index failed: ${describeError(err)}`));
    }

    console.log(`\n ${chalk.dim("Output:")} ${sourcesDir}`);

    // Start watching for new files
    console.log(chalk.dim(`\n Watching for new files... (Ctrl+C to stop)`));
    startWatcher({ folder: root, sourcesDir });
  });

// `query`: locate the knowledge base (walking up from --folder or the cwd) and ask the agent.
program
  .command("query")
  .description("Ask a question across your knowledge base")
  .argument("<question>", "Your question")
  .option("--folder <path>", "Path to document folder (auto-detects if omitted)")
  .option("--save", "Save the answer to wiki/outputs/ (research mode)")
  .action(async (question: string, options: { folder?: string; save?: boolean }) => {
    const root = resolveKnowledgeBase(options.folder || process.cwd());

    if (!root) {
      console.error(chalk.red("No knowledge base found. Run 'llm-kb run <folder>' first."));
      process.exit(1);
    }

    try {
      await query(root, question, { save: options.save });
    } catch (err: unknown) {
      console.error(chalk.red(describeError(err)));
      process.exit(1);
    }
  });

// Both actions are async, so parseAsync() is required for their rejections to
// surface here; with parse() an error thrown inside an action (for example by
// scan()) would be an unhandled promise rejection.
program.parseAsync().catch((err: unknown) => {
  console.error(chalk.red(describeError(err)));
  process.exit(1);
});
package/src/indexer.ts ADDED
@@ -0,0 +1,148 @@
1
import {
  createAgentSession,
  createBashTool,
  createReadTool,
  createWriteTool,
  DefaultResourceLoader,
  SessionManager,
  SettingsManager,
} from "@mariozechner/pi-coding-agent";
import { existsSync } from "node:fs";
import { readdir, readFile } from "node:fs/promises";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
13
+
14
+ const __filename = fileURLToPath(import.meta.url);
15
+ const __dirname = dirname(__filename);
16
+
17
/**
 * Find the node_modules directory for llm-kb's bundled libraries.
 *
 * Starts at this module's directory and checks it plus up to four ancestors,
 * returning the first `node_modules` folder that actually exists on disk.
 * (When running from bin/cli.js, that is ../node_modules.)
 *
 * @returns Absolute path of the first existing `node_modules` directory, or
 *   `<cwd>/node_modules` (not checked for existence) when none is found.
 */
function getNodeModulesPath(): string {
  let dir = __dirname;
  for (let depth = 0; depth < 5; depth++) {
    const candidate = join(dir, "node_modules");
    // join() never throws, so existence has to be tested explicitly;
    // otherwise the very first candidate would always be returned.
    if (existsSync(candidate)) return candidate;

    const parent = dirname(dir);
    if (parent === dir) break; // reached the filesystem root
    dir = parent;
  }
  return join(process.cwd(), "node_modules");
}
34
+
35
/**
 * Compose the AGENTS.md instructions handed to the indexing agent.
 *
 * @param sourcesDir Accepted for signature compatibility; not used in the text.
 * @param files File names from the sources directory; only `.md` entries are listed.
 * @returns The complete markdown document, ending with a newline.
 */
function buildAgentsContent(sourcesDir: string, files: string[]): string {
  // One bullet per parsed markdown source.
  const bullets: string[] = [];
  for (const file of files) {
    if (file.endsWith(".md")) bullets.push(` - ${file}`);
  }

  const lines = [
    "# llm-kb Knowledge Base",
    "",
    "## How to access documents",
    "",
    "### PDFs (pre-parsed)",
    "PDFs have been parsed to markdown with bounding boxes.",
    "Read the markdown versions in `.llm-kb/wiki/sources/` instead of the raw PDFs.",
    "",
    "Available parsed sources:",
    bullets.join("\n"),
    "",
    "### Other file types (Excel, Word, PowerPoint, CSV, images)",
    "You have bash and read tools. These libraries are pre-installed and available:",
    "- **exceljs** — for .xlsx/.xls files",
    "- **mammoth** — for .docx files",
    "- **officeparser** — for .pptx files",
    "- **csv-parse** — built into Node.js, use fs + split for .csv",
    "",
    "Write a quick Node.js script to extract content when needed.",
    "",
    "## Index file",
    "Write the index to `.llm-kb/wiki/index.md`.",
    "",
    "The index should be a markdown file with:",
    "1. A title and last-updated timestamp",
    "2. A summary table with columns: Source, Type, Pages/Size, Summary, Key Topics",
    "3. Each source gets a one-line summary (read the first ~500 chars of each file to generate it)",
    "4. Total word count across all sources",
    "", // yields the trailing newline
  ];
  return lines.join("\n");
}
71
+
72
/**
 * Run an agent session that writes `.llm-kb/wiki/index.md` for the parsed sources.
 *
 * Side effect: overwrites `process.env.NODE_PATH` with the bundled
 * node_modules path so scripts run by the agent's bash tool can load them.
 *
 * @param folder Knowledge-base root; used as the agent's working directory.
 * @param sourcesDir Directory containing the parsed `.md` sources.
 * @param onOutput Optional sink for the agent's streamed text deltas.
 * @returns The content of the generated index file.
 * @throws Error when no `.md` sources exist, or when no readable `index.md`
 *   is found next to the sources directory after the agent finishes.
 */
export async function buildIndex(
  folder: string,
  sourcesDir: string,
  onOutput?: (text: string) => void
): Promise<string> {
  // List source files
  const files = await readdir(sourcesDir);
  const mdFiles = files.filter((f) => f.endsWith(".md"));

  if (mdFiles.length === 0) {
    throw new Error("No source files found to index");
  }

  // Build AGENTS.md content
  const agentsContent = buildAgentsContent(sourcesDir, files);

  // Set NODE_PATH so agent's bash scripts can use bundled libraries
  process.env.NODE_PATH = getNodeModulesPath();

  // Add the generated instructions as an extra agents file alongside any existing ones.
  const loader = new DefaultResourceLoader({
    cwd: folder,
    agentsFilesOverride: (current) => ({
      agentsFiles: [
        ...current.agentsFiles,
        { path: ".llm-kb/AGENTS.md", content: agentsContent },
      ],
    }),
  });
  await loader.reload();

  const { session } = await createAgentSession({
    cwd: folder,
    resourceLoader: loader,
    tools: [
      createReadTool(folder),
      createBashTool(folder),
      createWriteTool(folder),
    ],
    sessionManager: SessionManager.inMemory(),
    settingsManager: SettingsManager.inMemory({
      compaction: { enabled: false },
    }),
  });

  // From here on the session must be disposed on every exit path, including
  // a rejected prompt — hence the try/finally.
  try {
    // Subscribe to streaming output
    if (onOutput) {
      session.subscribe((event) => {
        if (
          event.type === "message_update" &&
          event.assistantMessageEvent.type === "text_delta"
        ) {
          onOutput(event.assistantMessageEvent.delta);
        }
      });
    }

    // Build the prompt
    const prompt = `Read each file in .llm-kb/wiki/sources/ (one at a time, just the first 500 characters of each).
Then write .llm-kb/wiki/index.md with a summary table of all sources.

Include: Source filename, Type (PDF/Excel/Word/etc), Pages (from the JSON if available), a one-line summary, and key topics.
Add a total word count estimate at the bottom.`;

    await session.prompt(prompt);

    // Read the generated index (it lives one level above the sources directory)
    const indexPath = join(sourcesDir, "..", "index.md");
    try {
      return await readFile(indexPath, "utf-8");
    } catch {
      throw new Error("Agent did not create index.md");
    }
  } finally {
    session.dispose();
  }
}
package/src/pdf.ts ADDED
@@ -0,0 +1,119 @@
1
+ import { LiteParse } from "@llamaindex/liteparse";
2
+ import { writeFile, mkdir, stat } from "node:fs/promises";
3
+ import { join, basename } from "node:path";
4
+ import { cpus } from "node:os";
5
+
6
/** Outcome of one `parsePDF` call. */
export interface ParsedPDF {
  /** Base name of the PDF, used as the stem of both output files. */
  name: string;
  /** Path of the markdown output file. */
  mdPath: string;
  /** Path of the bounding-box JSON output file. */
  jsonPath: string;
  /** Number of pages parsed; 0 when parsing was skipped. */
  totalPages: number;
  /** Length of the generated markdown; 0 when parsing was skipped. */
  textLength: number;
  /** True when existing outputs were up to date and nothing was written. */
  skipped: boolean;
}
14
+
15
/**
 * Check whether both parsed outputs are at least as new as the source PDF.
 *
 * @param pdfPath Source PDF.
 * @param mdPath Markdown output derived from the PDF.
 * @param jsonPath JSON output derived from the PDF.
 * @returns `true` when the PDF's mtime is not later than either output's
 *   mtime, meaning parsing can be skipped. Returns `false` when the PDF is
 *   newer than an output or when any of the three files cannot be stat'ed
 *   (e.g. an output does not exist yet).
 */
async function isUpToDate(
  pdfPath: string,
  mdPath: string,
  jsonPath: string
): Promise<boolean> {
  try {
    const [pdfStat, mdStat, jsonStat] = await Promise.all([
      stat(pdfPath),
      stat(mdPath),
      stat(jsonPath),
    ]);
    const sourceTime = pdfStat.mtimeMs;
    return sourceTime <= mdStat.mtimeMs && sourceTime <= jsonStat.mtimeMs;
  } catch {
    // A failed stat is treated as "not up to date" so the caller parses.
    return false;
  }
}
35
+
36
/**
 * Suppress stderr temporarily to hide noisy library warnings.
 *
 * Replaces `process.stderr.write` with a no-op that reports success.
 *
 * @returns A function that reinstalls the exact `write` function that was in
 *   place when `suppressStderr` was called.
 */
function suppressStderr(): () => void {
  const stream = process.stderr;
  // Keep the original function itself (not a bound copy) so that repeated
  // suppress/restore cycles do not stack wrappers on top of each other.
  const originalWrite = stream.write;

  const silentWrite = (
    _chunk: unknown,
    encodingOrCallback?: unknown,
    callback?: unknown
  ): boolean => {
    // write(chunk, cb) and write(chunk, encoding, cb) are both valid; still
    // invoke the callback so callers waiting on completion are not left hanging.
    const done = typeof encodingOrCallback === "function" ? encodingOrCallback : callback;
    if (typeof done === "function") {
      process.nextTick(done as () => void);
    }
    return true;
  };

  stream.write = silentWrite as typeof stream.write;

  return () => {
    stream.write = originalWrite;
  };
}
46
+
47
/**
 * Parse one PDF into `<name>.md` (page text) and `<name>.json` (text items
 * with bounding boxes) inside `outputDir`.
 *
 * Parsing is skipped when both outputs already exist and are at least as new
 * as the PDF. OCR is enabled when `OCR_SERVER_URL` is set or when
 * `OCR_ENABLED` equals "true".
 *
 * NOTE(review): outputs are keyed by base name only, so two PDFs with the
 * same file name in different subfolders write to the same output files.
 *
 * @param pdfPath Path of the PDF to parse.
 * @param outputDir Directory for the outputs; created if missing.
 * @returns Output paths plus page/text counts (both 0 when skipped).
 * @throws Whatever the parser or the file writes throw.
 */
export async function parsePDF(
  pdfPath: string,
  outputDir: string
): Promise<ParsedPDF> {
  // Strip the extension case-insensitively: the scanner accepts ".PDF" too,
  // and a case-sensitive strip would produce outputs named "X.PDF.md".
  const name = basename(pdfPath).replace(/\.pdf$/i, "");
  await mkdir(outputDir, { recursive: true });

  const mdPath = join(outputDir, `${name}.md`);
  const jsonPath = join(outputDir, `${name}.json`);

  // Skip if already parsed and source hasn't changed
  if (await isUpToDate(pdfPath, mdPath, jsonPath)) {
    return { name, mdPath, jsonPath, totalPages: 0, textLength: 0, skipped: true };
  }

  const ocrServerUrl = process.env.OCR_SERVER_URL;
  const ocrEnabled = ocrServerUrl ? true : process.env.OCR_ENABLED === "true";

  const parser = new LiteParse({
    ocrEnabled,
    outputFormat: "json",
    numWorkers: cpus().length,
    ...(ocrServerUrl ? { ocrServerUrl } : {}),
  });

  // Suppress noisy Tesseract/PDF.js warnings during parse; always restore
  // stderr, even when parsing throws.
  const restore = suppressStderr();
  let result;
  try {
    result = await parser.parse(pdfPath, true);
  } finally {
    restore();
  }

  // Build markdown — spatial text per page
  const markdown = result.pages
    .map((p: any) => `# Page ${p.pageNum}\n\n${p.text}`)
    .join("\n\n---\n\n");

  // Round coordinates to two decimals to keep the JSON compact.
  const round2 = (value: number): number => Math.round(value * 100) / 100;

  // Build bounding box JSON
  const bboxData = {
    source: basename(pdfPath),
    totalPages: result.pages.length,
    pages: result.pages.map((p: any) => ({
      page: p.pageNum,
      width: p.width,
      height: p.height,
      // A page without a textItems array yields an empty list instead of a crash.
      textItems: (p.textItems ?? []).map((item: any) => ({
        text: (item.str ?? item.text ?? "").trim(),
        x: round2(item.x),
        y: round2(item.y),
        width: round2(item.width ?? item.w ?? 0),
        height: round2(item.height ?? item.h ?? 0),
        fontName: item.fontName,
        fontSize: item.fontSize ? round2(item.fontSize) : undefined,
      })),
    })),
  };

  await writeFile(mdPath, markdown);
  await writeFile(jsonPath, JSON.stringify(bboxData, null, 2));

  return {
    name,
    mdPath,
    jsonPath,
    totalPages: result.pages.length,
    textLength: markdown.length,
    skipped: false,
  };
}
package/src/query.ts ADDED
@@ -0,0 +1,132 @@
1
+ import {
2
+ createAgentSession,
3
+ createBashTool,
4
+ createReadTool,
5
+ createWriteTool,
6
+ DefaultResourceLoader,
7
+ SessionManager,
8
+ SettingsManager,
9
+ } from "@mariozechner/pi-coding-agent";
10
+ import { readdir, mkdir } from "node:fs/promises";
11
+ import { join, dirname } from "node:path";
12
+ import { fileURLToPath } from "node:url";
13
+
14
+ const __dirname = dirname(fileURLToPath(import.meta.url));
15
+
16
/**
 * Path used as NODE_PATH for scripts run by the agent: `node_modules` beside
 * this module's directory.
 *
 * The previous loop wrapped `return candidate` in a try/catch that can never
 * throw, so it always returned this first candidate; the unreachable walk-up
 * and cwd fallback are removed to make the real behavior visible.
 *
 * NOTE(review): the directory is not checked for existence. Doing so needs a
 * synchronous fs check (e.g. `existsSync` from node:fs), which this module
 * does not currently import.
 */
function getNodeModulesPath(): string {
  return join(__dirname, "node_modules");
}
24
+
25
/**
 * Compose the AGENTS.md instructions for a query session.
 *
 * @param sourceFiles Parsed source file names to list for the agent.
 * @param save When true, append the "Research Mode" section telling the agent
 *   to save its analysis under `.llm-kb/wiki/outputs/`.
 * @returns The complete markdown document, ending with a newline.
 */
function buildQueryAgents(sourceFiles: string[], save: boolean): string {
  const bullets: string[] = [];
  for (const file of sourceFiles) {
    bullets.push(` - ${file}`);
  }

  const lines = [
    "# llm-kb Knowledge Base — Query Mode",
    "",
    "## How to answer questions",
    "",
    "1. FIRST read .llm-kb/wiki/index.md to understand all available sources",
    "2. Based on the question, select the most relevant source files (usually 2-5)",
    "3. Read those source files in full from .llm-kb/wiki/sources/",
    "4. Answer with inline citations: (filename, page number)",
    "5. If the answer requires cross-referencing multiple files, read additional ones",
    "6. If you can't find the answer, say so — don't hallucinate",
    "",
    "## Available parsed sources",
    bullets.join("\n"),
    "",
    "## Non-PDF files",
    "If the user's folder has Excel, Word, or PowerPoint files, these libraries are available:",
    "- **exceljs** — for .xlsx/.xls files",
    "- **mammoth** — for .docx files",
    "- **officeparser** — for .pptx files",
    "Write a quick Node.js script via bash to read them.",
    "",
    "## Rules",
    "- Always cite sources with filename and page number",
    "- Read the FULL source file, not just the beginning",
    "- Prefer primary sources over previous analyses",
  ];

  if (save) {
    lines.push(
      "",
      "## Research Mode",
      "Save your analysis to .llm-kb/wiki/outputs/ with a descriptive filename (e.g., comparison-analysis.md).",
      "Include the question at the top and all citations."
    );
  }

  lines.push(""); // yields the trailing newline
  return lines.join("\n");
}
65
+
66
/**
 * Answer a question against the knowledge base, streaming the agent's text
 * to stdout.
 *
 * Side effect: overwrites `process.env.NODE_PATH` with the bundled
 * node_modules path.
 *
 * @param folder Knowledge-base root (the folder containing `.llm-kb/`).
 * @param question The user's question, sent to the agent as the prompt.
 * @param options `save: true` creates `.llm-kb/wiki/outputs/`, gives the agent
 *   bash and write tools in addition to read, and rebuilds the index afterwards.
 * @throws Error when the sources directory contains no `.md` files; also
 *   propagates failures from reading the directory, the agent session, or
 *   the re-index step.
 */
export async function query(
  folder: string,
  question: string,
  options: { save?: boolean }
): Promise<void> {
  const sourcesDir = join(folder, ".llm-kb", "wiki", "sources");

  const files = await readdir(sourcesDir);
  const mdFiles = files.filter((f) => f.endsWith(".md"));

  if (mdFiles.length === 0) {
    throw new Error("No sources found. Run 'llm-kb run' first to parse documents.");
  }

  if (options.save) {
    await mkdir(join(folder, ".llm-kb", "wiki", "outputs"), { recursive: true });
  }

  process.env.NODE_PATH = getNodeModulesPath();

  const agentsContent = buildQueryAgents(mdFiles, !!options.save);

  // Add the generated instructions as an extra agents file alongside any existing ones.
  const loader = new DefaultResourceLoader({
    cwd: folder,
    agentsFilesOverride: (current) => ({
      agentsFiles: [
        ...current.agentsFiles,
        { path: ".llm-kb/AGENTS.md", content: agentsContent },
      ],
    }),
  });
  await loader.reload();

  // Read-only by default; bash and write tools are added only in save mode.
  const tools = [createReadTool(folder)];
  if (options.save) {
    tools.push(createBashTool(folder), createWriteTool(folder));
  }

  const { session } = await createAgentSession({
    cwd: folder,
    resourceLoader: loader,
    tools,
    sessionManager: SessionManager.inMemory(),
    settingsManager: SettingsManager.inMemory({
      compaction: { enabled: false },
    }),
  });

  // Dispose the session on every exit path, including a rejected prompt.
  try {
    session.subscribe((event) => {
      if (
        event.type === "message_update" &&
        event.assistantMessageEvent.type === "text_delta"
      ) {
        process.stdout.write(event.assistantMessageEvent.delta);
      }
    });

    await session.prompt(question);
    console.log(); // terminate the streamed answer with a newline
  } finally {
    session.dispose();
  }

  // Re-index after save so the compounding loop works
  if (options.save) {
    const { buildIndex } = await import("./indexer.js");
    await buildIndex(folder, sourcesDir);
  }
}
package/src/resolve-kb.ts ADDED
@@ -0,0 +1,19 @@
1
+ import { existsSync } from "node:fs";
2
+ import { resolve, join, dirname } from "node:path";
3
+
4
/**
 * Locate the nearest knowledge base at or above `startDir`.
 *
 * @param startDir Directory to start from; resolved to an absolute path first.
 * @returns The first directory (starting with `startDir` itself, then each
 *   ancestor) that contains a `.llm-kb` entry, or `null` once the filesystem
 *   root has been checked without a match.
 */
export function resolveKnowledgeBase(startDir: string): string | null {
  let current = resolve(startDir);
  let parent = dirname(current);

  while (!existsSync(join(current, ".llm-kb"))) {
    // dirname() of the root is the root itself: nothing left to climb.
    if (parent === current) return null;
    current = parent;
    parent = dirname(current);
  }

  return current;
}
package/src/scan.ts ADDED
@@ -0,0 +1,59 @@
1
+ import { readdir } from "node:fs/promises";
2
+ import { resolve, extname, relative } from "node:path";
3
+
4
/** A supported file discovered by `scan`. */
export interface ScannedFile {
  /** File name including its extension. */
  name: string;
  /** Path relative to the scanned root folder. */
  path: string;
  /** Lower-cased extension including the leading dot, e.g. ".pdf". */
  ext: string;
}
9
+
10
/** Lower-case extensions (with leading dot) that `scan` reports. */
const SUPPORTED_EXTENSIONS = new Set(
  "pdf xlsx xls docx pptx jpg jpeg png txt md csv"
    .split(" ")
    .map((bare) => `.${bare}`)
);
23
+
24
/**
 * Recursively list the supported files under `folder`.
 *
 * Anything inside a `.llm-kb` directory, at any depth, is skipped.
 *
 * @param folder Folder to scan; resolved to an absolute root.
 * @returns One entry per regular file whose lower-cased extension is in
 *   `SUPPORTED_EXTENSIONS`, with `path` relative to the root.
 * @throws Whatever `readdir` throws (e.g. when `folder` is not a directory).
 */
export async function scan(folder: string): Promise<ScannedFile[]> {
  const root = resolve(folder);
  const entries = await readdir(root, { recursive: true, withFileTypes: true });

  const files: ScannedFile[] = [];

  for (const entry of entries) {
    if (!entry.isFile()) continue;

    const fullPath = resolve(entry.parentPath, entry.name);
    const rel = relative(root, fullPath);

    // Skip .llm-kb internal folders. Compare whole path segments: a plain
    // prefix test would also drop root files such as ".llm-kb-notes.pdf" and
    // would miss a ".llm-kb" directory nested in a subfolder.
    if (rel.split(/[\\/]/).includes(".llm-kb")) continue;

    const ext = extname(entry.name).toLowerCase();
    if (!SUPPORTED_EXTENSIONS.has(ext)) continue;

    files.push({ name: entry.name, path: rel, ext });
  }

  return files;
}
47
+
48
/**
 * Produce a short per-type tally such as "3 PDF, 1 MD".
 *
 * @param files Scanned files to count by extension.
 * @returns Counts sorted from most to least frequent, each followed by the
 *   upper-cased extension without its dot, joined with ", ". Empty input
 *   yields an empty string.
 */
export function summarize(files: ScannedFile[]): string {
  const tally = new Map<string, number>();
  files.forEach(({ ext }) => {
    tally.set(ext, (tally.get(ext) || 0) + 1);
  });

  // Highest count first; ties keep first-seen order.
  const ranked = Array.from(tally.entries()).sort(
    ([, countA], [, countB]) => countB - countA
  );

  const labels: string[] = [];
  for (const [ext, count] of ranked) {
    labels.push(`${count} ${ext.toUpperCase().slice(1)}`);
  }
  return labels.join(", ");
}
package/src/watcher.ts ADDED
@@ -0,0 +1,84 @@
1
+ import { watch } from "chokidar";
2
+ import { extname, join, basename } from "node:path";
3
+ import { parsePDF } from "./pdf.js";
4
+ import { buildIndex } from "./indexer.js";
5
+ import chalk from "chalk";
6
+
7
/** Settings accepted by `startWatcher`. */
interface WatcherOptions {
  /** Folder to watch; also passed to `buildIndex` as the knowledge-base root. */
  folder: string;
  /** Directory that receives the parsed outputs. */
  sourcesDir: string;
  /** Delay in milliseconds after the last file event before a batch is processed. */
  debounceMs?: number;
}
12
+
13
/**
 * Watch `folder` for added or changed PDFs, parse them in debounced batches,
 * and rebuild the index after each batch.
 *
 * Batches never overlap: if the debounce timer fires while a batch is still
 * running, the queued files stay pending and a new batch is scheduled once
 * the running one finishes.
 *
 * @param options.folder Folder to watch (also the knowledge-base root).
 * @param options.sourcesDir Directory that receives the parsed outputs.
 * @param options.debounceMs Quiet period before a batch starts (default 2000).
 * @returns The chokidar watcher, so the caller can close it.
 */
export function startWatcher({ folder, sourcesDir, debounceMs = 2000 }: WatcherOptions) {
  let pendingFiles: string[] = [];
  let debounceTimer: ReturnType<typeof setTimeout> | null = null;
  let processing = false;

  // (Re)start the debounce window.
  function scheduleBatch() {
    if (debounceTimer) clearTimeout(debounceTimer);
    debounceTimer = setTimeout(() => {
      debounceTimer = null;
      void processBatch();
    }, debounceMs);
  }

  async function processBatch() {
    // A batch is already running; it reschedules when it finishes, so two
    // parse/index runs never execute at the same time.
    if (processing) return;

    const files = [...pendingFiles];
    pendingFiles = [];

    if (files.length === 0) return;

    processing = true;
    try {
      console.log();
      for (const filePath of files) {
        const name = basename(filePath);
        process.stdout.write(` Parsing ${name}...`);
        try {
          const result = await parsePDF(filePath, sourcesDir);
          if (result.skipped) {
            console.log(chalk.dim(` skipped (up to date)`));
          } else {
            console.log(chalk.green(` ✓ ${result.totalPages} pages`));
          }
        } catch (err: unknown) {
          const message = err instanceof Error ? err.message : String(err);
          console.log(chalk.red(` ✗ ${message}`));
        }
      }

      // Re-index
      process.stdout.write(` Re-indexing...`);
      try {
        await buildIndex(folder, sourcesDir);
        console.log(chalk.green(` ✓ index.md updated`));
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        console.log(chalk.red(` ✗ ${message}`));
      }
    } finally {
      processing = false;
      // Files that arrived during this batch whose timer already fired would
      // otherwise wait for the next file event.
      if (pendingFiles.length > 0 && !debounceTimer) scheduleBatch();
    }
  }

  function queueFile(filePath: string) {
    if (!pendingFiles.includes(filePath)) {
      pendingFiles.push(filePath);
    }
    scheduleBatch();
  }

  // Shared handler for "add" and "change": only PDFs are queued.
  function onPdfEvent(label: string, filePath: string) {
    if (extname(filePath).toLowerCase() !== ".pdf") return;
    console.log(chalk.dim(`\n ${label}: ${basename(filePath)}`));
    queueFile(filePath);
  }

  // Ignore node_modules, .llm-kb and .git at any depth below the watched
  // folder. A matcher function is used instead of glob strings because
  // chokidar 4 dropped glob support for `ignored` while functions are
  // accepted by both chokidar 3 and 4 — NOTE(review): confirm against the
  // chokidar version this package pins.
  const ignoredSegment = /(^|[\\/])(node_modules|\.llm-kb|\.git)([\\/]|$)/;
  const isIgnored = (candidate: string): boolean => {
    const below = candidate.startsWith(folder) ? candidate.slice(folder.length) : candidate;
    return ignoredSegment.test(below);
  };

  const watcher = watch(folder, {
    ignoreInitial: true,
    ignored: isIgnored,
    depth: 10,
  });

  watcher.on("add", (filePath) => onPdfEvent("New file", filePath));
  watcher.on("change", (filePath) => onPdfEvent("Changed", filePath));

  return watcher;
}
package/tsconfig.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ES2022",
5
+ "moduleResolution": "bundler",
6
+ "strict": true,
7
+ "esModuleInterop": true,
8
+ "outDir": "dist",
9
+ "rootDir": "src",
10
+ "declaration": true,
11
+ "sourceMap": true
12
+ },
13
+ "include": ["src"]
14
+ }