npm - bluera-knowledge - Versions diffs - 0.11.18 → 0.11.20 - Mend

bluera-knowledge 0.11.18 → 0.11.20

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (48)

package/.claude-plugin/plugin.json +1 -1
package/CHANGELOG.md +55 -0
package/dist/{chunk-6FHWC36B.js → chunk-HRQD3MPH.js} +8 -6
package/dist/chunk-HRQD3MPH.js.map +1 -0
package/dist/{chunk-ZZNABJMQ.js → chunk-MQGRQ2EG.js} +99 -34
package/dist/chunk-MQGRQ2EG.js.map +1 -0
package/dist/{chunk-ZDEO4WJT.js → chunk-Q2ZGPJ66.js} +22 -70
package/dist/chunk-Q2ZGPJ66.js.map +1 -0
package/dist/{chunk-5NUI6JL6.js → chunk-ZSKQIMD7.js} +5 -2
package/dist/chunk-ZSKQIMD7.js.map +1 -0
package/dist/index.js +36 -18
package/dist/index.js.map +1 -1
package/dist/mcp/server.js +3 -3
package/dist/watch.service-OPLKIDFQ.js +7 -0
package/dist/workers/background-worker-cli.js +3 -3
package/package.json +1 -1
package/src/cli/commands/crawl.ts +1 -1
package/src/cli/commands/index-cmd.test.ts +14 -4
package/src/cli/commands/index-cmd.ts +11 -4
package/src/cli/commands/store.test.ts +211 -18
package/src/cli/commands/store.ts +26 -8
package/src/crawl/article-converter.test.ts +30 -61
package/src/crawl/article-converter.ts +2 -8
package/src/crawl/bridge.test.ts +14 -0
package/src/crawl/bridge.ts +17 -5
package/src/crawl/intelligent-crawler.test.ts +65 -76
package/src/crawl/intelligent-crawler.ts +33 -69
package/src/db/lance.test.ts +3 -4
package/src/db/lance.ts +14 -19
package/src/mcp/server.test.ts +56 -1
package/src/mcp/server.ts +5 -1
package/src/plugin/git-clone.test.ts +44 -0
package/src/plugin/git-clone.ts +4 -0
package/src/services/code-unit.service.test.ts +59 -6
package/src/services/code-unit.service.ts +47 -2
package/src/services/index.ts +19 -3
package/src/services/job.service.test.ts +10 -7
package/src/services/job.service.ts +12 -6
package/src/services/search.service.ts +15 -9
package/src/services/services.test.ts +19 -6
package/src/services/watch.service.test.ts +80 -56
package/src/services/watch.service.ts +9 -6
package/dist/chunk-5NUI6JL6.js.map +0 -1
package/dist/chunk-6FHWC36B.js.map +0 -1
package/dist/chunk-ZDEO4WJT.js.map +0 -1
package/dist/chunk-ZZNABJMQ.js.map +0 -1
package/dist/watch.service-BJV3TI3F.js +0 -7
package/dist/{watch.service-BJV3TI3F.js.map → watch.service-OPLKIDFQ.js.map} +0 -0

package/src/crawl/article-converter.test.ts CHANGED Viewed

@@ -44,7 +44,7 @@ describe('convertHtmlToMarkdown', () => {
         html,
         'https://example.com'
       );
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
     it('should include title from extracted article', async () => {
@@ -71,7 +71,7 @@ describe('convertHtmlToMarkdown', () => {
       const html = '<html><body><h1>Full HTML</h1></body></html>';
       const result = await convertHtmlToMarkdown(html, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       // Should have processed the full HTML through markdown conversion
       expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(html);
     });
@@ -92,7 +92,7 @@ describe('convertHtmlToMarkdown', () => {
       const html = '<html><body><h1>Full HTML</h1></body></html>';
       const result = await convertHtmlToMarkdown(html, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(html);
     });
@@ -112,7 +112,7 @@ describe('convertHtmlToMarkdown', () => {
       const html = '<html><body><h1>Full HTML</h1></body></html>';
       const result = await convertHtmlToMarkdown(html, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(html);
     });
@@ -122,7 +122,7 @@ describe('convertHtmlToMarkdown', () => {
       const html = '<html><body><h1>Full HTML</h1></body></html>';
       const result = await convertHtmlToMarkdown(html, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(html);
     });
@@ -192,7 +192,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(result.markdown).toContain('# Heading 1');
       expect(result.markdown).toContain('## Heading 2');
       expect(result.markdown).toContain('### Heading 3');
@@ -213,7 +213,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(result.markdown).toContain('```');
     });
@@ -232,7 +232,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(result.markdown).toContain('[Link Text](https://example.com)');
     });
@@ -251,7 +251,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(result.markdown).toContain('|');
     });
@@ -270,7 +270,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(result.markdown).toContain('# Heading with Anchor');
       expect(result.markdown).not.toContain('[]()');
     });
@@ -290,7 +290,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(result.markdown).toContain('# Heading with spaces');
     });
@@ -309,62 +309,31 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       // Empty heading should not appear in markdown
       expect(result.markdown).not.toMatch(/^#\s*$/m);
     });
   });
   describe('Error Handling', () => {
-    it('should return error result when conversion throws error', async () => {
-      vi.mocked(articleExtractor.extractFromHtml).mockImplementation(() => {
-        throw new Error('Fatal conversion error');
-      });
-      vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks).mockImplementation(() => {
-        throw new Error('Fatal conversion error');
-      });
-      const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(false);
-      expect(result.markdown).toBe('');
-      expect(result.error).toBe('Fatal conversion error');
-    });
-    it('should handle non-Error thrown values', async () => {
-      vi.mocked(articleExtractor.extractFromHtml).mockImplementation(() => {
-        throw 'String error';
-      });
-      vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks).mockImplementation(() => {
-        throw 'String error';
-      });
-      const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(false);
-      expect(result.error).toBe('String error');
-    });
-    it('should return empty markdown on error', async () => {
+    it('should throw when conversion fails due to preprocessing error', async () => {
       vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks).mockImplementation(() => {
         throw new Error('Preprocessing failed');
       });
-      const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(false);
-      expect(result.markdown).toBe('');
+      await expect(convertHtmlToMarkdown('<html></html>', 'https://example.com')).rejects.toThrow(
+        'Preprocessing failed'
+      );
     });
-    it('should not include title on error', async () => {
+    it('should throw with non-Error values wrapped as Error', async () => {
       vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks).mockImplementation(() => {
-        throw new Error('Preprocessing failed');
+        throw 'String error';
       });
-      const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
-      expect(result.success).toBe(false);
-      expect(result.title).toBeUndefined();
+      await expect(convertHtmlToMarkdown('<html></html>', 'https://example.com')).rejects.toThrow(
+        'String error'
+      );
     });
   });
@@ -372,27 +341,27 @@ describe('convertHtmlToMarkdown', () => {
     it('should handle empty HTML string', async () => {
       const result = await convertHtmlToMarkdown('', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
     it('should handle whitespace-only HTML', async () => {
       const result = await convertHtmlToMarkdown('   \n  \t  ', 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
     it('should handle malformed HTML', async () => {
       const html = '<html><body><div><p>Unclosed tags';
       const result = await convertHtmlToMarkdown(html, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
     it('should handle HTML with no content', async () => {
       const html = '<html><head><title>Title</title></head><body></body></html>';
       const result = await convertHtmlToMarkdown(html, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
     it('should handle HTML with only navigation elements', async () => {
@@ -411,7 +380,7 @@ describe('convertHtmlToMarkdown', () => {
       const html = '<html><nav><a href="/">Home</a></nav></html>';
       const result = await convertHtmlToMarkdown(html, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
   });
@@ -447,7 +416,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown(mkdocsHtml, 'https://example.com/docs');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
       expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(mkdocsHtml);
     });
@@ -483,7 +452,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown(sphinxHtml, 'https://example.com/docs');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
     it('should handle nested code blocks with syntax highlighting', async () => {
@@ -511,7 +480,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown(complexHtml, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
     it('should handle documentation with table of contents', async () => {
@@ -544,7 +513,7 @@ describe('convertHtmlToMarkdown', () => {
       const result = await convertHtmlToMarkdown(htmlWithToc, 'https://example.com');
-      expect(result.success).toBe(true);
+      // Function throws on error, so reaching here means success
     });
   });

package/src/crawl/article-converter.ts CHANGED Viewed

@@ -14,8 +14,6 @@ const logger = createLogger('article-converter');
 export interface ConversionResult {
   markdown: string;
   title?: string;
-  success: boolean;
-  error?: string;
 }
 /**
@@ -128,7 +126,6 @@ export async function convertHtmlToMarkdown(html: string, url: string): Promise<
     return {
       markdown,
       ...(title !== undefined && { title }),
-      success: true,
     };
   } catch (error) {
     logger.error(
@@ -139,10 +136,7 @@ export async function convertHtmlToMarkdown(html: string, url: string): Promise<
       'HTML to markdown conversion failed'
     );
-    return {
-      markdown: '',
-      success: false,
-      error: error instanceof Error ? error.message : String(error),
-    };
+    // Re-throw errors - do not return graceful degradation
+    throw error instanceof Error ? error : new Error(String(error));
   }
 }

package/src/crawl/bridge.test.ts CHANGED Viewed

@@ -654,6 +654,20 @@ describe('PythonBridge', () => {
       expect(mockProcess.kill).toHaveBeenCalled();
     });
+    it('should close stderr readline interface on stop', async () => {
+      await bridge.start();
+      await bridge.stop();
+      expect(mockStderrReadline.close).toHaveBeenCalled();
+    });
+    it('should close stdout readline interface on stop', async () => {
+      await bridge.start();
+      await bridge.stop();
+      expect(mockReadline.close).toHaveBeenCalled();
+    });
     it('should set process to null on stop', async () => {
       await bridge.start();
       await bridge.stop();

package/src/crawl/bridge.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { spawn, type ChildProcess } from 'node:child_process';
 import { randomUUID } from 'node:crypto';
-import { createInterface } from 'node:readline';
+import { createInterface, type Interface as ReadlineInterface } from 'node:readline';
 import { ZodError } from 'zod';
 import {
   type CrawlResult,
@@ -31,6 +31,8 @@ export class PythonBridge {
   private process: ChildProcess | null = null;
   private readonly pending: Map<string, PendingRequest> = new Map();
   private stoppingIntentionally = false;
+  private stdoutReadline: ReadlineInterface | null = null;
+  private stderrReadline: ReadlineInterface | null = null;
   start(): Promise<void> {
     if (this.process) return Promise.resolve();
@@ -63,8 +65,8 @@ export class PythonBridge {
     // Add stderr logging
     if (this.process.stderr) {
-      const stderrRl = createInterface({ input: this.process.stderr });
-      stderrRl.on('line', (line) => {
+      this.stderrReadline = createInterface({ input: this.process.stderr });
+      this.stderrReadline.on('line', (line) => {
         logger.warn({ stderr: line }, 'Python bridge stderr output');
       });
     }
@@ -74,8 +76,8 @@ export class PythonBridge {
       this.process = null; // Clean up reference
       return Promise.reject(new Error('Python bridge process stdout is null'));
     }
-    const rl = createInterface({ input: this.process.stdout });
-    rl.on('line', (line) => {
+    this.stdoutReadline = createInterface({ input: this.process.stdout });
+    this.stdoutReadline.on('line', (line) => {
       // Filter out non-JSON lines (crawl4ai verbose output)
       if (!line.trim().startsWith('{')) {
         return;
@@ -266,6 +268,16 @@ export class PythonBridge {
       this.stoppingIntentionally = true;
       this.rejectAllPending(new Error('Python bridge stopped'));
+      // Close readline interfaces to prevent resource leaks
+      if (this.stdoutReadline) {
+        this.stdoutReadline.close();
+        this.stdoutReadline = null;
+      }
+      if (this.stderrReadline) {
+        this.stderrReadline.close();
+        this.stderrReadline = null;
+      }
       // Wait for process to actually exit before resolving
       const proc = this.process;
       if (proc === null) {

package/src/crawl/intelligent-crawler.test.ts CHANGED Viewed

@@ -58,7 +58,6 @@ describe('IntelligentCrawler', () => {
     // Setup convertHtmlToMarkdown mock
     vi.mocked(articleConverter.convertHtmlToMarkdown).mockResolvedValue({
-      success: true,
       markdown: '# Test\n\nContent',
       title: 'Test Page',
     });
@@ -581,24 +580,20 @@ describe('IntelligentCrawler', () => {
     });
   });
-  describe('Intelligent Mode Fallback', () => {
-    it('should fallback to simple mode when Claude strategy fails', async () => {
+  describe('Intelligent Mode Error Handling', () => {
+    it('should throw when Claude strategy fails', async () => {
       mockClaudeClient.determineCrawlUrls.mockRejectedValue(new Error('Claude API error'));
-      mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
       const results = [];
-      for await (const result of crawler.crawl('https://example.com', {
-        crawlInstruction: 'Find all docs',
-      })) {
-        results.push(result);
-      }
+      await expect(async () => {
+        for await (const result of crawler.crawl('https://example.com', {
+          crawlInstruction: 'Find all docs',
+        })) {
+          results.push(result);
+        }
+      }).rejects.toThrow('Claude API error');
-      // Should still crawl using simple mode
-      expect(results).toHaveLength(1);
-      const errorEvents = progressEvents.filter((e) => e.type === 'error');
-      expect(errorEvents.some((e) => e.message?.includes('falling back to simple mode'))).toBe(
-        true
-      );
+      expect(results).toHaveLength(0);
     });
   });
@@ -623,22 +618,21 @@ describe('IntelligentCrawler', () => {
       );
     });
-    it('should continue without extraction if extraction fails', async () => {
+    it('should throw when extraction fails', async () => {
       mockClaudeClient.extractContent.mockRejectedValue(new Error('Extraction failed'));
       mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
       const results = [];
-      for await (const result of crawler.crawl('https://example.com', {
-        simple: true,
-        extractInstruction: 'Extract pricing',
-      })) {
-        results.push(result);
-      }
+      await expect(async () => {
+        for await (const result of crawler.crawl('https://example.com', {
+          simple: true,
+          extractInstruction: 'Extract pricing',
+        })) {
+          results.push(result);
+        }
+      }).rejects.toThrow('Extraction failed');
-      expect(results).toHaveLength(1);
-      expect(results[0]?.extracted).toBeUndefined();
-      const errorEvents = progressEvents.filter((e) => e.type === 'error');
-      expect(errorEvents.some((e) => e.message?.includes('storing raw markdown'))).toBe(true);
+      expect(results).toHaveLength(0);
     });
     it('should not extract when extractInstruction is empty', async () => {
@@ -824,7 +818,6 @@ describe('IntelligentCrawler', () => {
     it('should include title when available', async () => {
       mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
       vi.mocked(articleConverter.convertHtmlToMarkdown).mockResolvedValue({
-        success: true,
         markdown: '# Test',
         title: 'Test Page Title',
       });
@@ -839,11 +832,9 @@ describe('IntelligentCrawler', () => {
     it('should handle conversion failures', async () => {
       mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
-      vi.mocked(articleConverter.convertHtmlToMarkdown).mockResolvedValue({
-        success: false,
-        markdown: '',
-        error: 'Conversion error',
-      });
+      vi.mocked(articleConverter.convertHtmlToMarkdown).mockRejectedValue(
+        new Error('Conversion error')
+      );
       const results = [];
       for await (const result of crawler.crawl('https://example.com', { simple: true })) {
@@ -905,69 +896,67 @@ describe('IntelligentCrawler', () => {
     });
   });
-  describe('npm Package Mode (Claude CLI Not Installed)', () => {
-    it('should use simple mode when Claude CLI is not available', async () => {
-      // Simulate npm package usage without Claude Code installed
-      vi.mocked(ClaudeClient.isAvailable).mockReturnValue(false);
+  describe('Headless Mode Error Handling', () => {
+    it('should throw when headless fetch fails', async () => {
+      mockPythonBridge.fetchHeadless.mockRejectedValue(new Error('Browser crashed'));
-      // Setup link extraction for simple mode
-      mockPythonBridge.crawl.mockResolvedValue({
-        pages: [{ links: [] }],
-      });
+      const results = [];
+      await expect(async () => {
+        for await (const result of crawler.crawl('https://example.com', {
+          simple: true,
+          useHeadless: true,
+        })) {
+          results.push(result);
+        }
+      }).rejects.toThrow('Headless fetch failed: Browser crashed');
-      const results: { url: string }[] = [];
+      expect(results).toHaveLength(0);
+      // Should not have fallen back to axios
+      expect(axios.get).not.toHaveBeenCalled();
+    });
+  });
-      for await (const result of crawler.crawl('https://example.com', {
-        crawlInstruction: 'Find all documentation pages', // Would use intelligent mode
-        maxPages: 5,
-      })) {
-        results.push(result);
-      }
+  describe('Claude CLI Not Installed', () => {
+    it('should throw when intelligent mode requested but Claude CLI not available', async () => {
+      // Simulate npm package usage without Claude Code installed
+      vi.mocked(ClaudeClient.isAvailable).mockReturnValue(false);
-      // Should have crawled using simple BFS mode
-      expect(results.length).toBeGreaterThan(0);
-      expect(results[0]?.url).toBe('https://example.com');
+      const results: { url: string }[] = [];
-      // Should have emitted progress event about mode switch
-      const modeEvent = progressEvents.find(
-        (e) => e.type === 'error' && e.message?.includes('Claude CLI not found')
-      );
-      expect(modeEvent).toBeDefined();
-      expect(modeEvent?.message).toContain('using simple crawl mode');
+      await expect(async () => {
+        for await (const result of crawler.crawl('https://example.com', {
+          crawlInstruction: 'Find all documentation pages', // Requires intelligent mode
+          maxPages: 5,
+        })) {
+          results.push(result);
+        }
+      }).rejects.toThrow('Claude CLI not available');
-      // Should NOT have called Claude's determineCrawlUrls
+      expect(results).toHaveLength(0);
       expect(mockClaudeClient.determineCrawlUrls).not.toHaveBeenCalled();
     });
-    it('should skip extraction when Claude CLI is not available', async () => {
+    it('should throw when extraction requested but Claude CLI not available', async () => {
       // Simulate npm package usage without Claude Code installed
       vi.mocked(ClaudeClient.isAvailable).mockReturnValue(false);
-      // Setup for simple mode
       mockPythonBridge.crawl.mockResolvedValue({
         pages: [{ links: [] }],
       });
       const results: { url: string; extracted?: string }[] = [];
-      for await (const result of crawler.crawl('https://example.com', {
-        simple: true,
-        extractInstruction: 'Extract pricing info', // Would use Claude
-        maxPages: 1,
-      })) {
-        results.push(result);
-      }
-      expect(results.length).toBe(1);
-      expect(results[0]?.extracted).toBeUndefined(); // Should not have extracted
-      // Should have emitted skip extraction progress event
-      const skipEvent = progressEvents.find(
-        (e) => e.type === 'error' && e.message?.includes('Skipping extraction')
-      );
-      expect(skipEvent).toBeDefined();
+      await expect(async () => {
+        for await (const result of crawler.crawl('https://example.com', {
+          simple: true,
+          extractInstruction: 'Extract pricing info', // Requires Claude
+          maxPages: 1,
+        })) {
+          results.push(result);
+        }
+      }).rejects.toThrow('Claude CLI not available');
-      // Should NOT have called Claude's extractContent
+      expect(results).toHaveLength(0);
       expect(mockClaudeClient.extractContent).not.toHaveBeenCalled();
     });
   });