bluera-knowledge 0.11.18 → 0.11.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +55 -0
- package/dist/{chunk-6FHWC36B.js → chunk-HRQD3MPH.js} +8 -6
- package/dist/chunk-HRQD3MPH.js.map +1 -0
- package/dist/{chunk-ZZNABJMQ.js → chunk-MQGRQ2EG.js} +99 -34
- package/dist/chunk-MQGRQ2EG.js.map +1 -0
- package/dist/{chunk-ZDEO4WJT.js → chunk-Q2ZGPJ66.js} +22 -70
- package/dist/chunk-Q2ZGPJ66.js.map +1 -0
- package/dist/{chunk-5NUI6JL6.js → chunk-ZSKQIMD7.js} +5 -2
- package/dist/chunk-ZSKQIMD7.js.map +1 -0
- package/dist/index.js +36 -18
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +3 -3
- package/dist/watch.service-OPLKIDFQ.js +7 -0
- package/dist/workers/background-worker-cli.js +3 -3
- package/package.json +1 -1
- package/src/cli/commands/crawl.ts +1 -1
- package/src/cli/commands/index-cmd.test.ts +14 -4
- package/src/cli/commands/index-cmd.ts +11 -4
- package/src/cli/commands/store.test.ts +211 -18
- package/src/cli/commands/store.ts +26 -8
- package/src/crawl/article-converter.test.ts +30 -61
- package/src/crawl/article-converter.ts +2 -8
- package/src/crawl/bridge.test.ts +14 -0
- package/src/crawl/bridge.ts +17 -5
- package/src/crawl/intelligent-crawler.test.ts +65 -76
- package/src/crawl/intelligent-crawler.ts +33 -69
- package/src/db/lance.test.ts +3 -4
- package/src/db/lance.ts +14 -19
- package/src/mcp/server.test.ts +56 -1
- package/src/mcp/server.ts +5 -1
- package/src/plugin/git-clone.test.ts +44 -0
- package/src/plugin/git-clone.ts +4 -0
- package/src/services/code-unit.service.test.ts +59 -6
- package/src/services/code-unit.service.ts +47 -2
- package/src/services/index.ts +19 -3
- package/src/services/job.service.test.ts +10 -7
- package/src/services/job.service.ts +12 -6
- package/src/services/search.service.ts +15 -9
- package/src/services/services.test.ts +19 -6
- package/src/services/watch.service.test.ts +80 -56
- package/src/services/watch.service.ts +9 -6
- package/dist/chunk-5NUI6JL6.js.map +0 -1
- package/dist/chunk-6FHWC36B.js.map +0 -1
- package/dist/chunk-ZDEO4WJT.js.map +0 -1
- package/dist/chunk-ZZNABJMQ.js.map +0 -1
- package/dist/watch.service-BJV3TI3F.js +0 -7
- /package/dist/{watch.service-BJV3TI3F.js.map → watch.service-OPLKIDFQ.js.map} +0 -0
|
@@ -44,7 +44,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
44
44
|
html,
|
|
45
45
|
'https://example.com'
|
|
46
46
|
);
|
|
47
|
-
|
|
47
|
+
// Function throws on error, so reaching here means success
|
|
48
48
|
});
|
|
49
49
|
|
|
50
50
|
it('should include title from extracted article', async () => {
|
|
@@ -71,7 +71,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
71
71
|
const html = '<html><body><h1>Full HTML</h1></body></html>';
|
|
72
72
|
const result = await convertHtmlToMarkdown(html, 'https://example.com');
|
|
73
73
|
|
|
74
|
-
|
|
74
|
+
// Function throws on error, so reaching here means success
|
|
75
75
|
// Should have processed the full HTML through markdown conversion
|
|
76
76
|
expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(html);
|
|
77
77
|
});
|
|
@@ -92,7 +92,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
92
92
|
const html = '<html><body><h1>Full HTML</h1></body></html>';
|
|
93
93
|
const result = await convertHtmlToMarkdown(html, 'https://example.com');
|
|
94
94
|
|
|
95
|
-
|
|
95
|
+
// Function throws on error, so reaching here means success
|
|
96
96
|
expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(html);
|
|
97
97
|
});
|
|
98
98
|
|
|
@@ -112,7 +112,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
112
112
|
const html = '<html><body><h1>Full HTML</h1></body></html>';
|
|
113
113
|
const result = await convertHtmlToMarkdown(html, 'https://example.com');
|
|
114
114
|
|
|
115
|
-
|
|
115
|
+
// Function throws on error, so reaching here means success
|
|
116
116
|
expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(html);
|
|
117
117
|
});
|
|
118
118
|
|
|
@@ -122,7 +122,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
122
122
|
const html = '<html><body><h1>Full HTML</h1></body></html>';
|
|
123
123
|
const result = await convertHtmlToMarkdown(html, 'https://example.com');
|
|
124
124
|
|
|
125
|
-
|
|
125
|
+
// Function throws on error, so reaching here means success
|
|
126
126
|
expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(html);
|
|
127
127
|
});
|
|
128
128
|
|
|
@@ -192,7 +192,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
192
192
|
|
|
193
193
|
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
194
194
|
|
|
195
|
-
|
|
195
|
+
// Function throws on error, so reaching here means success
|
|
196
196
|
expect(result.markdown).toContain('# Heading 1');
|
|
197
197
|
expect(result.markdown).toContain('## Heading 2');
|
|
198
198
|
expect(result.markdown).toContain('### Heading 3');
|
|
@@ -213,7 +213,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
213
213
|
|
|
214
214
|
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
215
215
|
|
|
216
|
-
|
|
216
|
+
// Function throws on error, so reaching here means success
|
|
217
217
|
expect(result.markdown).toContain('```');
|
|
218
218
|
});
|
|
219
219
|
|
|
@@ -232,7 +232,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
232
232
|
|
|
233
233
|
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
234
234
|
|
|
235
|
-
|
|
235
|
+
// Function throws on error, so reaching here means success
|
|
236
236
|
expect(result.markdown).toContain('[Link Text](https://example.com)');
|
|
237
237
|
});
|
|
238
238
|
|
|
@@ -251,7 +251,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
251
251
|
|
|
252
252
|
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
253
253
|
|
|
254
|
-
|
|
254
|
+
// Function throws on error, so reaching here means success
|
|
255
255
|
expect(result.markdown).toContain('|');
|
|
256
256
|
});
|
|
257
257
|
|
|
@@ -270,7 +270,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
270
270
|
|
|
271
271
|
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
272
272
|
|
|
273
|
-
|
|
273
|
+
// Function throws on error, so reaching here means success
|
|
274
274
|
expect(result.markdown).toContain('# Heading with Anchor');
|
|
275
275
|
expect(result.markdown).not.toContain('[]()');
|
|
276
276
|
});
|
|
@@ -290,7 +290,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
290
290
|
|
|
291
291
|
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
292
292
|
|
|
293
|
-
|
|
293
|
+
// Function throws on error, so reaching here means success
|
|
294
294
|
expect(result.markdown).toContain('# Heading with spaces');
|
|
295
295
|
});
|
|
296
296
|
|
|
@@ -309,62 +309,31 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
309
309
|
|
|
310
310
|
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
311
311
|
|
|
312
|
-
|
|
312
|
+
// Function throws on error, so reaching here means success
|
|
313
313
|
// Empty heading should not appear in markdown
|
|
314
314
|
expect(result.markdown).not.toMatch(/^#\s*$/m);
|
|
315
315
|
});
|
|
316
316
|
});
|
|
317
317
|
|
|
318
318
|
describe('Error Handling', () => {
|
|
319
|
-
it('should
|
|
320
|
-
vi.mocked(articleExtractor.extractFromHtml).mockImplementation(() => {
|
|
321
|
-
throw new Error('Fatal conversion error');
|
|
322
|
-
});
|
|
323
|
-
vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks).mockImplementation(() => {
|
|
324
|
-
throw new Error('Fatal conversion error');
|
|
325
|
-
});
|
|
326
|
-
|
|
327
|
-
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
328
|
-
|
|
329
|
-
expect(result.success).toBe(false);
|
|
330
|
-
expect(result.markdown).toBe('');
|
|
331
|
-
expect(result.error).toBe('Fatal conversion error');
|
|
332
|
-
});
|
|
333
|
-
|
|
334
|
-
it('should handle non-Error thrown values', async () => {
|
|
335
|
-
vi.mocked(articleExtractor.extractFromHtml).mockImplementation(() => {
|
|
336
|
-
throw 'String error';
|
|
337
|
-
});
|
|
338
|
-
vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks).mockImplementation(() => {
|
|
339
|
-
throw 'String error';
|
|
340
|
-
});
|
|
341
|
-
|
|
342
|
-
const result = await convertHtmlToMarkdown('<html></html>', 'https://example.com');
|
|
343
|
-
|
|
344
|
-
expect(result.success).toBe(false);
|
|
345
|
-
expect(result.error).toBe('String error');
|
|
346
|
-
});
|
|
347
|
-
|
|
348
|
-
it('should return empty markdown on error', async () => {
|
|
319
|
+
it('should throw when conversion fails due to preprocessing error', async () => {
|
|
349
320
|
vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks).mockImplementation(() => {
|
|
350
321
|
throw new Error('Preprocessing failed');
|
|
351
322
|
});
|
|
352
323
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
expect(result.markdown).toBe('');
|
|
324
|
+
await expect(convertHtmlToMarkdown('<html></html>', 'https://example.com')).rejects.toThrow(
|
|
325
|
+
'Preprocessing failed'
|
|
326
|
+
);
|
|
357
327
|
});
|
|
358
328
|
|
|
359
|
-
it('should
|
|
329
|
+
it('should throw with non-Error values wrapped as Error', async () => {
|
|
360
330
|
vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks).mockImplementation(() => {
|
|
361
|
-
throw
|
|
331
|
+
throw 'String error';
|
|
362
332
|
});
|
|
363
333
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
expect(result.title).toBeUndefined();
|
|
334
|
+
await expect(convertHtmlToMarkdown('<html></html>', 'https://example.com')).rejects.toThrow(
|
|
335
|
+
'String error'
|
|
336
|
+
);
|
|
368
337
|
});
|
|
369
338
|
});
|
|
370
339
|
|
|
@@ -372,27 +341,27 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
372
341
|
it('should handle empty HTML string', async () => {
|
|
373
342
|
const result = await convertHtmlToMarkdown('', 'https://example.com');
|
|
374
343
|
|
|
375
|
-
|
|
344
|
+
// Function throws on error, so reaching here means success
|
|
376
345
|
});
|
|
377
346
|
|
|
378
347
|
it('should handle whitespace-only HTML', async () => {
|
|
379
348
|
const result = await convertHtmlToMarkdown(' \n \t ', 'https://example.com');
|
|
380
349
|
|
|
381
|
-
|
|
350
|
+
// Function throws on error, so reaching here means success
|
|
382
351
|
});
|
|
383
352
|
|
|
384
353
|
it('should handle malformed HTML', async () => {
|
|
385
354
|
const html = '<html><body><div><p>Unclosed tags';
|
|
386
355
|
const result = await convertHtmlToMarkdown(html, 'https://example.com');
|
|
387
356
|
|
|
388
|
-
|
|
357
|
+
// Function throws on error, so reaching here means success
|
|
389
358
|
});
|
|
390
359
|
|
|
391
360
|
it('should handle HTML with no content', async () => {
|
|
392
361
|
const html = '<html><head><title>Title</title></head><body></body></html>';
|
|
393
362
|
const result = await convertHtmlToMarkdown(html, 'https://example.com');
|
|
394
363
|
|
|
395
|
-
|
|
364
|
+
// Function throws on error, so reaching here means success
|
|
396
365
|
});
|
|
397
366
|
|
|
398
367
|
it('should handle HTML with only navigation elements', async () => {
|
|
@@ -411,7 +380,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
411
380
|
const html = '<html><nav><a href="/">Home</a></nav></html>';
|
|
412
381
|
const result = await convertHtmlToMarkdown(html, 'https://example.com');
|
|
413
382
|
|
|
414
|
-
|
|
383
|
+
// Function throws on error, so reaching here means success
|
|
415
384
|
});
|
|
416
385
|
});
|
|
417
386
|
|
|
@@ -447,7 +416,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
447
416
|
|
|
448
417
|
const result = await convertHtmlToMarkdown(mkdocsHtml, 'https://example.com/docs');
|
|
449
418
|
|
|
450
|
-
|
|
419
|
+
// Function throws on error, so reaching here means success
|
|
451
420
|
expect(vi.mocked(markdownUtils.preprocessHtmlForCodeBlocks)).toHaveBeenCalledWith(mkdocsHtml);
|
|
452
421
|
});
|
|
453
422
|
|
|
@@ -483,7 +452,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
483
452
|
|
|
484
453
|
const result = await convertHtmlToMarkdown(sphinxHtml, 'https://example.com/docs');
|
|
485
454
|
|
|
486
|
-
|
|
455
|
+
// Function throws on error, so reaching here means success
|
|
487
456
|
});
|
|
488
457
|
|
|
489
458
|
it('should handle nested code blocks with syntax highlighting', async () => {
|
|
@@ -511,7 +480,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
511
480
|
|
|
512
481
|
const result = await convertHtmlToMarkdown(complexHtml, 'https://example.com');
|
|
513
482
|
|
|
514
|
-
|
|
483
|
+
// Function throws on error, so reaching here means success
|
|
515
484
|
});
|
|
516
485
|
|
|
517
486
|
it('should handle documentation with table of contents', async () => {
|
|
@@ -544,7 +513,7 @@ describe('convertHtmlToMarkdown', () => {
|
|
|
544
513
|
|
|
545
514
|
const result = await convertHtmlToMarkdown(htmlWithToc, 'https://example.com');
|
|
546
515
|
|
|
547
|
-
|
|
516
|
+
// Function throws on error, so reaching here means success
|
|
548
517
|
});
|
|
549
518
|
});
|
|
550
519
|
|
|
@@ -14,8 +14,6 @@ const logger = createLogger('article-converter');
|
|
|
14
14
|
export interface ConversionResult {
|
|
15
15
|
markdown: string;
|
|
16
16
|
title?: string;
|
|
17
|
-
success: boolean;
|
|
18
|
-
error?: string;
|
|
19
17
|
}
|
|
20
18
|
|
|
21
19
|
/**
|
|
@@ -128,7 +126,6 @@ export async function convertHtmlToMarkdown(html: string, url: string): Promise<
|
|
|
128
126
|
return {
|
|
129
127
|
markdown,
|
|
130
128
|
...(title !== undefined && { title }),
|
|
131
|
-
success: true,
|
|
132
129
|
};
|
|
133
130
|
} catch (error) {
|
|
134
131
|
logger.error(
|
|
@@ -139,10 +136,7 @@ export async function convertHtmlToMarkdown(html: string, url: string): Promise<
|
|
|
139
136
|
'HTML to markdown conversion failed'
|
|
140
137
|
);
|
|
141
138
|
|
|
142
|
-
return
|
|
143
|
-
|
|
144
|
-
success: false,
|
|
145
|
-
error: error instanceof Error ? error.message : String(error),
|
|
146
|
-
};
|
|
139
|
+
// Re-throw errors - do not return graceful degradation
|
|
140
|
+
throw error instanceof Error ? error : new Error(String(error));
|
|
147
141
|
}
|
|
148
142
|
}
|
package/src/crawl/bridge.test.ts
CHANGED
|
@@ -654,6 +654,20 @@ describe('PythonBridge', () => {
|
|
|
654
654
|
expect(mockProcess.kill).toHaveBeenCalled();
|
|
655
655
|
});
|
|
656
656
|
|
|
657
|
+
it('should close stderr readline interface on stop', async () => {
|
|
658
|
+
await bridge.start();
|
|
659
|
+
await bridge.stop();
|
|
660
|
+
|
|
661
|
+
expect(mockStderrReadline.close).toHaveBeenCalled();
|
|
662
|
+
});
|
|
663
|
+
|
|
664
|
+
it('should close stdout readline interface on stop', async () => {
|
|
665
|
+
await bridge.start();
|
|
666
|
+
await bridge.stop();
|
|
667
|
+
|
|
668
|
+
expect(mockReadline.close).toHaveBeenCalled();
|
|
669
|
+
});
|
|
670
|
+
|
|
657
671
|
it('should set process to null on stop', async () => {
|
|
658
672
|
await bridge.start();
|
|
659
673
|
await bridge.stop();
|
package/src/crawl/bridge.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { spawn, type ChildProcess } from 'node:child_process';
|
|
2
2
|
import { randomUUID } from 'node:crypto';
|
|
3
|
-
import { createInterface } from 'node:readline';
|
|
3
|
+
import { createInterface, type Interface as ReadlineInterface } from 'node:readline';
|
|
4
4
|
import { ZodError } from 'zod';
|
|
5
5
|
import {
|
|
6
6
|
type CrawlResult,
|
|
@@ -31,6 +31,8 @@ export class PythonBridge {
|
|
|
31
31
|
private process: ChildProcess | null = null;
|
|
32
32
|
private readonly pending: Map<string, PendingRequest> = new Map();
|
|
33
33
|
private stoppingIntentionally = false;
|
|
34
|
+
private stdoutReadline: ReadlineInterface | null = null;
|
|
35
|
+
private stderrReadline: ReadlineInterface | null = null;
|
|
34
36
|
|
|
35
37
|
start(): Promise<void> {
|
|
36
38
|
if (this.process) return Promise.resolve();
|
|
@@ -63,8 +65,8 @@ export class PythonBridge {
|
|
|
63
65
|
|
|
64
66
|
// Add stderr logging
|
|
65
67
|
if (this.process.stderr) {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
+
this.stderrReadline = createInterface({ input: this.process.stderr });
|
|
69
|
+
this.stderrReadline.on('line', (line) => {
|
|
68
70
|
logger.warn({ stderr: line }, 'Python bridge stderr output');
|
|
69
71
|
});
|
|
70
72
|
}
|
|
@@ -74,8 +76,8 @@ export class PythonBridge {
|
|
|
74
76
|
this.process = null; // Clean up reference
|
|
75
77
|
return Promise.reject(new Error('Python bridge process stdout is null'));
|
|
76
78
|
}
|
|
77
|
-
|
|
78
|
-
|
|
79
|
+
this.stdoutReadline = createInterface({ input: this.process.stdout });
|
|
80
|
+
this.stdoutReadline.on('line', (line) => {
|
|
79
81
|
// Filter out non-JSON lines (crawl4ai verbose output)
|
|
80
82
|
if (!line.trim().startsWith('{')) {
|
|
81
83
|
return;
|
|
@@ -266,6 +268,16 @@ export class PythonBridge {
|
|
|
266
268
|
this.stoppingIntentionally = true;
|
|
267
269
|
this.rejectAllPending(new Error('Python bridge stopped'));
|
|
268
270
|
|
|
271
|
+
// Close readline interfaces to prevent resource leaks
|
|
272
|
+
if (this.stdoutReadline) {
|
|
273
|
+
this.stdoutReadline.close();
|
|
274
|
+
this.stdoutReadline = null;
|
|
275
|
+
}
|
|
276
|
+
if (this.stderrReadline) {
|
|
277
|
+
this.stderrReadline.close();
|
|
278
|
+
this.stderrReadline = null;
|
|
279
|
+
}
|
|
280
|
+
|
|
269
281
|
// Wait for process to actually exit before resolving
|
|
270
282
|
const proc = this.process;
|
|
271
283
|
if (proc === null) {
|
|
@@ -58,7 +58,6 @@ describe('IntelligentCrawler', () => {
|
|
|
58
58
|
|
|
59
59
|
// Setup convertHtmlToMarkdown mock
|
|
60
60
|
vi.mocked(articleConverter.convertHtmlToMarkdown).mockResolvedValue({
|
|
61
|
-
success: true,
|
|
62
61
|
markdown: '# Test\n\nContent',
|
|
63
62
|
title: 'Test Page',
|
|
64
63
|
});
|
|
@@ -581,24 +580,20 @@ describe('IntelligentCrawler', () => {
|
|
|
581
580
|
});
|
|
582
581
|
});
|
|
583
582
|
|
|
584
|
-
describe('Intelligent Mode
|
|
585
|
-
it('should
|
|
583
|
+
describe('Intelligent Mode Error Handling', () => {
|
|
584
|
+
it('should throw when Claude strategy fails', async () => {
|
|
586
585
|
mockClaudeClient.determineCrawlUrls.mockRejectedValue(new Error('Claude API error'));
|
|
587
|
-
mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
|
|
588
586
|
|
|
589
587
|
const results = [];
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
588
|
+
await expect(async () => {
|
|
589
|
+
for await (const result of crawler.crawl('https://example.com', {
|
|
590
|
+
crawlInstruction: 'Find all docs',
|
|
591
|
+
})) {
|
|
592
|
+
results.push(result);
|
|
593
|
+
}
|
|
594
|
+
}).rejects.toThrow('Claude API error');
|
|
595
595
|
|
|
596
|
-
|
|
597
|
-
expect(results).toHaveLength(1);
|
|
598
|
-
const errorEvents = progressEvents.filter((e) => e.type === 'error');
|
|
599
|
-
expect(errorEvents.some((e) => e.message?.includes('falling back to simple mode'))).toBe(
|
|
600
|
-
true
|
|
601
|
-
);
|
|
596
|
+
expect(results).toHaveLength(0);
|
|
602
597
|
});
|
|
603
598
|
});
|
|
604
599
|
|
|
@@ -623,22 +618,21 @@ describe('IntelligentCrawler', () => {
|
|
|
623
618
|
);
|
|
624
619
|
});
|
|
625
620
|
|
|
626
|
-
it('should
|
|
621
|
+
it('should throw when extraction fails', async () => {
|
|
627
622
|
mockClaudeClient.extractContent.mockRejectedValue(new Error('Extraction failed'));
|
|
628
623
|
mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
|
|
629
624
|
|
|
630
625
|
const results = [];
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
626
|
+
await expect(async () => {
|
|
627
|
+
for await (const result of crawler.crawl('https://example.com', {
|
|
628
|
+
simple: true,
|
|
629
|
+
extractInstruction: 'Extract pricing',
|
|
630
|
+
})) {
|
|
631
|
+
results.push(result);
|
|
632
|
+
}
|
|
633
|
+
}).rejects.toThrow('Extraction failed');
|
|
637
634
|
|
|
638
|
-
expect(results).toHaveLength(
|
|
639
|
-
expect(results[0]?.extracted).toBeUndefined();
|
|
640
|
-
const errorEvents = progressEvents.filter((e) => e.type === 'error');
|
|
641
|
-
expect(errorEvents.some((e) => e.message?.includes('storing raw markdown'))).toBe(true);
|
|
635
|
+
expect(results).toHaveLength(0);
|
|
642
636
|
});
|
|
643
637
|
|
|
644
638
|
it('should not extract when extractInstruction is empty', async () => {
|
|
@@ -824,7 +818,6 @@ describe('IntelligentCrawler', () => {
|
|
|
824
818
|
it('should include title when available', async () => {
|
|
825
819
|
mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
|
|
826
820
|
vi.mocked(articleConverter.convertHtmlToMarkdown).mockResolvedValue({
|
|
827
|
-
success: true,
|
|
828
821
|
markdown: '# Test',
|
|
829
822
|
title: 'Test Page Title',
|
|
830
823
|
});
|
|
@@ -839,11 +832,9 @@ describe('IntelligentCrawler', () => {
|
|
|
839
832
|
|
|
840
833
|
it('should handle conversion failures', async () => {
|
|
841
834
|
mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
|
|
842
|
-
vi.mocked(articleConverter.convertHtmlToMarkdown).
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
error: 'Conversion error',
|
|
846
|
-
});
|
|
835
|
+
vi.mocked(articleConverter.convertHtmlToMarkdown).mockRejectedValue(
|
|
836
|
+
new Error('Conversion error')
|
|
837
|
+
);
|
|
847
838
|
|
|
848
839
|
const results = [];
|
|
849
840
|
for await (const result of crawler.crawl('https://example.com', { simple: true })) {
|
|
@@ -905,69 +896,67 @@ describe('IntelligentCrawler', () => {
|
|
|
905
896
|
});
|
|
906
897
|
});
|
|
907
898
|
|
|
908
|
-
describe('
|
|
909
|
-
it('should
|
|
910
|
-
|
|
911
|
-
vi.mocked(ClaudeClient.isAvailable).mockReturnValue(false);
|
|
899
|
+
describe('Headless Mode Error Handling', () => {
|
|
900
|
+
it('should throw when headless fetch fails', async () => {
|
|
901
|
+
mockPythonBridge.fetchHeadless.mockRejectedValue(new Error('Browser crashed'));
|
|
912
902
|
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
903
|
+
const results = [];
|
|
904
|
+
await expect(async () => {
|
|
905
|
+
for await (const result of crawler.crawl('https://example.com', {
|
|
906
|
+
simple: true,
|
|
907
|
+
useHeadless: true,
|
|
908
|
+
})) {
|
|
909
|
+
results.push(result);
|
|
910
|
+
}
|
|
911
|
+
}).rejects.toThrow('Headless fetch failed: Browser crashed');
|
|
917
912
|
|
|
918
|
-
|
|
913
|
+
expect(results).toHaveLength(0);
|
|
914
|
+
// Should not have fallen back to axios
|
|
915
|
+
expect(axios.get).not.toHaveBeenCalled();
|
|
916
|
+
});
|
|
917
|
+
});
|
|
919
918
|
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
results.push(result);
|
|
925
|
-
}
|
|
919
|
+
describe('Claude CLI Not Installed', () => {
|
|
920
|
+
it('should throw when intelligent mode requested but Claude CLI not available', async () => {
|
|
921
|
+
// Simulate npm package usage without Claude Code installed
|
|
922
|
+
vi.mocked(ClaudeClient.isAvailable).mockReturnValue(false);
|
|
926
923
|
|
|
927
|
-
|
|
928
|
-
expect(results.length).toBeGreaterThan(0);
|
|
929
|
-
expect(results[0]?.url).toBe('https://example.com');
|
|
924
|
+
const results: { url: string }[] = [];
|
|
930
925
|
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
926
|
+
await expect(async () => {
|
|
927
|
+
for await (const result of crawler.crawl('https://example.com', {
|
|
928
|
+
crawlInstruction: 'Find all documentation pages', // Requires intelligent mode
|
|
929
|
+
maxPages: 5,
|
|
930
|
+
})) {
|
|
931
|
+
results.push(result);
|
|
932
|
+
}
|
|
933
|
+
}).rejects.toThrow('Claude CLI not available');
|
|
937
934
|
|
|
938
|
-
|
|
935
|
+
expect(results).toHaveLength(0);
|
|
939
936
|
expect(mockClaudeClient.determineCrawlUrls).not.toHaveBeenCalled();
|
|
940
937
|
});
|
|
941
938
|
|
|
942
|
-
it('should
|
|
939
|
+
it('should throw when extraction requested but Claude CLI not available', async () => {
|
|
943
940
|
// Simulate npm package usage without Claude Code installed
|
|
944
941
|
vi.mocked(ClaudeClient.isAvailable).mockReturnValue(false);
|
|
945
942
|
|
|
946
|
-
// Setup for simple mode
|
|
947
943
|
mockPythonBridge.crawl.mockResolvedValue({
|
|
948
944
|
pages: [{ links: [] }],
|
|
949
945
|
});
|
|
950
946
|
|
|
951
947
|
const results: { url: string; extracted?: string }[] = [];
|
|
952
948
|
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
expect(results[0]?.extracted).toBeUndefined(); // Should not have extracted
|
|
963
|
-
|
|
964
|
-
// Should have emitted skip extraction progress event
|
|
965
|
-
const skipEvent = progressEvents.find(
|
|
966
|
-
(e) => e.type === 'error' && e.message?.includes('Skipping extraction')
|
|
967
|
-
);
|
|
968
|
-
expect(skipEvent).toBeDefined();
|
|
949
|
+
await expect(async () => {
|
|
950
|
+
for await (const result of crawler.crawl('https://example.com', {
|
|
951
|
+
simple: true,
|
|
952
|
+
extractInstruction: 'Extract pricing info', // Requires Claude
|
|
953
|
+
maxPages: 1,
|
|
954
|
+
})) {
|
|
955
|
+
results.push(result);
|
|
956
|
+
}
|
|
957
|
+
}).rejects.toThrow('Claude CLI not available');
|
|
969
958
|
|
|
970
|
-
|
|
959
|
+
expect(results).toHaveLength(0);
|
|
971
960
|
expect(mockClaudeClient.extractContent).not.toHaveBeenCalled();
|
|
972
961
|
});
|
|
973
962
|
});
|