bluera-knowledge 0.9.32 → 0.9.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/.claude/hooks/post-edit-check.sh +5 -3
  2. package/.claude/skills/atomic-commits/SKILL.md +3 -1
  3. package/.husky/pre-commit +3 -2
  4. package/.prettierrc +9 -0
  5. package/.versionrc.json +1 -1
  6. package/CHANGELOG.md +33 -0
  7. package/CLAUDE.md +6 -0
  8. package/README.md +25 -13
  9. package/bun.lock +277 -33
  10. package/dist/{chunk-L2YVNC63.js → chunk-6FHWC36B.js} +9 -1
  11. package/dist/chunk-6FHWC36B.js.map +1 -0
  12. package/dist/{chunk-RST4XGRL.js → chunk-DC7CGSGT.js} +288 -241
  13. package/dist/chunk-DC7CGSGT.js.map +1 -0
  14. package/dist/{chunk-6PBP5DVD.js → chunk-WFNPNAAP.js} +3212 -3054
  15. package/dist/chunk-WFNPNAAP.js.map +1 -0
  16. package/dist/{chunk-WT2DAEO7.js → chunk-Z2KKVH45.js} +548 -482
  17. package/dist/chunk-Z2KKVH45.js.map +1 -0
  18. package/dist/index.js +871 -758
  19. package/dist/index.js.map +1 -1
  20. package/dist/mcp/server.js +3 -3
  21. package/dist/watch.service-BJV3TI3F.js +7 -0
  22. package/dist/workers/background-worker-cli.js +46 -45
  23. package/dist/workers/background-worker-cli.js.map +1 -1
  24. package/eslint.config.js +43 -1
  25. package/package.json +18 -11
  26. package/plugin.json +8 -0
  27. package/python/requirements.txt +1 -1
  28. package/src/analysis/ast-parser.test.ts +12 -11
  29. package/src/analysis/ast-parser.ts +28 -22
  30. package/src/analysis/code-graph.test.ts +52 -62
  31. package/src/analysis/code-graph.ts +9 -13
  32. package/src/analysis/dependency-usage-analyzer.test.ts +91 -271
  33. package/src/analysis/dependency-usage-analyzer.ts +52 -24
  34. package/src/analysis/go-ast-parser.test.ts +22 -22
  35. package/src/analysis/go-ast-parser.ts +18 -25
  36. package/src/analysis/parser-factory.test.ts +9 -9
  37. package/src/analysis/parser-factory.ts +3 -3
  38. package/src/analysis/python-ast-parser.test.ts +27 -27
  39. package/src/analysis/python-ast-parser.ts +2 -2
  40. package/src/analysis/repo-url-resolver.test.ts +82 -82
  41. package/src/analysis/rust-ast-parser.test.ts +19 -19
  42. package/src/analysis/rust-ast-parser.ts +17 -27
  43. package/src/analysis/tree-sitter-parser.test.ts +3 -3
  44. package/src/analysis/tree-sitter-parser.ts +10 -16
  45. package/src/cli/commands/crawl.test.ts +40 -24
  46. package/src/cli/commands/crawl.ts +186 -166
  47. package/src/cli/commands/index-cmd.test.ts +90 -90
  48. package/src/cli/commands/index-cmd.ts +52 -36
  49. package/src/cli/commands/mcp.test.ts +6 -6
  50. package/src/cli/commands/mcp.ts +2 -2
  51. package/src/cli/commands/plugin-api.test.ts +16 -18
  52. package/src/cli/commands/plugin-api.ts +9 -6
  53. package/src/cli/commands/search.test.ts +16 -7
  54. package/src/cli/commands/search.ts +124 -87
  55. package/src/cli/commands/serve.test.ts +67 -25
  56. package/src/cli/commands/serve.ts +18 -3
  57. package/src/cli/commands/setup.test.ts +176 -101
  58. package/src/cli/commands/setup.ts +140 -117
  59. package/src/cli/commands/store.test.ts +82 -53
  60. package/src/cli/commands/store.ts +56 -37
  61. package/src/cli/program.ts +2 -2
  62. package/src/crawl/article-converter.test.ts +4 -1
  63. package/src/crawl/article-converter.ts +46 -31
  64. package/src/crawl/bridge.test.ts +240 -132
  65. package/src/crawl/bridge.ts +87 -30
  66. package/src/crawl/claude-client.test.ts +124 -56
  67. package/src/crawl/claude-client.ts +7 -15
  68. package/src/crawl/intelligent-crawler.test.ts +65 -22
  69. package/src/crawl/intelligent-crawler.ts +86 -53
  70. package/src/crawl/markdown-utils.ts +1 -4
  71. package/src/db/embeddings.ts +4 -6
  72. package/src/db/lance.test.ts +4 -4
  73. package/src/db/lance.ts +16 -12
  74. package/src/index.ts +26 -17
  75. package/src/logging/index.ts +1 -5
  76. package/src/logging/logger.ts +3 -5
  77. package/src/logging/payload.test.ts +1 -1
  78. package/src/logging/payload.ts +3 -5
  79. package/src/mcp/commands/index.ts +2 -2
  80. package/src/mcp/commands/job.commands.ts +12 -18
  81. package/src/mcp/commands/meta.commands.ts +13 -13
  82. package/src/mcp/commands/registry.ts +5 -8
  83. package/src/mcp/commands/store.commands.ts +19 -19
  84. package/src/mcp/handlers/execute.handler.test.ts +10 -10
  85. package/src/mcp/handlers/execute.handler.ts +4 -5
  86. package/src/mcp/handlers/index.ts +10 -14
  87. package/src/mcp/handlers/job.handler.test.ts +10 -10
  88. package/src/mcp/handlers/job.handler.ts +22 -25
  89. package/src/mcp/handlers/search.handler.test.ts +36 -65
  90. package/src/mcp/handlers/search.handler.ts +135 -104
  91. package/src/mcp/handlers/store.handler.test.ts +41 -52
  92. package/src/mcp/handlers/store.handler.ts +108 -88
  93. package/src/mcp/schemas/index.test.ts +73 -68
  94. package/src/mcp/schemas/index.ts +18 -12
  95. package/src/mcp/server.test.ts +1 -1
  96. package/src/mcp/server.ts +59 -46
  97. package/src/plugin/commands.test.ts +230 -95
  98. package/src/plugin/commands.ts +24 -25
  99. package/src/plugin/dependency-analyzer.test.ts +52 -52
  100. package/src/plugin/dependency-analyzer.ts +85 -22
  101. package/src/plugin/git-clone.test.ts +24 -13
  102. package/src/plugin/git-clone.ts +3 -7
  103. package/src/server/app.test.ts +109 -109
  104. package/src/server/app.ts +32 -23
  105. package/src/server/index.test.ts +64 -66
  106. package/src/services/chunking.service.test.ts +32 -32
  107. package/src/services/chunking.service.ts +16 -9
  108. package/src/services/code-graph.service.test.ts +30 -36
  109. package/src/services/code-graph.service.ts +24 -10
  110. package/src/services/code-unit.service.test.ts +55 -11
  111. package/src/services/code-unit.service.ts +85 -11
  112. package/src/services/config.service.test.ts +37 -18
  113. package/src/services/config.service.ts +30 -7
  114. package/src/services/index.service.test.ts +49 -18
  115. package/src/services/index.service.ts +98 -48
  116. package/src/services/index.ts +6 -9
  117. package/src/services/job.service.test.ts +22 -22
  118. package/src/services/job.service.ts +18 -18
  119. package/src/services/project-root.service.test.ts +1 -3
  120. package/src/services/search.service.test.ts +248 -120
  121. package/src/services/search.service.ts +286 -156
  122. package/src/services/services.test.ts +1 -1
  123. package/src/services/snippet.service.test.ts +14 -6
  124. package/src/services/snippet.service.ts +7 -5
  125. package/src/services/store.service.test.ts +68 -29
  126. package/src/services/store.service.ts +41 -12
  127. package/src/services/watch.service.test.ts +34 -14
  128. package/src/services/watch.service.ts +11 -1
  129. package/src/types/brands.test.ts +3 -1
  130. package/src/types/index.ts +2 -13
  131. package/src/types/search.ts +10 -8
  132. package/src/utils/type-guards.test.ts +20 -15
  133. package/src/utils/type-guards.ts +1 -1
  134. package/src/workers/background-worker-cli.ts +2 -2
  135. package/src/workers/background-worker.test.ts +54 -40
  136. package/src/workers/background-worker.ts +76 -60
  137. package/src/workers/spawn-worker.test.ts +22 -10
  138. package/src/workers/spawn-worker.ts +6 -6
  139. package/tests/analysis/ast-parser.test.ts +3 -3
  140. package/tests/analysis/code-graph.test.ts +5 -5
  141. package/tests/fixtures/code-snippets/api/error-handling.ts +4 -15
  142. package/tests/fixtures/code-snippets/api/rest-controller.ts +3 -9
  143. package/tests/fixtures/code-snippets/auth/jwt-auth.ts +5 -21
  144. package/tests/fixtures/code-snippets/auth/oauth-flow.ts +4 -4
  145. package/tests/fixtures/code-snippets/database/repository-pattern.ts +11 -3
  146. package/tests/fixtures/corpus/oss-repos/hono/src/adapter/aws-lambda/handler.ts +2 -2
  147. package/tests/fixtures/corpus/oss-repos/hono/src/adapter/cloudflare-pages/handler.ts +1 -1
  148. package/tests/fixtures/corpus/oss-repos/hono/src/adapter/cloudflare-workers/serve-static.ts +2 -2
  149. package/tests/fixtures/corpus/oss-repos/hono/src/client/client.ts +2 -2
  150. package/tests/fixtures/corpus/oss-repos/hono/src/client/types.ts +22 -20
  151. package/tests/fixtures/corpus/oss-repos/hono/src/context.ts +13 -10
  152. package/tests/fixtures/corpus/oss-repos/hono/src/helper/accepts/accepts.ts +10 -7
  153. package/tests/fixtures/corpus/oss-repos/hono/src/helper/adapter/index.ts +2 -2
  154. package/tests/fixtures/corpus/oss-repos/hono/src/helper/css/index.ts +1 -1
  155. package/tests/fixtures/corpus/oss-repos/hono/src/helper/factory/index.ts +16 -16
  156. package/tests/fixtures/corpus/oss-repos/hono/src/helper/ssg/ssg.ts +2 -2
  157. package/tests/fixtures/corpus/oss-repos/hono/src/hono-base.ts +3 -3
  158. package/tests/fixtures/corpus/oss-repos/hono/src/hono.ts +1 -1
  159. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/css.ts +2 -2
  160. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/intrinsic-element/components.ts +1 -1
  161. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/render.ts +7 -7
  162. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/hooks/index.ts +3 -3
  163. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/intrinsic-element/components.ts +1 -1
  164. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/utils.ts +6 -6
  165. package/tests/fixtures/corpus/oss-repos/hono/src/middleware/jsx-renderer/index.ts +3 -3
  166. package/tests/fixtures/corpus/oss-repos/hono/src/middleware/serve-static/index.ts +1 -1
  167. package/tests/fixtures/corpus/oss-repos/hono/src/preset/quick.ts +1 -1
  168. package/tests/fixtures/corpus/oss-repos/hono/src/preset/tiny.ts +1 -1
  169. package/tests/fixtures/corpus/oss-repos/hono/src/router/pattern-router/router.ts +2 -2
  170. package/tests/fixtures/corpus/oss-repos/hono/src/router/reg-exp-router/node.ts +4 -4
  171. package/tests/fixtures/corpus/oss-repos/hono/src/router/reg-exp-router/router.ts +1 -1
  172. package/tests/fixtures/corpus/oss-repos/hono/src/router/trie-router/node.ts +1 -1
  173. package/tests/fixtures/corpus/oss-repos/hono/src/types.ts +166 -169
  174. package/tests/fixtures/corpus/oss-repos/hono/src/utils/body.ts +8 -8
  175. package/tests/fixtures/corpus/oss-repos/hono/src/utils/color.ts +3 -3
  176. package/tests/fixtures/corpus/oss-repos/hono/src/utils/cookie.ts +2 -2
  177. package/tests/fixtures/corpus/oss-repos/hono/src/utils/encode.ts +2 -2
  178. package/tests/fixtures/corpus/oss-repos/hono/src/utils/types.ts +30 -33
  179. package/tests/fixtures/corpus/oss-repos/hono/src/validator/validator.ts +2 -2
  180. package/tests/fixtures/test-server.ts +3 -2
  181. package/tests/helpers/performance-metrics.ts +8 -25
  182. package/tests/helpers/search-relevance.ts +14 -69
  183. package/tests/integration/cli-consistency.test.ts +5 -4
  184. package/tests/integration/python-bridge.test.ts +13 -3
  185. package/tests/mcp/server.test.ts +1 -1
  186. package/tests/services/code-unit.service.test.ts +48 -0
  187. package/tests/services/job.service.test.ts +124 -0
  188. package/tests/services/search.progressive-context.test.ts +2 -2
  189. package/.claude-plugin/plugin.json +0 -13
  190. package/dist/chunk-6PBP5DVD.js.map +0 -1
  191. package/dist/chunk-L2YVNC63.js.map +0 -1
  192. package/dist/chunk-RST4XGRL.js.map +0 -1
  193. package/dist/chunk-WT2DAEO7.js.map +0 -1
  194. package/dist/watch.service-YAIKKDCF.js +0 -7
  195. package/skills/atomic-commits/SKILL.md +0 -77
  196. /package/dist/{watch.service-YAIKKDCF.js.map → watch.service-BJV3TI3F.js.map} +0 -0
@@ -35,7 +35,9 @@ describe('IntelligentCrawler', () => {
35
35
  determineCrawlUrls: vi.fn(),
36
36
  extractContent: vi.fn(),
37
37
  };
38
- vi.mocked(ClaudeClient).mockImplementation(function() { return mockClaudeClient; });
38
+ vi.mocked(ClaudeClient).mockImplementation(function () {
39
+ return mockClaudeClient;
40
+ });
39
41
  // Mock static isAvailable to return true (Claude CLI is available in tests)
40
42
  vi.mocked(ClaudeClient.isAvailable).mockReturnValue(true);
41
43
 
@@ -45,7 +47,9 @@ describe('IntelligentCrawler', () => {
45
47
  fetchHeadless: vi.fn(),
46
48
  stop: vi.fn().mockResolvedValue(undefined),
47
49
  };
48
- vi.mocked(PythonBridge).mockImplementation(function() { return mockPythonBridge; });
50
+ vi.mocked(PythonBridge).mockImplementation(function () {
51
+ return mockPythonBridge;
52
+ });
49
53
 
50
54
  // Setup axios mock
51
55
  vi.mocked(axios.get).mockResolvedValue({
@@ -228,8 +232,9 @@ describe('IntelligentCrawler', () => {
228
232
  pages: [{ links: [circular] }], // Link back to itself
229
233
  });
230
234
 
231
- vi.mocked(axios.get)
232
- .mockResolvedValueOnce({ data: `<html><body><a href="${circular}">Self</a></body></html>` });
235
+ vi.mocked(axios.get).mockResolvedValueOnce({
236
+ data: `<html><body><a href="${circular}">Self</a></body></html>`,
237
+ });
233
238
 
234
239
  const results = [];
235
240
  for await (const result of crawler.crawl(circular, { simple: true, maxPages: 10 })) {
@@ -324,7 +329,10 @@ describe('IntelligentCrawler', () => {
324
329
  mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links: [] }] });
325
330
 
326
331
  const results = [];
327
- for await (const result of crawler.crawl('https://example.com', { simple: true, maxPages: 10 })) {
332
+ for await (const result of crawler.crawl('https://example.com', {
333
+ simple: true,
334
+ maxPages: 10,
335
+ })) {
328
336
  results.push(result);
329
337
  }
330
338
 
@@ -418,7 +426,10 @@ describe('IntelligentCrawler', () => {
418
426
 
419
427
  const results = [];
420
428
  const crawlPromise = (async () => {
421
- for await (const result of crawler.crawl('https://example.com', { simple: true, maxPages: 100 })) {
429
+ for await (const result of crawler.crawl('https://example.com', {
430
+ simple: true,
431
+ maxPages: 100,
432
+ })) {
422
433
  results.push(result);
423
434
  }
424
435
  })();
@@ -455,7 +466,10 @@ describe('IntelligentCrawler', () => {
455
466
  mockPythonBridge.crawl.mockResolvedValue({ pages: [{ links }] });
456
467
 
457
468
  const results = [];
458
- for await (const result of crawler.crawl('https://example.com', { simple: true, maxPages: 3 })) {
469
+ for await (const result of crawler.crawl('https://example.com', {
470
+ simple: true,
471
+ maxPages: 3,
472
+ })) {
459
473
  results.push(result);
460
474
  }
461
475
 
@@ -513,7 +527,10 @@ describe('IntelligentCrawler', () => {
513
527
  .mockResolvedValueOnce({ data: '<html><body>Page1</body></html>' });
514
528
 
515
529
  const results = [];
516
- for await (const result of crawler.crawl('https://example.com', { simple: true, maxPages: 10 })) {
530
+ for await (const result of crawler.crawl('https://example.com', {
531
+ simple: true,
532
+ maxPages: 10,
533
+ })) {
517
534
  results.push(result);
518
535
  }
519
536
 
@@ -534,7 +551,10 @@ describe('IntelligentCrawler', () => {
534
551
  .mockResolvedValueOnce({ data: '<html><body>Page1</body></html>' });
535
552
 
536
553
  const results = [];
537
- for await (const result of crawler.crawl('https://example.com', { simple: true, maxPages: 10 })) {
554
+ for await (const result of crawler.crawl('https://example.com', {
555
+ simple: true,
556
+ maxPages: 10,
557
+ })) {
538
558
  results.push(result);
539
559
  }
540
560
 
@@ -546,11 +566,7 @@ describe('IntelligentCrawler', () => {
546
566
  mockPythonBridge.crawl.mockResolvedValue({
547
567
  pages: [
548
568
  {
549
- links: [
550
- 'not-a-valid-url',
551
- 'javascript:void(0)',
552
- 'mailto:test@example.com',
553
- ],
569
+ links: ['not-a-valid-url', 'javascript:void(0)', 'mailto:test@example.com'],
554
570
  },
555
571
  ],
556
572
  });
@@ -580,7 +596,9 @@ describe('IntelligentCrawler', () => {
580
596
  // Should still crawl using simple mode
581
597
  expect(results).toHaveLength(1);
582
598
  const errorEvents = progressEvents.filter((e) => e.type === 'error');
583
- expect(errorEvents.some((e) => e.message?.includes('falling back to simple mode'))).toBe(true);
599
+ expect(errorEvents.some((e) => e.message?.includes('falling back to simple mode'))).toBe(
600
+ true
601
+ );
584
602
  });
585
603
  });
586
604
 
@@ -601,7 +619,7 @@ describe('IntelligentCrawler', () => {
601
619
  expect(results[0]?.extracted).toBe('Extracted pricing info');
602
620
  expect(mockClaudeClient.extractContent).toHaveBeenCalledWith(
603
621
  expect.any(String),
604
- 'Extract pricing',
622
+ 'Extract pricing'
605
623
  );
606
624
  });
607
625
 
@@ -700,8 +718,30 @@ describe('IntelligentCrawler', () => {
700
718
  html: '<html/>',
701
719
  markdown: 'test',
702
720
  links: [
703
- { href: 'https://example.com/page1', text: 'Page 1', title: '', base_domain: 'example.com', head_data: null, head_extraction_status: null, head_extraction_error: null, intrinsic_score: 0, contextual_score: null, total_score: null },
704
- { href: 'https://example.com/page2', text: 'Page 2', title: '', base_domain: 'example.com', head_data: null, head_extraction_status: null, head_extraction_error: null, intrinsic_score: 0, contextual_score: null, total_score: null },
721
+ {
722
+ href: 'https://example.com/page1',
723
+ text: 'Page 1',
724
+ title: '',
725
+ base_domain: 'example.com',
726
+ head_data: null,
727
+ head_extraction_status: null,
728
+ head_extraction_error: null,
729
+ intrinsic_score: 0,
730
+ contextual_score: null,
731
+ total_score: null,
732
+ },
733
+ {
734
+ href: 'https://example.com/page2',
735
+ text: 'Page 2',
736
+ title: '',
737
+ base_domain: 'example.com',
738
+ head_data: null,
739
+ head_extraction_status: null,
740
+ head_extraction_error: null,
741
+ intrinsic_score: 0,
742
+ contextual_score: null,
743
+ total_score: null,
744
+ },
705
745
  'https://example.com/page3', // Also support plain strings
706
746
  ],
707
747
  };
@@ -776,7 +816,7 @@ describe('IntelligentCrawler', () => {
776
816
 
777
817
  expect(vi.mocked(articleConverter.convertHtmlToMarkdown)).toHaveBeenCalledWith(
778
818
  expect.any(String),
779
- 'https://example.com',
819
+ 'https://example.com'
780
820
  );
781
821
  expect(results[0]?.markdown).toBe('# Test\n\nContent');
782
822
  });
@@ -827,7 +867,10 @@ describe('IntelligentCrawler', () => {
827
867
  vi.mocked(axios.get).mockResolvedValue({ data: '<html><body>Test</body></html>' });
828
868
 
829
869
  const results = [];
830
- for await (const result of crawler.crawl('https://example.com', { simple: true, maxPages: 10 })) {
870
+ for await (const result of crawler.crawl('https://example.com', {
871
+ simple: true,
872
+ maxPages: 10,
873
+ })) {
831
874
  results.push(result);
832
875
  }
833
876
 
@@ -887,7 +930,7 @@ describe('IntelligentCrawler', () => {
887
930
 
888
931
  // Should have emitted progress event about mode switch
889
932
  const modeEvent = progressEvents.find(
890
- e => e.type === 'error' && e.message?.includes('Claude CLI not found')
933
+ (e) => e.type === 'error' && e.message?.includes('Claude CLI not found')
891
934
  );
892
935
  expect(modeEvent).toBeDefined();
893
936
  expect(modeEvent?.message).toContain('using simple crawl mode');
@@ -920,7 +963,7 @@ describe('IntelligentCrawler', () => {
920
963
 
921
964
  // Should have emitted skip extraction progress event
922
965
  const skipEvent = progressEvents.find(
923
- e => e.type === 'error' && e.message?.includes('Skipping extraction')
966
+ (e) => e.type === 'error' && e.message?.includes('Skipping extraction')
924
967
  );
925
968
  expect(skipEvent).toBeDefined();
926
969
 
@@ -5,9 +5,9 @@
5
5
 
6
6
  import { EventEmitter } from 'node:events';
7
7
  import axios from 'axios';
8
- import { ClaudeClient, type CrawlStrategy } from './claude-client.js';
9
8
  import { convertHtmlToMarkdown } from './article-converter.js';
10
9
  import { PythonBridge, type CrawledLink } from './bridge.js';
10
+ import { ClaudeClient, type CrawlStrategy } from './claude-client.js';
11
11
  import { createLogger, summarizePayload } from '../logging/index.js';
12
12
 
13
13
  const logger = createLogger('crawler');
@@ -58,26 +58,25 @@ export class IntelligentCrawler extends EventEmitter {
58
58
  /**
59
59
  * Crawl a website with intelligent or simple mode
60
60
  */
61
- async *crawl(
62
- seedUrl: string,
63
- options: CrawlOptions = {},
64
- ): AsyncIterable<CrawlResult> {
65
- const {
66
- crawlInstruction,
67
- extractInstruction,
68
- maxPages = 50,
69
- simple = false,
70
- } = options;
61
+ async *crawl(seedUrl: string, options: CrawlOptions = {}): AsyncIterable<CrawlResult> {
62
+ const { crawlInstruction, extractInstruction, maxPages = 50, simple = false } = options;
71
63
 
72
64
  this.visited.clear();
73
65
  this.stopped = false;
74
66
 
75
- logger.info({
76
- seedUrl,
77
- maxPages,
78
- mode: simple ? 'simple' : (crawlInstruction !== undefined && crawlInstruction !== '' ? 'intelligent' : 'simple'),
79
- hasExtractInstruction: extractInstruction !== undefined,
80
- }, 'Starting crawl');
67
+ logger.info(
68
+ {
69
+ seedUrl,
70
+ maxPages,
71
+ mode: simple
72
+ ? 'simple'
73
+ : crawlInstruction !== undefined && crawlInstruction !== ''
74
+ ? 'intelligent'
75
+ : 'simple',
76
+ hasExtractInstruction: extractInstruction !== undefined,
77
+ },
78
+ 'Starting crawl'
79
+ );
81
80
 
82
81
  const startProgress: CrawlProgress = {
83
82
  type: 'start',
@@ -91,15 +90,24 @@ export class IntelligentCrawler extends EventEmitter {
91
90
 
92
91
  if (useIntelligentMode) {
93
92
  // TypeScript knows crawlInstruction is defined here due to useIntelligentMode check
94
- yield* this.crawlIntelligent(seedUrl, crawlInstruction, extractInstruction, maxPages, options.useHeadless ?? false);
93
+ yield* this.crawlIntelligent(
94
+ seedUrl,
95
+ crawlInstruction,
96
+ extractInstruction,
97
+ maxPages,
98
+ options.useHeadless ?? false
99
+ );
95
100
  } else {
96
101
  yield* this.crawlSimple(seedUrl, extractInstruction, maxPages, options.useHeadless ?? false);
97
102
  }
98
103
 
99
- logger.info({
100
- seedUrl,
101
- pagesVisited: this.visited.size,
102
- }, 'Crawl complete');
104
+ logger.info(
105
+ {
106
+ seedUrl,
107
+ pagesVisited: this.visited.size,
108
+ },
109
+ 'Crawl complete'
110
+ );
103
111
 
104
112
  const completeProgress: CrawlProgress = {
105
113
  type: 'complete',
@@ -117,7 +125,7 @@ export class IntelligentCrawler extends EventEmitter {
117
125
  crawlInstruction: string,
118
126
  extractInstruction: string | undefined,
119
127
  maxPages: number,
120
- useHeadless: boolean = false,
128
+ useHeadless: boolean = false
121
129
  ): AsyncIterable<CrawlResult> {
122
130
  // Check if Claude CLI is available before attempting intelligent mode
123
131
  if (!ClaudeClient.isAvailable()) {
@@ -125,7 +133,8 @@ export class IntelligentCrawler extends EventEmitter {
125
133
  type: 'error',
126
134
  pagesVisited: 0,
127
135
  totalPages: maxPages,
128
- message: 'Claude CLI not found, using simple crawl mode (install Claude Code for intelligent crawling)',
136
+ message:
137
+ 'Claude CLI not found, using simple crawl mode (install Claude Code for intelligent crawling)',
129
138
  error: new Error('Claude CLI not available'),
130
139
  };
131
140
  this.emit('progress', fallbackProgress);
@@ -181,7 +190,12 @@ export class IntelligentCrawler extends EventEmitter {
181
190
  if (this.visited.has(url)) continue;
182
191
 
183
192
  try {
184
- const result = await this.crawlSinglePage(url, extractInstruction, pagesVisited, useHeadless);
193
+ const result = await this.crawlSinglePage(
194
+ url,
195
+ extractInstruction,
196
+ pagesVisited,
197
+ useHeadless
198
+ );
185
199
  pagesVisited++;
186
200
  yield result;
187
201
  } catch (error) {
@@ -204,7 +218,7 @@ export class IntelligentCrawler extends EventEmitter {
204
218
  seedUrl: string,
205
219
  extractInstruction: string | undefined,
206
220
  maxPages: number,
207
- useHeadless: boolean = false,
221
+ useHeadless: boolean = false
208
222
  ): AsyncIterable<CrawlResult> {
209
223
  const queue: Array<{ url: string; depth: number }> = [{ url: seedUrl, depth: 0 }];
210
224
  const maxDepth = 2; // Default depth limit for simple mode
@@ -222,7 +236,7 @@ export class IntelligentCrawler extends EventEmitter {
222
236
  current.url,
223
237
  extractInstruction,
224
238
  pagesVisited,
225
- useHeadless,
239
+ useHeadless
226
240
  );
227
241
  result.depth = current.depth;
228
242
  pagesVisited++;
@@ -237,7 +251,10 @@ export class IntelligentCrawler extends EventEmitter {
237
251
  if (links.length === 0) {
238
252
  logger.debug({ url: current.url }, 'No links found - page may be a leaf node');
239
253
  } else {
240
- logger.debug({ url: current.url, linkCount: links.length }, 'Links extracted from page');
254
+ logger.debug(
255
+ { url: current.url, linkCount: links.length },
256
+ 'Links extracted from page'
257
+ );
241
258
  }
242
259
 
243
260
  for (const link of links) {
@@ -278,7 +295,7 @@ export class IntelligentCrawler extends EventEmitter {
278
295
  url: string,
279
296
  extractInstruction: string | undefined,
280
297
  pagesVisited: number,
281
- useHeadless: boolean = false,
298
+ useHeadless: boolean = false
282
299
  ): Promise<CrawlResult> {
283
300
  const pageProgress: CrawlProgress = {
284
301
  type: 'page',
@@ -302,11 +319,14 @@ export class IntelligentCrawler extends EventEmitter {
302
319
  throw new Error(`Failed to convert HTML: ${conversion.error ?? 'Unknown error'}`);
303
320
  }
304
321
 
305
- logger.debug({
306
- url,
307
- title: conversion.title,
308
- markdownLength: conversion.markdown.length,
309
- }, 'Article converted to markdown');
322
+ logger.debug(
323
+ {
324
+ url,
325
+ title: conversion.title,
326
+ markdownLength: conversion.markdown.length,
327
+ },
328
+ 'Article converted to markdown'
329
+ );
310
330
 
311
331
  let extracted: string | undefined;
312
332
 
@@ -335,7 +355,7 @@ export class IntelligentCrawler extends EventEmitter {
335
355
 
336
356
  extracted = await this.claudeClient.extractContent(
337
357
  conversion.markdown,
338
- extractInstruction,
358
+ extractInstruction
339
359
  );
340
360
  } catch (error) {
341
361
  // If extraction fails, just store raw markdown
@@ -371,16 +391,22 @@ export class IntelligentCrawler extends EventEmitter {
371
391
  try {
372
392
  const result = await this.pythonBridge.fetchHeadless(url);
373
393
  const durationMs = Date.now() - startTime;
374
- logger.info({
375
- url,
376
- useHeadless: true,
377
- durationMs,
378
- ...summarizePayload(result.html, 'raw-html', url),
379
- }, 'Raw HTML fetched');
394
+ logger.info(
395
+ {
396
+ url,
397
+ useHeadless: true,
398
+ durationMs,
399
+ ...summarizePayload(result.html, 'raw-html', url),
400
+ },
401
+ 'Raw HTML fetched'
402
+ );
380
403
  return result.html;
381
404
  } catch (error) {
382
405
  // Fallback to axios if headless fails
383
- logger.warn({ url, error: error instanceof Error ? error.message : String(error) }, 'Headless fetch failed, falling back to axios');
406
+ logger.warn(
407
+ { url, error: error instanceof Error ? error.message : String(error) },
408
+ 'Headless fetch failed, falling back to axios'
409
+ );
384
410
  }
385
411
  }
386
412
 
@@ -389,24 +415,29 @@ export class IntelligentCrawler extends EventEmitter {
389
415
  const response = await axios.get<string>(url, {
390
416
  timeout: 30000,
391
417
  headers: {
392
- 'User-Agent':
393
- 'Mozilla/5.0 (compatible; bluera-knowledge-crawler/1.0)',
418
+ 'User-Agent': 'Mozilla/5.0 (compatible; bluera-knowledge-crawler/1.0)',
394
419
  },
395
420
  });
396
421
 
397
422
  const durationMs = Date.now() - startTime;
398
- logger.info({
399
- url,
400
- useHeadless: false,
401
- durationMs,
402
- ...summarizePayload(response.data, 'raw-html', url),
403
- }, 'Raw HTML fetched');
423
+ logger.info(
424
+ {
425
+ url,
426
+ useHeadless: false,
427
+ durationMs,
428
+ ...summarizePayload(response.data, 'raw-html', url),
429
+ },
430
+ 'Raw HTML fetched'
431
+ );
404
432
 
405
433
  return response.data;
406
434
  } catch (error) {
407
- logger.error({ url, error: error instanceof Error ? error.message : String(error) }, 'Failed to fetch HTML');
435
+ logger.error(
436
+ { url, error: error instanceof Error ? error.message : String(error) },
437
+ 'Failed to fetch HTML'
438
+ );
408
439
  throw new Error(
409
- `Failed to fetch ${url}: ${error instanceof Error ? error.message : String(error)}`,
440
+ `Failed to fetch ${url}: ${error instanceof Error ? error.message : String(error)}`
410
441
  );
411
442
  }
412
443
  }
@@ -453,7 +484,9 @@ export class IntelligentCrawler extends EventEmitter {
453
484
  try {
454
485
  const domain1 = new URL(url1).hostname.toLowerCase();
455
486
  const domain2 = new URL(url2).hostname.toLowerCase();
456
- return domain1 === domain2 || domain1.endsWith(`.${domain2}`) || domain2.endsWith(`.${domain1}`);
487
+ return (
488
+ domain1 === domain2 || domain1.endsWith(`.${domain2}`) || domain2.endsWith(`.${domain1}`)
489
+ );
457
490
  } catch {
458
491
  return false;
459
492
  }
@@ -165,10 +165,7 @@ export function cleanupMarkdown(markdown: string): string {
165
165
  result = result.replace(/(#{1,6}\s[^\n]+)\n(#{1,6}\s)/g, '$1\n\n$2');
166
166
 
167
167
  // 3. Lists - ensure all list items have single newlines only
168
- result = result.replace(
169
- /(\* Item 1)\n\n+(\* Item 2)\n\n+(\* Item 3)/g,
170
- '$1\n$2\n$3',
171
- );
168
+ result = result.replace(/(\* Item 1)\n\n+(\* Item 2)\n\n+(\* Item 3)/g, '$1\n$2\n$3');
172
169
 
173
170
  // 3.5. General list item spacing - ensure single newlines between list items
174
171
  result = result.replace(/(^\*\s[^\n]+)\n{2,}(^\*\s)/gm, '$1\n$2');
@@ -1,6 +1,6 @@
1
- import { pipeline, env, type FeatureExtractionPipeline } from '@huggingface/transformers';
2
1
  import { homedir } from 'node:os';
3
2
  import { join } from 'node:path';
3
+ import { pipeline, env, type FeatureExtractionPipeline } from '@huggingface/transformers';
4
4
 
5
5
  // Set cache directory to ~/.cache/huggingface-transformers (outside node_modules)
6
6
  // This allows CI caching and prevents model re-downloads on each npm install
@@ -37,7 +37,7 @@ export class EmbeddingEngine {
37
37
  normalize: true,
38
38
  });
39
39
  const result = Array.from(output.data);
40
- return result.map(v => Number(v));
40
+ return result.map((v) => Number(v));
41
41
  }
42
42
 
43
43
  async embedBatch(texts: string[]): Promise<number[][]> {
@@ -48,15 +48,13 @@ export class EmbeddingEngine {
48
48
  const batch = texts.slice(i, i + BATCH_SIZE);
49
49
 
50
50
  // Process batch in parallel using Promise.all
51
- const batchResults = await Promise.all(
52
- batch.map(text => this.embed(text))
53
- );
51
+ const batchResults = await Promise.all(batch.map((text) => this.embed(text)));
54
52
 
55
53
  results.push(...batchResults);
56
54
 
57
55
  // Small delay between batches to prevent memory issues
58
56
  if (i + BATCH_SIZE < texts.length) {
59
- await new Promise(resolve => setTimeout(resolve, 100));
57
+ await new Promise((resolve) => setTimeout(resolve, 100));
60
58
  }
61
59
  }
62
60
 
@@ -111,7 +111,7 @@ describe('LanceStore', () => {
111
111
  const results = await store.search(storeId, new Array(384).fill(1.0), 10, 0.9);
112
112
 
113
113
  // Should filter out low-similarity results
114
- expect(results.every(r => r.score >= 0.9)).toBe(true);
114
+ expect(results.every((r) => r.score >= 0.9)).toBe(true);
115
115
  });
116
116
 
117
117
  it('sets cosine distance type when threshold is provided', async () => {
@@ -466,14 +466,14 @@ describe('LanceStore', () => {
466
466
  await store.addDocuments(multiDelStoreId, docs);
467
467
  await store.deleteDocuments(multiDelStoreId, [
468
468
  createDocumentId('del-1'),
469
- createDocumentId('del-2')
469
+ createDocumentId('del-2'),
470
470
  ]);
471
471
 
472
472
  const results = await store.search(multiDelStoreId, new Array(384).fill(0.2), 10);
473
- const deletedIds = results.filter(r => r.id === 'del-1' || r.id === 'del-2');
473
+ const deletedIds = results.filter((r) => r.id === 'del-1' || r.id === 'del-2');
474
474
  expect(deletedIds.length).toBe(0);
475
475
 
476
- const kept = results.find(r => r.id === 'keep-3');
476
+ const kept = results.find((r) => r.id === 'keep-3');
477
477
  expect(kept).toBeDefined();
478
478
  });
479
479
  });
package/src/db/lance.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import * as lancedb from '@lancedb/lancedb';
2
- import type { Table, Connection } from '@lancedb/lancedb';
3
- import type { Document, DocumentMetadata } from '../types/document.js';
4
- import type { StoreId, DocumentId } from '../types/brands.js';
5
2
  import { createDocumentId } from '../types/brands.js';
3
+ import type { StoreId, DocumentId } from '../types/brands.js';
4
+ import type { Document, DocumentMetadata } from '../types/document.js';
5
+ import type { Table, Connection } from '@lancedb/lancedb';
6
6
 
7
7
  interface LanceDocument {
8
8
  id: string;
@@ -29,9 +29,7 @@ export class LanceStore {
29
29
  }
30
30
 
31
31
  async initialize(storeId: StoreId): Promise<void> {
32
- if (this.connection === null) {
33
- this.connection = await lancedb.connect(this.dataDir);
34
- }
32
+ this.connection ??= await lancedb.connect(this.dataDir);
35
33
 
36
34
  const tableName = this.getTableName(storeId);
37
35
  const tableNames = await this.connection.tableNames();
@@ -77,7 +75,9 @@ export class LanceStore {
77
75
  vector: number[],
78
76
  limit: number,
79
77
  threshold?: number
80
- ): Promise<Array<{ id: DocumentId; content: string; score: number; metadata: DocumentMetadata }>> {
78
+ ): Promise<
79
+ Array<{ id: DocumentId; content: string; score: number; metadata: DocumentMetadata }>
80
+ > {
81
81
  const table = await this.getTable(storeId);
82
82
  let query = table.vectorSearch(vector).limit(limit);
83
83
 
@@ -114,15 +114,19 @@ export class LanceStore {
114
114
  storeId: StoreId,
115
115
  query: string,
116
116
  limit: number
117
- ): Promise<Array<{ id: DocumentId; content: string; score: number; metadata: DocumentMetadata }>> {
117
+ ): Promise<
118
+ Array<{ id: DocumentId; content: string; score: number; metadata: DocumentMetadata }>
119
+ > {
118
120
  const table = await this.getTable(storeId);
119
121
 
120
122
  try {
121
123
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
122
- const results = await table
123
- .search(query, 'fts')
124
- .limit(limit)
125
- .toArray() as Array<{ id: string; content: string; metadata: string; score: number }>;
124
+ const results = (await table.search(query, 'fts').limit(limit).toArray()) as Array<{
125
+ id: string;
126
+ content: string;
127
+ metadata: string;
128
+ score: number;
129
+ }>;
126
130
 
127
131
  return results.map((r) => ({
128
132
  id: createDocumentId(r.id),
package/src/index.ts CHANGED
@@ -1,17 +1,22 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import { Command } from 'commander';
4
3
  import { homedir } from 'node:os';
5
4
  import { join } from 'node:path';
6
- import { createProgram, getGlobalOptions } from './cli/program.js';
7
- import { createStoreCommand } from './cli/commands/store.js';
8
- import { createSearchCommand } from './cli/commands/search.js';
5
+ import { Command } from 'commander';
6
+ import { createCrawlCommand } from './cli/commands/crawl.js';
9
7
  import { createIndexCommand } from './cli/commands/index-cmd.js';
8
+ import { createMCPCommand } from './cli/commands/mcp.js';
9
+ import {
10
+ createAddRepoCommand,
11
+ createAddFolderCommand,
12
+ createStoresCommand,
13
+ createSuggestCommand,
14
+ } from './cli/commands/plugin-api.js';
15
+ import { createSearchCommand } from './cli/commands/search.js';
10
16
  import { createServeCommand } from './cli/commands/serve.js';
11
- import { createCrawlCommand } from './cli/commands/crawl.js';
12
17
  import { createSetupCommand } from './cli/commands/setup.js';
13
- import { createMCPCommand } from './cli/commands/mcp.js';
14
- import { createAddRepoCommand, createAddFolderCommand, createStoresCommand, createSuggestCommand } from './cli/commands/plugin-api.js';
18
+ import { createStoreCommand } from './cli/commands/store.js';
19
+ import { createProgram, getGlobalOptions } from './cli/program.js';
15
20
 
16
21
  // Default paths
17
22
  const DEFAULT_DATA_DIR = join(homedir(), '.bluera', 'bluera-knowledge', 'data');
@@ -25,28 +30,30 @@ function formatCommandHelp(cmd: Command, indent: string = ''): string[] {
25
30
  const lines: string[] = [];
26
31
  const name = cmd.name();
27
32
  const desc = cmd.description();
28
- const args = cmd.registeredArguments.map(a => {
29
- const req = a.required;
30
- return req ? `<${a.name()}>` : `[${a.name()}]`;
31
- }).join(' ');
33
+ const args = cmd.registeredArguments
34
+ .map((a) => {
35
+ const req = a.required;
36
+ return req ? `<${a.name()}>` : `[${a.name()}]`;
37
+ })
38
+ .join(' ');
32
39
 
33
40
  // Command header with arguments
34
- lines.push(`${indent}${name}${args ? ' ' + args : ''}`);
41
+ lines.push(`${indent}${name}${args ? ` ${args}` : ''}`);
35
42
  if (desc) {
36
43
  lines.push(`${indent} ${desc}`);
37
44
  }
38
45
 
39
46
  // Options (skip -h, --help which is auto-added)
40
- const options = cmd.options.filter(o => o.flags !== '-h, --help');
47
+ const options = cmd.options.filter((o) => o.flags !== '-h, --help');
41
48
  for (const opt of options) {
42
49
  lines.push(`${indent} ${opt.flags.padEnd(28)} ${opt.description}`);
43
50
  }
44
51
 
45
52
  // Subcommands (recursive)
46
- const subcommands = cmd.commands.filter(c => c.name() !== 'help');
53
+ const subcommands = cmd.commands.filter((c) => c.name() !== 'help');
47
54
  for (const sub of subcommands) {
48
55
  lines.push('');
49
- lines.push(...formatCommandHelp(sub, indent + ' '));
56
+ lines.push(...formatCommandHelp(sub, `${indent} `));
50
57
  }
51
58
 
52
59
  return lines;
@@ -66,7 +73,9 @@ function printFullHelp(program: Command): void {
66
73
 
67
74
  // Global options
68
75
  console.log('\nGlobal options:');
69
- const globalOpts = program.options.filter(o => o.flags !== '-h, --help' && o.flags !== '-V, --version');
76
+ const globalOpts = program.options.filter(
77
+ (o) => o.flags !== '-h, --help' && o.flags !== '-V, --version'
78
+ );
70
79
  for (const opt of globalOpts) {
71
80
  console.log(` ${opt.flags.padEnd(28)} ${opt.description}`);
72
81
  }
@@ -74,7 +83,7 @@ function printFullHelp(program: Command): void {
74
83
  console.log('\nCommands:\n');
75
84
 
76
85
  // All commands except help
77
- const commands = program.commands.filter(c => c.name() !== 'help');
86
+ const commands = program.commands.filter((c) => c.name() !== 'help');
78
87
  for (const cmd of commands) {
79
88
  console.log(formatCommandHelp(cmd).join('\n'));
80
89
  console.log('');