bluera-knowledge 0.9.32 → 0.9.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/.claude/hooks/post-edit-check.sh +5 -3
  2. package/.claude/skills/atomic-commits/SKILL.md +3 -1
  3. package/.husky/pre-commit +3 -2
  4. package/.prettierrc +9 -0
  5. package/.versionrc.json +1 -1
  6. package/CHANGELOG.md +70 -0
  7. package/CLAUDE.md +6 -0
  8. package/README.md +25 -13
  9. package/bun.lock +277 -33
  10. package/dist/{chunk-L2YVNC63.js → chunk-6FHWC36B.js} +9 -1
  11. package/dist/chunk-6FHWC36B.js.map +1 -0
  12. package/dist/{chunk-RST4XGRL.js → chunk-DC7CGSGT.js} +288 -241
  13. package/dist/chunk-DC7CGSGT.js.map +1 -0
  14. package/dist/{chunk-6PBP5DVD.js → chunk-WFNPNAAP.js} +3212 -3054
  15. package/dist/chunk-WFNPNAAP.js.map +1 -0
  16. package/dist/{chunk-WT2DAEO7.js → chunk-Z2KKVH45.js} +548 -482
  17. package/dist/chunk-Z2KKVH45.js.map +1 -0
  18. package/dist/index.js +871 -758
  19. package/dist/index.js.map +1 -1
  20. package/dist/mcp/server.js +3 -3
  21. package/dist/watch.service-BJV3TI3F.js +7 -0
  22. package/dist/workers/background-worker-cli.js +97 -71
  23. package/dist/workers/background-worker-cli.js.map +1 -1
  24. package/eslint.config.js +43 -1
  25. package/package.json +18 -11
  26. package/plugin.json +8 -0
  27. package/python/requirements.txt +1 -1
  28. package/src/analysis/ast-parser.test.ts +12 -11
  29. package/src/analysis/ast-parser.ts +28 -22
  30. package/src/analysis/code-graph.test.ts +52 -62
  31. package/src/analysis/code-graph.ts +9 -13
  32. package/src/analysis/dependency-usage-analyzer.test.ts +91 -271
  33. package/src/analysis/dependency-usage-analyzer.ts +52 -24
  34. package/src/analysis/go-ast-parser.test.ts +22 -22
  35. package/src/analysis/go-ast-parser.ts +18 -25
  36. package/src/analysis/parser-factory.test.ts +9 -9
  37. package/src/analysis/parser-factory.ts +3 -3
  38. package/src/analysis/python-ast-parser.test.ts +27 -27
  39. package/src/analysis/python-ast-parser.ts +2 -2
  40. package/src/analysis/repo-url-resolver.test.ts +82 -82
  41. package/src/analysis/rust-ast-parser.test.ts +19 -19
  42. package/src/analysis/rust-ast-parser.ts +17 -27
  43. package/src/analysis/tree-sitter-parser.test.ts +3 -3
  44. package/src/analysis/tree-sitter-parser.ts +10 -16
  45. package/src/cli/commands/crawl.test.ts +40 -24
  46. package/src/cli/commands/crawl.ts +186 -166
  47. package/src/cli/commands/index-cmd.test.ts +90 -90
  48. package/src/cli/commands/index-cmd.ts +52 -36
  49. package/src/cli/commands/mcp.test.ts +6 -6
  50. package/src/cli/commands/mcp.ts +2 -2
  51. package/src/cli/commands/plugin-api.test.ts +16 -18
  52. package/src/cli/commands/plugin-api.ts +9 -6
  53. package/src/cli/commands/search.test.ts +16 -7
  54. package/src/cli/commands/search.ts +124 -87
  55. package/src/cli/commands/serve.test.ts +67 -25
  56. package/src/cli/commands/serve.ts +18 -3
  57. package/src/cli/commands/setup.test.ts +176 -101
  58. package/src/cli/commands/setup.ts +140 -117
  59. package/src/cli/commands/store.test.ts +82 -53
  60. package/src/cli/commands/store.ts +56 -37
  61. package/src/cli/program.ts +2 -2
  62. package/src/crawl/article-converter.test.ts +4 -1
  63. package/src/crawl/article-converter.ts +46 -31
  64. package/src/crawl/bridge.test.ts +240 -132
  65. package/src/crawl/bridge.ts +87 -30
  66. package/src/crawl/claude-client.test.ts +124 -56
  67. package/src/crawl/claude-client.ts +7 -15
  68. package/src/crawl/intelligent-crawler.test.ts +65 -22
  69. package/src/crawl/intelligent-crawler.ts +86 -53
  70. package/src/crawl/markdown-utils.ts +1 -4
  71. package/src/db/embeddings.ts +4 -6
  72. package/src/db/lance.test.ts +4 -4
  73. package/src/db/lance.ts +16 -12
  74. package/src/index.ts +26 -17
  75. package/src/logging/index.ts +1 -5
  76. package/src/logging/logger.ts +3 -5
  77. package/src/logging/payload.test.ts +1 -1
  78. package/src/logging/payload.ts +3 -5
  79. package/src/mcp/commands/index.ts +2 -2
  80. package/src/mcp/commands/job.commands.ts +12 -18
  81. package/src/mcp/commands/meta.commands.ts +13 -13
  82. package/src/mcp/commands/registry.ts +5 -8
  83. package/src/mcp/commands/store.commands.ts +19 -19
  84. package/src/mcp/handlers/execute.handler.test.ts +10 -10
  85. package/src/mcp/handlers/execute.handler.ts +4 -5
  86. package/src/mcp/handlers/index.ts +10 -14
  87. package/src/mcp/handlers/job.handler.test.ts +10 -10
  88. package/src/mcp/handlers/job.handler.ts +22 -25
  89. package/src/mcp/handlers/search.handler.test.ts +36 -65
  90. package/src/mcp/handlers/search.handler.ts +135 -104
  91. package/src/mcp/handlers/store.handler.test.ts +41 -52
  92. package/src/mcp/handlers/store.handler.ts +108 -88
  93. package/src/mcp/schemas/index.test.ts +73 -68
  94. package/src/mcp/schemas/index.ts +18 -12
  95. package/src/mcp/server.test.ts +1 -1
  96. package/src/mcp/server.ts +59 -46
  97. package/src/plugin/commands.test.ts +230 -95
  98. package/src/plugin/commands.ts +24 -25
  99. package/src/plugin/dependency-analyzer.test.ts +52 -52
  100. package/src/plugin/dependency-analyzer.ts +85 -22
  101. package/src/plugin/git-clone.test.ts +24 -13
  102. package/src/plugin/git-clone.ts +3 -7
  103. package/src/server/app.test.ts +109 -109
  104. package/src/server/app.ts +32 -23
  105. package/src/server/index.test.ts +64 -66
  106. package/src/services/chunking.service.test.ts +32 -32
  107. package/src/services/chunking.service.ts +16 -9
  108. package/src/services/code-graph.service.test.ts +30 -36
  109. package/src/services/code-graph.service.ts +24 -10
  110. package/src/services/code-unit.service.test.ts +55 -11
  111. package/src/services/code-unit.service.ts +85 -11
  112. package/src/services/config.service.test.ts +37 -18
  113. package/src/services/config.service.ts +30 -7
  114. package/src/services/index.service.test.ts +49 -18
  115. package/src/services/index.service.ts +98 -48
  116. package/src/services/index.ts +6 -9
  117. package/src/services/job.service.test.ts +22 -22
  118. package/src/services/job.service.ts +18 -18
  119. package/src/services/project-root.service.test.ts +1 -3
  120. package/src/services/search.service.test.ts +248 -120
  121. package/src/services/search.service.ts +286 -156
  122. package/src/services/services.test.ts +1 -1
  123. package/src/services/snippet.service.test.ts +14 -6
  124. package/src/services/snippet.service.ts +7 -5
  125. package/src/services/store.service.test.ts +68 -29
  126. package/src/services/store.service.ts +41 -12
  127. package/src/services/watch.service.test.ts +34 -14
  128. package/src/services/watch.service.ts +11 -1
  129. package/src/types/brands.test.ts +3 -1
  130. package/src/types/index.ts +2 -13
  131. package/src/types/search.ts +10 -8
  132. package/src/utils/type-guards.test.ts +20 -15
  133. package/src/utils/type-guards.ts +1 -1
  134. package/src/workers/background-worker-cli.ts +28 -30
  135. package/src/workers/background-worker.test.ts +54 -40
  136. package/src/workers/background-worker.ts +76 -60
  137. package/src/workers/pid-file.test.ts +167 -0
  138. package/src/workers/pid-file.ts +82 -0
  139. package/src/workers/spawn-worker.test.ts +22 -10
  140. package/src/workers/spawn-worker.ts +6 -6
  141. package/tests/analysis/ast-parser.test.ts +3 -3
  142. package/tests/analysis/code-graph.test.ts +5 -5
  143. package/tests/fixtures/code-snippets/api/error-handling.ts +4 -15
  144. package/tests/fixtures/code-snippets/api/rest-controller.ts +3 -9
  145. package/tests/fixtures/code-snippets/auth/jwt-auth.ts +5 -21
  146. package/tests/fixtures/code-snippets/auth/oauth-flow.ts +4 -4
  147. package/tests/fixtures/code-snippets/database/repository-pattern.ts +11 -3
  148. package/tests/fixtures/corpus/oss-repos/hono/src/adapter/aws-lambda/handler.ts +2 -2
  149. package/tests/fixtures/corpus/oss-repos/hono/src/adapter/cloudflare-pages/handler.ts +1 -1
  150. package/tests/fixtures/corpus/oss-repos/hono/src/adapter/cloudflare-workers/serve-static.ts +2 -2
  151. package/tests/fixtures/corpus/oss-repos/hono/src/client/client.ts +2 -2
  152. package/tests/fixtures/corpus/oss-repos/hono/src/client/types.ts +22 -20
  153. package/tests/fixtures/corpus/oss-repos/hono/src/context.ts +13 -10
  154. package/tests/fixtures/corpus/oss-repos/hono/src/helper/accepts/accepts.ts +10 -7
  155. package/tests/fixtures/corpus/oss-repos/hono/src/helper/adapter/index.ts +2 -2
  156. package/tests/fixtures/corpus/oss-repos/hono/src/helper/css/index.ts +1 -1
  157. package/tests/fixtures/corpus/oss-repos/hono/src/helper/factory/index.ts +16 -16
  158. package/tests/fixtures/corpus/oss-repos/hono/src/helper/ssg/ssg.ts +2 -2
  159. package/tests/fixtures/corpus/oss-repos/hono/src/hono-base.ts +3 -3
  160. package/tests/fixtures/corpus/oss-repos/hono/src/hono.ts +1 -1
  161. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/css.ts +2 -2
  162. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/intrinsic-element/components.ts +1 -1
  163. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/render.ts +7 -7
  164. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/hooks/index.ts +3 -3
  165. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/intrinsic-element/components.ts +1 -1
  166. package/tests/fixtures/corpus/oss-repos/hono/src/jsx/utils.ts +6 -6
  167. package/tests/fixtures/corpus/oss-repos/hono/src/middleware/jsx-renderer/index.ts +3 -3
  168. package/tests/fixtures/corpus/oss-repos/hono/src/middleware/serve-static/index.ts +1 -1
  169. package/tests/fixtures/corpus/oss-repos/hono/src/preset/quick.ts +1 -1
  170. package/tests/fixtures/corpus/oss-repos/hono/src/preset/tiny.ts +1 -1
  171. package/tests/fixtures/corpus/oss-repos/hono/src/router/pattern-router/router.ts +2 -2
  172. package/tests/fixtures/corpus/oss-repos/hono/src/router/reg-exp-router/node.ts +4 -4
  173. package/tests/fixtures/corpus/oss-repos/hono/src/router/reg-exp-router/router.ts +1 -1
  174. package/tests/fixtures/corpus/oss-repos/hono/src/router/trie-router/node.ts +1 -1
  175. package/tests/fixtures/corpus/oss-repos/hono/src/types.ts +166 -169
  176. package/tests/fixtures/corpus/oss-repos/hono/src/utils/body.ts +8 -8
  177. package/tests/fixtures/corpus/oss-repos/hono/src/utils/color.ts +3 -3
  178. package/tests/fixtures/corpus/oss-repos/hono/src/utils/cookie.ts +2 -2
  179. package/tests/fixtures/corpus/oss-repos/hono/src/utils/encode.ts +2 -2
  180. package/tests/fixtures/corpus/oss-repos/hono/src/utils/types.ts +30 -33
  181. package/tests/fixtures/corpus/oss-repos/hono/src/validator/validator.ts +2 -2
  182. package/tests/fixtures/test-server.ts +3 -2
  183. package/tests/helpers/performance-metrics.ts +8 -25
  184. package/tests/helpers/search-relevance.ts +14 -69
  185. package/tests/integration/cli-consistency.test.ts +6 -5
  186. package/tests/integration/python-bridge.test.ts +13 -3
  187. package/tests/mcp/server.test.ts +1 -1
  188. package/tests/services/code-unit.service.test.ts +48 -0
  189. package/tests/services/job.service.test.ts +124 -0
  190. package/tests/services/search.progressive-context.test.ts +2 -2
  191. package/.claude-plugin/plugin.json +0 -13
  192. package/dist/chunk-6PBP5DVD.js.map +0 -1
  193. package/dist/chunk-L2YVNC63.js.map +0 -1
  194. package/dist/chunk-RST4XGRL.js.map +0 -1
  195. package/dist/chunk-WT2DAEO7.js.map +0 -1
  196. package/dist/watch.service-YAIKKDCF.js +0 -7
  197. package/skills/atomic-commits/SKILL.md +0 -77
  198. /package/dist/{watch.service-YAIKKDCF.js.map → watch.service-BJV3TI3F.js.map} +0 -0
@@ -17,7 +17,7 @@ import {
17
17
  queryNodesByType,
18
18
  extractImportPath,
19
19
  type TreeSitterNode,
20
- type TreeSitterTree
20
+ type TreeSitterTree,
21
21
  } from './tree-sitter-parser.js';
22
22
 
23
23
  describe('tree-sitter-parser', () => {
@@ -252,7 +252,7 @@ describe('tree-sitter-parser', () => {
252
252
  // Create a mock node without a name field
253
253
  const mockNode: Partial<TreeSitterNode> = {
254
254
  children: [],
255
- childForFieldName: () => null
255
+ childForFieldName: () => null,
256
256
  };
257
257
  const sig = getFunctionSignature(mockNode as TreeSitterNode);
258
258
  expect(sig).toBe('');
@@ -288,7 +288,7 @@ describe('tree-sitter-parser', () => {
288
288
  it('returns empty string when argument node is missing', () => {
289
289
  // Create a mock node without an argument field
290
290
  const mockNode: Partial<TreeSitterNode> = {
291
- childForFieldName: () => null
291
+ childForFieldName: () => null,
292
292
  };
293
293
  const path = extractImportPath(mockNode as TreeSitterNode);
294
294
  expect(path).toBe('');
@@ -1,6 +1,6 @@
1
1
  import Parser from 'tree-sitter';
2
- import Rust from 'tree-sitter-rust';
3
2
  import Go from 'tree-sitter-go';
3
+ import Rust from 'tree-sitter-rust';
4
4
 
5
5
  /**
6
6
  * Tree-sitter infrastructure for parsing Rust and Go code.
@@ -105,21 +105,15 @@ export function getNodeText(node: TreeSitterNode): string {
105
105
  /**
106
106
  * Get all children of a specific type
107
107
  */
108
- export function getChildrenOfType(
109
- node: TreeSitterNode,
110
- type: string
111
- ): TreeSitterNode[] {
112
- return node.children.filter(child => child.type === type);
108
+ export function getChildrenOfType(node: TreeSitterNode, type: string): TreeSitterNode[] {
109
+ return node.children.filter((child) => child.type === type);
113
110
  }
114
111
 
115
112
  /**
116
113
  * Get the first child of a specific type
117
114
  */
118
- export function getFirstChildOfType(
119
- node: TreeSitterNode,
120
- type: string
121
- ): TreeSitterNode | null {
122
- return node.children.find(child => child.type === type) ?? null;
115
+ export function getFirstChildOfType(node: TreeSitterNode, type: string): TreeSitterNode | null {
116
+ return node.children.find((child) => child.type === type) ?? null;
123
117
  }
124
118
 
125
119
  /**
@@ -136,14 +130,14 @@ export function getChildByFieldName(
136
130
  * Check if node has a visibility modifier (pub)
137
131
  */
138
132
  export function hasVisibilityModifier(node: TreeSitterNode): boolean {
139
- return node.children.some(child => child.type === 'visibility_modifier');
133
+ return node.children.some((child) => child.type === 'visibility_modifier');
140
134
  }
141
135
 
142
136
  /**
143
137
  * Get visibility modifier text (e.g., "pub", "pub(crate)")
144
138
  */
145
139
  export function getVisibilityModifier(node: TreeSitterNode): string | null {
146
- const visNode = node.children.find(child => child.type === 'visibility_modifier');
140
+ const visNode = node.children.find((child) => child.type === 'visibility_modifier');
147
141
  return visNode !== undefined ? visNode.text : null;
148
142
  }
149
143
 
@@ -152,14 +146,14 @@ export function getVisibilityModifier(node: TreeSitterNode): string | null {
152
146
  */
153
147
  export function isAsyncFunction(node: TreeSitterNode): boolean {
154
148
  // Check for 'async' keyword in function_item or function_signature_item
155
- return node.children.some(child => child.type === 'async' || child.text === 'async');
149
+ return node.children.some((child) => child.type === 'async' || child.text === 'async');
156
150
  }
157
151
 
158
152
  /**
159
153
  * Check if a function is unsafe
160
154
  */
161
155
  export function isUnsafeFunction(node: TreeSitterNode): boolean {
162
- return node.children.some(child => child.type === 'unsafe' || child.text === 'unsafe');
156
+ return node.children.some((child) => child.type === 'unsafe' || child.text === 'unsafe');
163
157
  }
164
158
 
165
159
  /**
@@ -190,7 +184,7 @@ export function getFunctionSignature(node: TreeSitterNode): string {
190
184
 
191
185
  // Add return type
192
186
  if (returnTypeNode !== null) {
193
- signature += ' ' + returnTypeNode.text;
187
+ signature += ` ${returnTypeNode.text}`;
194
188
  }
195
189
 
196
190
  return signature;
@@ -57,7 +57,7 @@ describe('crawl command execution', () => {
57
57
  };
58
58
 
59
59
  vi.mocked(createServices).mockResolvedValue(mockServices);
60
- vi.mocked(IntelligentCrawler).mockImplementation(function(this: any) {
60
+ vi.mocked(IntelligentCrawler).mockImplementation(function (this: any) {
61
61
  return mockCrawler as any;
62
62
  } as any);
63
63
 
@@ -278,7 +278,14 @@ describe('crawl command execution', () => {
278
278
  );
279
279
 
280
280
  const command = createCrawlCommand(getOptions);
281
- command.parseOptions(['--crawl', 'all Getting Started pages', '--extract', 'code examples', '--max-pages', '100']);
281
+ command.parseOptions([
282
+ '--crawl',
283
+ 'all Getting Started pages',
284
+ '--extract',
285
+ 'code examples',
286
+ '--max-pages',
287
+ '100',
288
+ ]);
282
289
  const actionHandler = command._actionHandler;
283
290
 
284
291
  await actionHandler(['https://example.com', 'test-store']);
@@ -337,7 +344,10 @@ describe('crawl command execution', () => {
337
344
 
338
345
  it('throws error when store creation fails', async () => {
339
346
  mockServices.store.getByIdOrName.mockResolvedValue(undefined);
340
- mockServices.store.create.mockResolvedValue({ success: false, error: new Error('Name already exists') });
347
+ mockServices.store.create.mockResolvedValue({
348
+ success: false,
349
+ error: new Error('Name already exists'),
350
+ });
341
351
 
342
352
  const command = createCrawlCommand(getOptions);
343
353
  const actionHandler = command._actionHandler;
@@ -388,9 +398,7 @@ describe('crawl command execution', () => {
388
398
 
389
399
  await actionHandler(['https://example.com', 'new-store']);
390
400
 
391
- expect(consoleLogSpy).toHaveBeenCalledWith(
392
- expect.stringContaining('"storeCreated": true')
393
- );
401
+ expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"storeCreated": true'));
394
402
  });
395
403
  });
396
404
 
@@ -439,7 +447,9 @@ describe('crawl command execution', () => {
439
447
  const command = createCrawlCommand(getOptions);
440
448
  const actionHandler = command._actionHandler;
441
449
 
442
- await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow('process.exit: 6');
450
+ await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
451
+ 'process.exit: 6'
452
+ );
443
453
 
444
454
  expect(consoleErrorSpy).toHaveBeenCalledWith('Error: Crawl failed: Network timeout');
445
455
  expect(processExitSpy).toHaveBeenCalledWith(6);
@@ -475,7 +485,9 @@ describe('crawl command execution', () => {
475
485
  const command = createCrawlCommand(getOptions);
476
486
  const actionHandler = command._actionHandler;
477
487
 
478
- await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow('process.exit: 6');
488
+ await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
489
+ 'process.exit: 6'
490
+ );
479
491
 
480
492
  expect(consoleErrorSpy).toHaveBeenCalledWith(
481
493
  'Error: Crawl failed: Embedding service unavailable'
@@ -514,7 +526,9 @@ describe('crawl command execution', () => {
514
526
  const command = createCrawlCommand(getOptions);
515
527
  const actionHandler = command._actionHandler;
516
528
 
517
- await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow('process.exit: 6');
529
+ await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
530
+ 'process.exit: 6'
531
+ );
518
532
 
519
533
  expect(consoleErrorSpy).toHaveBeenCalledWith('Error: Crawl failed: Database write error');
520
534
  expect(processExitSpy).toHaveBeenCalledWith(6);
@@ -592,18 +606,10 @@ describe('crawl command execution', () => {
592
606
 
593
607
  await actionHandler(['https://example.com', 'test-store']);
594
608
 
595
- expect(consoleLogSpy).toHaveBeenCalledWith(
596
- expect.stringContaining('"success": true')
597
- );
598
- expect(consoleLogSpy).toHaveBeenCalledWith(
599
- expect.stringContaining('"store": "test-store"')
600
- );
601
- expect(consoleLogSpy).toHaveBeenCalledWith(
602
- expect.stringContaining('"pagesCrawled": 1')
603
- );
604
- expect(consoleLogSpy).toHaveBeenCalledWith(
605
- expect.stringContaining('"mode": "intelligent"')
606
- );
609
+ expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"success": true'));
610
+ expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"store": "test-store"'));
611
+ expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"pagesCrawled": 1'));
612
+ expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"mode": "intelligent"'));
607
613
  expect(consoleLogSpy).toHaveBeenCalledWith(
608
614
  expect.stringContaining('"hadCrawlInstruction": true')
609
615
  );
@@ -950,7 +956,11 @@ describe('crawl command execution', () => {
950
956
  if (progressCallback) {
951
957
  progressCallback({ type: 'strategy', message: 'Planning crawl...' });
952
958
  progressCallback({ type: 'strategy', message: undefined }); // Test fallback
953
- progressCallback({ type: 'page', pagesVisited: 0, currentUrl: 'https://example.com/page1' });
959
+ progressCallback({
960
+ type: 'page',
961
+ pagesVisited: 0,
962
+ currentUrl: 'https://example.com/page1',
963
+ });
954
964
  progressCallback({ type: 'page', pagesVisited: 1, currentUrl: undefined }); // Test fallback
955
965
  progressCallback({ type: 'extraction', currentUrl: 'https://example.com/page1' });
956
966
  progressCallback({ type: 'extraction', currentUrl: undefined }); // Test fallback
@@ -978,7 +988,10 @@ describe('crawl command execution', () => {
978
988
 
979
989
  expect(mockCrawler.on).toHaveBeenCalledWith('progress', expect.any(Function));
980
990
  } finally {
981
- Object.defineProperty(process.stdout, 'isTTY', { value: originalIsTTY, configurable: true });
991
+ Object.defineProperty(process.stdout, 'isTTY', {
992
+ value: originalIsTTY,
993
+ configurable: true,
994
+ });
982
995
  }
983
996
  });
984
997
 
@@ -1022,7 +1035,10 @@ describe('crawl command execution', () => {
1022
1035
  expect(consoleLogSpy).toHaveBeenCalledWith('Crawling https://example.com');
1023
1036
  expect(consoleLogSpy).toHaveBeenCalledWith('Crawled 1 pages, indexed 1 chunks');
1024
1037
  } finally {
1025
- Object.defineProperty(process.stdout, 'isTTY', { value: originalIsTTY, configurable: true });
1038
+ Object.defineProperty(process.stdout, 'isTTY', {
1039
+ value: originalIsTTY,
1040
+ configurable: true,
1041
+ });
1026
1042
  }
1027
1043
  });
1028
1044
  });
@@ -1,198 +1,218 @@
1
- import { Command } from 'commander';
2
1
  import { createHash } from 'node:crypto';
2
+ import { Command } from 'commander';
3
3
  import ora, { type Ora } from 'ora';
4
- import { createServices, destroyServices } from '../../services/index.js';
5
4
  import { IntelligentCrawler, type CrawlProgress } from '../../crawl/intelligent-crawler.js';
5
+ import { ChunkingService } from '../../services/chunking.service.js';
6
+ import { createServices, destroyServices } from '../../services/index.js';
7
+ import { classifyWebContentType } from '../../services/index.service.js';
6
8
  import { createDocumentId } from '../../types/brands.js';
7
- import type { GlobalOptions } from '../program.js';
8
9
  import type { Document } from '../../types/document.js';
9
10
  import type { WebStore } from '../../types/store.js';
10
- import { ChunkingService } from '../../services/chunking.service.js';
11
- import { classifyWebContentType } from '../../services/index.service.js';
11
+ import type { GlobalOptions } from '../program.js';
12
12
 
13
13
  export function createCrawlCommand(getOptions: () => GlobalOptions): Command {
14
14
  return new Command('crawl')
15
15
  .description('Crawl web pages with natural language control and index into store')
16
16
  .argument('<url>', 'URL to crawl')
17
17
  .argument('<store>', 'Target web store to add crawled content to')
18
- .option('--crawl <instruction>', 'Natural language instruction for what to crawl (e.g., "all Getting Started pages")')
19
- .option('--extract <instruction>', 'Natural language instruction for what to extract (e.g., "extract API references")')
18
+ .option(
19
+ '--crawl <instruction>',
20
+ 'Natural language instruction for what to crawl (e.g., "all Getting Started pages")'
21
+ )
22
+ .option(
23
+ '--extract <instruction>',
24
+ 'Natural language instruction for what to extract (e.g., "extract API references")'
25
+ )
20
26
  .option('--simple', 'Use simple BFS mode instead of intelligent crawling')
21
27
  .option('--max-pages <number>', 'Maximum number of pages to crawl', '50')
22
28
  .option('--headless', 'Use headless browser for JavaScript-rendered sites')
23
- .action(async (url: string, storeIdOrName: string, cmdOptions: {
24
- crawl?: string;
25
- extract?: string;
26
- simple?: boolean;
27
- maxPages?: string;
28
- headless?: boolean;
29
- }) => {
30
- const globalOpts = getOptions();
31
- const services = await createServices(globalOpts.config, globalOpts.dataDir);
32
-
33
- // Look up or auto-create web store
34
- let store: WebStore;
35
- let storeCreated = false;
36
- const existingStore = await services.store.getByIdOrName(storeIdOrName);
37
-
38
- if (!existingStore) {
39
- // Auto-create web store
40
- const result = await services.store.create({
41
- name: storeIdOrName,
42
- type: 'web',
43
- url,
44
- });
45
- if (!result.success) {
46
- await destroyServices(services);
47
- throw new Error(`Failed to create store: ${result.error.message}`);
48
- }
49
- // Type narrowing: success check above ensures result.data is Store
50
- // We know it's a WebStore because we created it with type: 'web'
51
- const createdStore = result.data;
52
- if (createdStore.type !== 'web') {
53
- throw new Error('Unexpected store type after creation');
29
+ .action(
30
+ async (
31
+ url: string,
32
+ storeIdOrName: string,
33
+ cmdOptions: {
34
+ crawl?: string;
35
+ extract?: string;
36
+ simple?: boolean;
37
+ maxPages?: string;
38
+ headless?: boolean;
54
39
  }
55
- store = createdStore;
56
- storeCreated = true;
57
- if (globalOpts.quiet !== true && globalOpts.format !== 'json') {
58
- console.log(`Created web store: ${store.name}`);
40
+ ) => {
41
+ const globalOpts = getOptions();
42
+ const services = await createServices(globalOpts.config, globalOpts.dataDir);
43
+
44
+ // Look up or auto-create web store
45
+ let store: WebStore;
46
+ let storeCreated = false;
47
+ const existingStore = await services.store.getByIdOrName(storeIdOrName);
48
+
49
+ if (!existingStore) {
50
+ // Auto-create web store
51
+ const result = await services.store.create({
52
+ name: storeIdOrName,
53
+ type: 'web',
54
+ url,
55
+ });
56
+ if (!result.success) {
57
+ await destroyServices(services);
58
+ throw new Error(`Failed to create store: ${result.error.message}`);
59
+ }
60
+ // Type narrowing: success check above ensures result.data is Store
61
+ // We know it's a WebStore because we created it with type: 'web'
62
+ const createdStore = result.data;
63
+ if (createdStore.type !== 'web') {
64
+ throw new Error('Unexpected store type after creation');
65
+ }
66
+ store = createdStore;
67
+ storeCreated = true;
68
+ if (globalOpts.quiet !== true && globalOpts.format !== 'json') {
69
+ console.log(`Created web store: ${store.name}`);
70
+ }
71
+ } else if (existingStore.type !== 'web') {
72
+ await destroyServices(services);
73
+ throw new Error(
74
+ `Store "${storeIdOrName}" exists but is not a web store (type: ${existingStore.type})`
75
+ );
76
+ } else {
77
+ store = existingStore;
59
78
  }
60
- } else if (existingStore.type !== 'web') {
61
- await destroyServices(services);
62
- throw new Error(`Store "${storeIdOrName}" exists but is not a web store (type: ${existingStore.type})`);
63
- } else {
64
- store = existingStore;
65
- }
66
79
 
67
- const maxPages = cmdOptions.maxPages !== undefined ? parseInt(cmdOptions.maxPages) : 50;
80
+ const maxPages = cmdOptions.maxPages !== undefined ? parseInt(cmdOptions.maxPages) : 50;
68
81
 
69
- // Use spinner in interactive mode
70
- const isInteractive = process.stdout.isTTY && globalOpts.quiet !== true && globalOpts.format !== 'json';
71
- let spinner: Ora | undefined;
82
+ // Use spinner in interactive mode
83
+ const isInteractive =
84
+ process.stdout.isTTY && globalOpts.quiet !== true && globalOpts.format !== 'json';
85
+ let spinner: Ora | undefined;
72
86
 
73
- if (isInteractive) {
74
- const mode = cmdOptions.simple === true ? 'simple' : 'intelligent';
75
- spinner = ora(`Crawling ${url} (${mode} mode)`).start();
76
- } else if (globalOpts.quiet !== true && globalOpts.format !== 'json') {
77
- console.log(`Crawling ${url}`);
78
- }
87
+ if (isInteractive) {
88
+ const mode = cmdOptions.simple === true ? 'simple' : 'intelligent';
89
+ spinner = ora(`Crawling ${url} (${mode} mode)`).start();
90
+ } else if (globalOpts.quiet !== true && globalOpts.format !== 'json') {
91
+ console.log(`Crawling ${url}`);
92
+ }
93
+
94
+ const crawler = new IntelligentCrawler();
95
+ // Use web preset for larger prose-friendly chunks
96
+ const webChunker = ChunkingService.forContentType('web');
97
+ let pagesIndexed = 0;
98
+ let chunksCreated = 0;
99
+ let exitCode = 0;
79
100
 
80
- const crawler = new IntelligentCrawler();
81
- // Use web preset for larger prose-friendly chunks
82
- const webChunker = ChunkingService.forContentType('web');
83
- let pagesIndexed = 0;
84
- let chunksCreated = 0;
85
- let exitCode = 0;
86
-
87
- // Listen for progress events
88
- crawler.on('progress', (progress: CrawlProgress) => {
89
- if (spinner) {
90
- if (progress.type === 'strategy') {
91
- spinner.text = progress.message !== undefined ? progress.message : 'Analyzing crawl strategy...';
92
- } else if (progress.type === 'page') {
93
- const url = progress.currentUrl !== undefined ? progress.currentUrl : 'unknown';
94
- spinner.text = `Crawling ${String(progress.pagesVisited + 1)}/${String(maxPages)} - ${url}`;
95
- } else if (progress.type === 'extraction') {
96
- const url = progress.currentUrl !== undefined ? progress.currentUrl : 'unknown';
97
- spinner.text = `Extracting from ${url}...`;
98
- } else if (progress.type === 'error' && progress.message !== undefined) {
99
- spinner.warn(progress.message);
101
+ // Listen for progress events
102
+ crawler.on('progress', (progress: CrawlProgress) => {
103
+ if (spinner) {
104
+ if (progress.type === 'strategy') {
105
+ spinner.text = progress.message ?? 'Analyzing crawl strategy...';
106
+ } else if (progress.type === 'page') {
107
+ const url = progress.currentUrl ?? 'unknown';
108
+ spinner.text = `Crawling ${String(progress.pagesVisited + 1)}/${String(maxPages)} - ${url}`;
109
+ } else if (progress.type === 'extraction') {
110
+ const url = progress.currentUrl ?? 'unknown';
111
+ spinner.text = `Extracting from ${url}...`;
112
+ } else if (progress.type === 'error' && progress.message !== undefined) {
113
+ spinner.warn(progress.message);
114
+ }
100
115
  }
101
- }
102
- });
103
-
104
- try {
105
- await services.lance.initialize(store.id);
106
- const docs: Document[] = [];
107
-
108
- // Crawl pages using IntelligentCrawler
109
- for await (const result of crawler.crawl(url, {
110
- ...(cmdOptions.crawl !== undefined && { crawlInstruction: cmdOptions.crawl }),
111
- ...(cmdOptions.extract !== undefined && { extractInstruction: cmdOptions.extract }),
112
- maxPages,
113
- ...(cmdOptions.simple !== undefined && { simple: cmdOptions.simple }),
114
- useHeadless: cmdOptions.headless ?? false,
115
- })) {
116
- // Use extracted content if available, otherwise markdown
117
- const contentToProcess = result.extracted !== undefined ? result.extracted : result.markdown;
118
-
119
- // Chunk the content using markdown-aware chunking (web content is converted to markdown)
120
- const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
121
- const fileType = classifyWebContentType(result.url, result.title);
122
- const urlHash = createHash('md5').update(result.url).digest('hex');
123
-
124
- for (const chunk of chunks) {
125
- const chunkId = chunks.length > 1
126
- ? `${store.id}-${urlHash}-${String(chunk.chunkIndex)}`
127
- : `${store.id}-${urlHash}`;
128
- const vector = await services.embeddings.embed(chunk.content);
129
-
130
- docs.push({
131
- id: createDocumentId(chunkId),
132
- content: chunk.content,
133
- vector,
134
- metadata: {
135
- type: chunks.length > 1 ? 'chunk' : 'web',
136
- storeId: store.id,
137
- url: result.url,
138
- title: result.title,
139
- extracted: result.extracted !== undefined,
140
- depth: result.depth,
141
- indexedAt: new Date(),
142
- fileType,
143
- chunkIndex: chunk.chunkIndex,
144
- totalChunks: chunk.totalChunks,
145
- sectionHeader: chunk.sectionHeader,
146
- },
147
- });
148
- chunksCreated++;
116
+ });
117
+
118
+ try {
119
+ await services.lance.initialize(store.id);
120
+ const docs: Document[] = [];
121
+
122
+ // Crawl pages using IntelligentCrawler
123
+ for await (const result of crawler.crawl(url, {
124
+ ...(cmdOptions.crawl !== undefined && { crawlInstruction: cmdOptions.crawl }),
125
+ ...(cmdOptions.extract !== undefined && { extractInstruction: cmdOptions.extract }),
126
+ maxPages,
127
+ ...(cmdOptions.simple !== undefined && { simple: cmdOptions.simple }),
128
+ useHeadless: cmdOptions.headless ?? false,
129
+ })) {
130
+ // Use extracted content if available, otherwise markdown
131
+ const contentToProcess = result.extracted ?? result.markdown;
132
+
133
+ // Chunk the content using markdown-aware chunking (web content is converted to markdown)
134
+ const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
135
+ const fileType = classifyWebContentType(result.url, result.title);
136
+ const urlHash = createHash('md5').update(result.url).digest('hex');
137
+
138
+ for (const chunk of chunks) {
139
+ const chunkId =
140
+ chunks.length > 1
141
+ ? `${store.id}-${urlHash}-${String(chunk.chunkIndex)}`
142
+ : `${store.id}-${urlHash}`;
143
+ const vector = await services.embeddings.embed(chunk.content);
144
+
145
+ docs.push({
146
+ id: createDocumentId(chunkId),
147
+ content: chunk.content,
148
+ vector,
149
+ metadata: {
150
+ type: chunks.length > 1 ? 'chunk' : 'web',
151
+ storeId: store.id,
152
+ url: result.url,
153
+ title: result.title,
154
+ extracted: result.extracted !== undefined,
155
+ depth: result.depth,
156
+ indexedAt: new Date(),
157
+ fileType,
158
+ chunkIndex: chunk.chunkIndex,
159
+ totalChunks: chunk.totalChunks,
160
+ sectionHeader: chunk.sectionHeader,
161
+ },
162
+ });
163
+ chunksCreated++;
164
+ }
165
+
166
+ pagesIndexed++;
149
167
  }
150
168
 
151
- pagesIndexed++;
152
- }
169
+ // Index all documents
170
+ if (docs.length > 0) {
171
+ if (spinner) {
172
+ spinner.text = 'Indexing documents...';
173
+ }
174
+ await services.lance.addDocuments(store.id, docs);
175
+ }
153
176
 
154
- // Index all documents
155
- if (docs.length > 0) {
177
+ const crawlResult = {
178
+ success: true,
179
+ store: store.name,
180
+ storeCreated,
181
+ url,
182
+ pagesCrawled: pagesIndexed,
183
+ chunksCreated,
184
+ mode: cmdOptions.simple === true ? 'simple' : 'intelligent',
185
+ hadCrawlInstruction: cmdOptions.crawl !== undefined,
186
+ hadExtractInstruction: cmdOptions.extract !== undefined,
187
+ };
188
+
189
+ if (globalOpts.format === 'json') {
190
+ console.log(JSON.stringify(crawlResult, null, 2));
191
+ } else if (spinner !== undefined) {
192
+ spinner.succeed(
193
+ `Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`
194
+ );
195
+ } else if (globalOpts.quiet !== true) {
196
+ console.log(
197
+ `Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`
198
+ );
199
+ }
200
+ } catch (error) {
201
+ const message = `Crawl failed: ${error instanceof Error ? error.message : String(error)}`;
156
202
  if (spinner) {
157
- spinner.text = 'Indexing documents...';
203
+ spinner.fail(message);
204
+ } else {
205
+ console.error(`Error: ${message}`);
158
206
  }
159
- await services.lance.addDocuments(store.id, docs);
207
+ exitCode = 6;
208
+ } finally {
209
+ await crawler.stop();
210
+ await destroyServices(services);
160
211
  }
161
212
 
162
- const crawlResult = {
163
- success: true,
164
- store: store.name,
165
- storeCreated,
166
- url,
167
- pagesCrawled: pagesIndexed,
168
- chunksCreated,
169
- mode: cmdOptions.simple === true ? 'simple' : 'intelligent',
170
- hadCrawlInstruction: cmdOptions.crawl !== undefined,
171
- hadExtractInstruction: cmdOptions.extract !== undefined,
172
- };
173
-
174
- if (globalOpts.format === 'json') {
175
- console.log(JSON.stringify(crawlResult, null, 2));
176
- } else if (spinner !== undefined) {
177
- spinner.succeed(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
178
- } else if (globalOpts.quiet !== true) {
179
- console.log(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
213
+ if (exitCode !== 0) {
214
+ process.exit(exitCode);
180
215
  }
181
- } catch (error) {
182
- const message = `Crawl failed: ${error instanceof Error ? error.message : String(error)}`;
183
- if (spinner) {
184
- spinner.fail(message);
185
- } else {
186
- console.error(`Error: ${message}`);
187
- }
188
- exitCode = 6;
189
- } finally {
190
- await crawler.stop();
191
- await destroyServices(services);
192
- }
193
-
194
- if (exitCode !== 0) {
195
- process.exit(exitCode);
196
216
  }
197
- });
217
+ );
198
218
  }