bluera-knowledge 0.9.32 → 0.9.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/hooks/post-edit-check.sh +5 -3
- package/.claude/skills/atomic-commits/SKILL.md +3 -1
- package/.husky/pre-commit +3 -2
- package/.prettierrc +9 -0
- package/.versionrc.json +1 -1
- package/CHANGELOG.md +33 -0
- package/CLAUDE.md +6 -0
- package/README.md +25 -13
- package/bun.lock +277 -33
- package/dist/{chunk-L2YVNC63.js → chunk-6FHWC36B.js} +9 -1
- package/dist/chunk-6FHWC36B.js.map +1 -0
- package/dist/{chunk-RST4XGRL.js → chunk-DC7CGSGT.js} +288 -241
- package/dist/chunk-DC7CGSGT.js.map +1 -0
- package/dist/{chunk-6PBP5DVD.js → chunk-WFNPNAAP.js} +3212 -3054
- package/dist/chunk-WFNPNAAP.js.map +1 -0
- package/dist/{chunk-WT2DAEO7.js → chunk-Z2KKVH45.js} +548 -482
- package/dist/chunk-Z2KKVH45.js.map +1 -0
- package/dist/index.js +871 -758
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +3 -3
- package/dist/watch.service-BJV3TI3F.js +7 -0
- package/dist/workers/background-worker-cli.js +46 -45
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/eslint.config.js +43 -1
- package/package.json +18 -11
- package/plugin.json +8 -0
- package/python/requirements.txt +1 -1
- package/src/analysis/ast-parser.test.ts +12 -11
- package/src/analysis/ast-parser.ts +28 -22
- package/src/analysis/code-graph.test.ts +52 -62
- package/src/analysis/code-graph.ts +9 -13
- package/src/analysis/dependency-usage-analyzer.test.ts +91 -271
- package/src/analysis/dependency-usage-analyzer.ts +52 -24
- package/src/analysis/go-ast-parser.test.ts +22 -22
- package/src/analysis/go-ast-parser.ts +18 -25
- package/src/analysis/parser-factory.test.ts +9 -9
- package/src/analysis/parser-factory.ts +3 -3
- package/src/analysis/python-ast-parser.test.ts +27 -27
- package/src/analysis/python-ast-parser.ts +2 -2
- package/src/analysis/repo-url-resolver.test.ts +82 -82
- package/src/analysis/rust-ast-parser.test.ts +19 -19
- package/src/analysis/rust-ast-parser.ts +17 -27
- package/src/analysis/tree-sitter-parser.test.ts +3 -3
- package/src/analysis/tree-sitter-parser.ts +10 -16
- package/src/cli/commands/crawl.test.ts +40 -24
- package/src/cli/commands/crawl.ts +186 -166
- package/src/cli/commands/index-cmd.test.ts +90 -90
- package/src/cli/commands/index-cmd.ts +52 -36
- package/src/cli/commands/mcp.test.ts +6 -6
- package/src/cli/commands/mcp.ts +2 -2
- package/src/cli/commands/plugin-api.test.ts +16 -18
- package/src/cli/commands/plugin-api.ts +9 -6
- package/src/cli/commands/search.test.ts +16 -7
- package/src/cli/commands/search.ts +124 -87
- package/src/cli/commands/serve.test.ts +67 -25
- package/src/cli/commands/serve.ts +18 -3
- package/src/cli/commands/setup.test.ts +176 -101
- package/src/cli/commands/setup.ts +140 -117
- package/src/cli/commands/store.test.ts +82 -53
- package/src/cli/commands/store.ts +56 -37
- package/src/cli/program.ts +2 -2
- package/src/crawl/article-converter.test.ts +4 -1
- package/src/crawl/article-converter.ts +46 -31
- package/src/crawl/bridge.test.ts +240 -132
- package/src/crawl/bridge.ts +87 -30
- package/src/crawl/claude-client.test.ts +124 -56
- package/src/crawl/claude-client.ts +7 -15
- package/src/crawl/intelligent-crawler.test.ts +65 -22
- package/src/crawl/intelligent-crawler.ts +86 -53
- package/src/crawl/markdown-utils.ts +1 -4
- package/src/db/embeddings.ts +4 -6
- package/src/db/lance.test.ts +4 -4
- package/src/db/lance.ts +16 -12
- package/src/index.ts +26 -17
- package/src/logging/index.ts +1 -5
- package/src/logging/logger.ts +3 -5
- package/src/logging/payload.test.ts +1 -1
- package/src/logging/payload.ts +3 -5
- package/src/mcp/commands/index.ts +2 -2
- package/src/mcp/commands/job.commands.ts +12 -18
- package/src/mcp/commands/meta.commands.ts +13 -13
- package/src/mcp/commands/registry.ts +5 -8
- package/src/mcp/commands/store.commands.ts +19 -19
- package/src/mcp/handlers/execute.handler.test.ts +10 -10
- package/src/mcp/handlers/execute.handler.ts +4 -5
- package/src/mcp/handlers/index.ts +10 -14
- package/src/mcp/handlers/job.handler.test.ts +10 -10
- package/src/mcp/handlers/job.handler.ts +22 -25
- package/src/mcp/handlers/search.handler.test.ts +36 -65
- package/src/mcp/handlers/search.handler.ts +135 -104
- package/src/mcp/handlers/store.handler.test.ts +41 -52
- package/src/mcp/handlers/store.handler.ts +108 -88
- package/src/mcp/schemas/index.test.ts +73 -68
- package/src/mcp/schemas/index.ts +18 -12
- package/src/mcp/server.test.ts +1 -1
- package/src/mcp/server.ts +59 -46
- package/src/plugin/commands.test.ts +230 -95
- package/src/plugin/commands.ts +24 -25
- package/src/plugin/dependency-analyzer.test.ts +52 -52
- package/src/plugin/dependency-analyzer.ts +85 -22
- package/src/plugin/git-clone.test.ts +24 -13
- package/src/plugin/git-clone.ts +3 -7
- package/src/server/app.test.ts +109 -109
- package/src/server/app.ts +32 -23
- package/src/server/index.test.ts +64 -66
- package/src/services/chunking.service.test.ts +32 -32
- package/src/services/chunking.service.ts +16 -9
- package/src/services/code-graph.service.test.ts +30 -36
- package/src/services/code-graph.service.ts +24 -10
- package/src/services/code-unit.service.test.ts +55 -11
- package/src/services/code-unit.service.ts +85 -11
- package/src/services/config.service.test.ts +37 -18
- package/src/services/config.service.ts +30 -7
- package/src/services/index.service.test.ts +49 -18
- package/src/services/index.service.ts +98 -48
- package/src/services/index.ts +6 -9
- package/src/services/job.service.test.ts +22 -22
- package/src/services/job.service.ts +18 -18
- package/src/services/project-root.service.test.ts +1 -3
- package/src/services/search.service.test.ts +248 -120
- package/src/services/search.service.ts +286 -156
- package/src/services/services.test.ts +1 -1
- package/src/services/snippet.service.test.ts +14 -6
- package/src/services/snippet.service.ts +7 -5
- package/src/services/store.service.test.ts +68 -29
- package/src/services/store.service.ts +41 -12
- package/src/services/watch.service.test.ts +34 -14
- package/src/services/watch.service.ts +11 -1
- package/src/types/brands.test.ts +3 -1
- package/src/types/index.ts +2 -13
- package/src/types/search.ts +10 -8
- package/src/utils/type-guards.test.ts +20 -15
- package/src/utils/type-guards.ts +1 -1
- package/src/workers/background-worker-cli.ts +2 -2
- package/src/workers/background-worker.test.ts +54 -40
- package/src/workers/background-worker.ts +76 -60
- package/src/workers/spawn-worker.test.ts +22 -10
- package/src/workers/spawn-worker.ts +6 -6
- package/tests/analysis/ast-parser.test.ts +3 -3
- package/tests/analysis/code-graph.test.ts +5 -5
- package/tests/fixtures/code-snippets/api/error-handling.ts +4 -15
- package/tests/fixtures/code-snippets/api/rest-controller.ts +3 -9
- package/tests/fixtures/code-snippets/auth/jwt-auth.ts +5 -21
- package/tests/fixtures/code-snippets/auth/oauth-flow.ts +4 -4
- package/tests/fixtures/code-snippets/database/repository-pattern.ts +11 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/adapter/aws-lambda/handler.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/adapter/cloudflare-pages/handler.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/adapter/cloudflare-workers/serve-static.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/client/client.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/client/types.ts +22 -20
- package/tests/fixtures/corpus/oss-repos/hono/src/context.ts +13 -10
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/accepts/accepts.ts +10 -7
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/adapter/index.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/css/index.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/factory/index.ts +16 -16
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/ssg/ssg.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/hono-base.ts +3 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/hono.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/css.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/intrinsic-element/components.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/render.ts +7 -7
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/hooks/index.ts +3 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/intrinsic-element/components.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/utils.ts +6 -6
- package/tests/fixtures/corpus/oss-repos/hono/src/middleware/jsx-renderer/index.ts +3 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/middleware/serve-static/index.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/preset/quick.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/preset/tiny.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/router/pattern-router/router.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/router/reg-exp-router/node.ts +4 -4
- package/tests/fixtures/corpus/oss-repos/hono/src/router/reg-exp-router/router.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/router/trie-router/node.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/types.ts +166 -169
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/body.ts +8 -8
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/color.ts +3 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/cookie.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/encode.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/types.ts +30 -33
- package/tests/fixtures/corpus/oss-repos/hono/src/validator/validator.ts +2 -2
- package/tests/fixtures/test-server.ts +3 -2
- package/tests/helpers/performance-metrics.ts +8 -25
- package/tests/helpers/search-relevance.ts +14 -69
- package/tests/integration/cli-consistency.test.ts +5 -4
- package/tests/integration/python-bridge.test.ts +13 -3
- package/tests/mcp/server.test.ts +1 -1
- package/tests/services/code-unit.service.test.ts +48 -0
- package/tests/services/job.service.test.ts +124 -0
- package/tests/services/search.progressive-context.test.ts +2 -2
- package/.claude-plugin/plugin.json +0 -13
- package/dist/chunk-6PBP5DVD.js.map +0 -1
- package/dist/chunk-L2YVNC63.js.map +0 -1
- package/dist/chunk-RST4XGRL.js.map +0 -1
- package/dist/chunk-WT2DAEO7.js.map +0 -1
- package/dist/watch.service-YAIKKDCF.js +0 -7
- package/skills/atomic-commits/SKILL.md +0 -77
- /package/dist/{watch.service-YAIKKDCF.js.map → watch.service-BJV3TI3F.js.map} +0 -0
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
queryNodesByType,
|
|
18
18
|
extractImportPath,
|
|
19
19
|
type TreeSitterNode,
|
|
20
|
-
type TreeSitterTree
|
|
20
|
+
type TreeSitterTree,
|
|
21
21
|
} from './tree-sitter-parser.js';
|
|
22
22
|
|
|
23
23
|
describe('tree-sitter-parser', () => {
|
|
@@ -252,7 +252,7 @@ describe('tree-sitter-parser', () => {
|
|
|
252
252
|
// Create a mock node without a name field
|
|
253
253
|
const mockNode: Partial<TreeSitterNode> = {
|
|
254
254
|
children: [],
|
|
255
|
-
childForFieldName: () => null
|
|
255
|
+
childForFieldName: () => null,
|
|
256
256
|
};
|
|
257
257
|
const sig = getFunctionSignature(mockNode as TreeSitterNode);
|
|
258
258
|
expect(sig).toBe('');
|
|
@@ -288,7 +288,7 @@ describe('tree-sitter-parser', () => {
|
|
|
288
288
|
it('returns empty string when argument node is missing', () => {
|
|
289
289
|
// Create a mock node without an argument field
|
|
290
290
|
const mockNode: Partial<TreeSitterNode> = {
|
|
291
|
-
childForFieldName: () => null
|
|
291
|
+
childForFieldName: () => null,
|
|
292
292
|
};
|
|
293
293
|
const path = extractImportPath(mockNode as TreeSitterNode);
|
|
294
294
|
expect(path).toBe('');
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Parser from 'tree-sitter';
|
|
2
|
-
import Rust from 'tree-sitter-rust';
|
|
3
2
|
import Go from 'tree-sitter-go';
|
|
3
|
+
import Rust from 'tree-sitter-rust';
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Tree-sitter infrastructure for parsing Rust and Go code.
|
|
@@ -105,21 +105,15 @@ export function getNodeText(node: TreeSitterNode): string {
|
|
|
105
105
|
/**
|
|
106
106
|
* Get all children of a specific type
|
|
107
107
|
*/
|
|
108
|
-
export function getChildrenOfType(
|
|
109
|
-
node
|
|
110
|
-
type: string
|
|
111
|
-
): TreeSitterNode[] {
|
|
112
|
-
return node.children.filter(child => child.type === type);
|
|
108
|
+
export function getChildrenOfType(node: TreeSitterNode, type: string): TreeSitterNode[] {
|
|
109
|
+
return node.children.filter((child) => child.type === type);
|
|
113
110
|
}
|
|
114
111
|
|
|
115
112
|
/**
|
|
116
113
|
* Get the first child of a specific type
|
|
117
114
|
*/
|
|
118
|
-
export function getFirstChildOfType(
|
|
119
|
-
node
|
|
120
|
-
type: string
|
|
121
|
-
): TreeSitterNode | null {
|
|
122
|
-
return node.children.find(child => child.type === type) ?? null;
|
|
115
|
+
export function getFirstChildOfType(node: TreeSitterNode, type: string): TreeSitterNode | null {
|
|
116
|
+
return node.children.find((child) => child.type === type) ?? null;
|
|
123
117
|
}
|
|
124
118
|
|
|
125
119
|
/**
|
|
@@ -136,14 +130,14 @@ export function getChildByFieldName(
|
|
|
136
130
|
* Check if node has a visibility modifier (pub)
|
|
137
131
|
*/
|
|
138
132
|
export function hasVisibilityModifier(node: TreeSitterNode): boolean {
|
|
139
|
-
return node.children.some(child => child.type === 'visibility_modifier');
|
|
133
|
+
return node.children.some((child) => child.type === 'visibility_modifier');
|
|
140
134
|
}
|
|
141
135
|
|
|
142
136
|
/**
|
|
143
137
|
* Get visibility modifier text (e.g., "pub", "pub(crate)")
|
|
144
138
|
*/
|
|
145
139
|
export function getVisibilityModifier(node: TreeSitterNode): string | null {
|
|
146
|
-
const visNode = node.children.find(child => child.type === 'visibility_modifier');
|
|
140
|
+
const visNode = node.children.find((child) => child.type === 'visibility_modifier');
|
|
147
141
|
return visNode !== undefined ? visNode.text : null;
|
|
148
142
|
}
|
|
149
143
|
|
|
@@ -152,14 +146,14 @@ export function getVisibilityModifier(node: TreeSitterNode): string | null {
|
|
|
152
146
|
*/
|
|
153
147
|
export function isAsyncFunction(node: TreeSitterNode): boolean {
|
|
154
148
|
// Check for 'async' keyword in function_item or function_signature_item
|
|
155
|
-
return node.children.some(child => child.type === 'async' || child.text === 'async');
|
|
149
|
+
return node.children.some((child) => child.type === 'async' || child.text === 'async');
|
|
156
150
|
}
|
|
157
151
|
|
|
158
152
|
/**
|
|
159
153
|
* Check if a function is unsafe
|
|
160
154
|
*/
|
|
161
155
|
export function isUnsafeFunction(node: TreeSitterNode): boolean {
|
|
162
|
-
return node.children.some(child => child.type === 'unsafe' || child.text === 'unsafe');
|
|
156
|
+
return node.children.some((child) => child.type === 'unsafe' || child.text === 'unsafe');
|
|
163
157
|
}
|
|
164
158
|
|
|
165
159
|
/**
|
|
@@ -190,7 +184,7 @@ export function getFunctionSignature(node: TreeSitterNode): string {
|
|
|
190
184
|
|
|
191
185
|
// Add return type
|
|
192
186
|
if (returnTypeNode !== null) {
|
|
193
|
-
signature +=
|
|
187
|
+
signature += ` ${returnTypeNode.text}`;
|
|
194
188
|
}
|
|
195
189
|
|
|
196
190
|
return signature;
|
|
@@ -57,7 +57,7 @@ describe('crawl command execution', () => {
|
|
|
57
57
|
};
|
|
58
58
|
|
|
59
59
|
vi.mocked(createServices).mockResolvedValue(mockServices);
|
|
60
|
-
vi.mocked(IntelligentCrawler).mockImplementation(function(this: any) {
|
|
60
|
+
vi.mocked(IntelligentCrawler).mockImplementation(function (this: any) {
|
|
61
61
|
return mockCrawler as any;
|
|
62
62
|
} as any);
|
|
63
63
|
|
|
@@ -278,7 +278,14 @@ describe('crawl command execution', () => {
|
|
|
278
278
|
);
|
|
279
279
|
|
|
280
280
|
const command = createCrawlCommand(getOptions);
|
|
281
|
-
command.parseOptions([
|
|
281
|
+
command.parseOptions([
|
|
282
|
+
'--crawl',
|
|
283
|
+
'all Getting Started pages',
|
|
284
|
+
'--extract',
|
|
285
|
+
'code examples',
|
|
286
|
+
'--max-pages',
|
|
287
|
+
'100',
|
|
288
|
+
]);
|
|
282
289
|
const actionHandler = command._actionHandler;
|
|
283
290
|
|
|
284
291
|
await actionHandler(['https://example.com', 'test-store']);
|
|
@@ -337,7 +344,10 @@ describe('crawl command execution', () => {
|
|
|
337
344
|
|
|
338
345
|
it('throws error when store creation fails', async () => {
|
|
339
346
|
mockServices.store.getByIdOrName.mockResolvedValue(undefined);
|
|
340
|
-
mockServices.store.create.mockResolvedValue({
|
|
347
|
+
mockServices.store.create.mockResolvedValue({
|
|
348
|
+
success: false,
|
|
349
|
+
error: new Error('Name already exists'),
|
|
350
|
+
});
|
|
341
351
|
|
|
342
352
|
const command = createCrawlCommand(getOptions);
|
|
343
353
|
const actionHandler = command._actionHandler;
|
|
@@ -388,9 +398,7 @@ describe('crawl command execution', () => {
|
|
|
388
398
|
|
|
389
399
|
await actionHandler(['https://example.com', 'new-store']);
|
|
390
400
|
|
|
391
|
-
expect(consoleLogSpy).toHaveBeenCalledWith(
|
|
392
|
-
expect.stringContaining('"storeCreated": true')
|
|
393
|
-
);
|
|
401
|
+
expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"storeCreated": true'));
|
|
394
402
|
});
|
|
395
403
|
});
|
|
396
404
|
|
|
@@ -439,7 +447,9 @@ describe('crawl command execution', () => {
|
|
|
439
447
|
const command = createCrawlCommand(getOptions);
|
|
440
448
|
const actionHandler = command._actionHandler;
|
|
441
449
|
|
|
442
|
-
await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
|
|
450
|
+
await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
|
|
451
|
+
'process.exit: 6'
|
|
452
|
+
);
|
|
443
453
|
|
|
444
454
|
expect(consoleErrorSpy).toHaveBeenCalledWith('Error: Crawl failed: Network timeout');
|
|
445
455
|
expect(processExitSpy).toHaveBeenCalledWith(6);
|
|
@@ -475,7 +485,9 @@ describe('crawl command execution', () => {
|
|
|
475
485
|
const command = createCrawlCommand(getOptions);
|
|
476
486
|
const actionHandler = command._actionHandler;
|
|
477
487
|
|
|
478
|
-
await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
|
|
488
|
+
await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
|
|
489
|
+
'process.exit: 6'
|
|
490
|
+
);
|
|
479
491
|
|
|
480
492
|
expect(consoleErrorSpy).toHaveBeenCalledWith(
|
|
481
493
|
'Error: Crawl failed: Embedding service unavailable'
|
|
@@ -514,7 +526,9 @@ describe('crawl command execution', () => {
|
|
|
514
526
|
const command = createCrawlCommand(getOptions);
|
|
515
527
|
const actionHandler = command._actionHandler;
|
|
516
528
|
|
|
517
|
-
await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
|
|
529
|
+
await expect(actionHandler(['https://example.com', 'test-store'])).rejects.toThrow(
|
|
530
|
+
'process.exit: 6'
|
|
531
|
+
);
|
|
518
532
|
|
|
519
533
|
expect(consoleErrorSpy).toHaveBeenCalledWith('Error: Crawl failed: Database write error');
|
|
520
534
|
expect(processExitSpy).toHaveBeenCalledWith(6);
|
|
@@ -592,18 +606,10 @@ describe('crawl command execution', () => {
|
|
|
592
606
|
|
|
593
607
|
await actionHandler(['https://example.com', 'test-store']);
|
|
594
608
|
|
|
595
|
-
expect(consoleLogSpy).toHaveBeenCalledWith(
|
|
596
|
-
|
|
597
|
-
);
|
|
598
|
-
expect(consoleLogSpy).toHaveBeenCalledWith(
|
|
599
|
-
expect.stringContaining('"store": "test-store"')
|
|
600
|
-
);
|
|
601
|
-
expect(consoleLogSpy).toHaveBeenCalledWith(
|
|
602
|
-
expect.stringContaining('"pagesCrawled": 1')
|
|
603
|
-
);
|
|
604
|
-
expect(consoleLogSpy).toHaveBeenCalledWith(
|
|
605
|
-
expect.stringContaining('"mode": "intelligent"')
|
|
606
|
-
);
|
|
609
|
+
expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"success": true'));
|
|
610
|
+
expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"store": "test-store"'));
|
|
611
|
+
expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"pagesCrawled": 1'));
|
|
612
|
+
expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringContaining('"mode": "intelligent"'));
|
|
607
613
|
expect(consoleLogSpy).toHaveBeenCalledWith(
|
|
608
614
|
expect.stringContaining('"hadCrawlInstruction": true')
|
|
609
615
|
);
|
|
@@ -950,7 +956,11 @@ describe('crawl command execution', () => {
|
|
|
950
956
|
if (progressCallback) {
|
|
951
957
|
progressCallback({ type: 'strategy', message: 'Planning crawl...' });
|
|
952
958
|
progressCallback({ type: 'strategy', message: undefined }); // Test fallback
|
|
953
|
-
progressCallback({
|
|
959
|
+
progressCallback({
|
|
960
|
+
type: 'page',
|
|
961
|
+
pagesVisited: 0,
|
|
962
|
+
currentUrl: 'https://example.com/page1',
|
|
963
|
+
});
|
|
954
964
|
progressCallback({ type: 'page', pagesVisited: 1, currentUrl: undefined }); // Test fallback
|
|
955
965
|
progressCallback({ type: 'extraction', currentUrl: 'https://example.com/page1' });
|
|
956
966
|
progressCallback({ type: 'extraction', currentUrl: undefined }); // Test fallback
|
|
@@ -978,7 +988,10 @@ describe('crawl command execution', () => {
|
|
|
978
988
|
|
|
979
989
|
expect(mockCrawler.on).toHaveBeenCalledWith('progress', expect.any(Function));
|
|
980
990
|
} finally {
|
|
981
|
-
Object.defineProperty(process.stdout, 'isTTY', {
|
|
991
|
+
Object.defineProperty(process.stdout, 'isTTY', {
|
|
992
|
+
value: originalIsTTY,
|
|
993
|
+
configurable: true,
|
|
994
|
+
});
|
|
982
995
|
}
|
|
983
996
|
});
|
|
984
997
|
|
|
@@ -1022,7 +1035,10 @@ describe('crawl command execution', () => {
|
|
|
1022
1035
|
expect(consoleLogSpy).toHaveBeenCalledWith('Crawling https://example.com');
|
|
1023
1036
|
expect(consoleLogSpy).toHaveBeenCalledWith('Crawled 1 pages, indexed 1 chunks');
|
|
1024
1037
|
} finally {
|
|
1025
|
-
Object.defineProperty(process.stdout, 'isTTY', {
|
|
1038
|
+
Object.defineProperty(process.stdout, 'isTTY', {
|
|
1039
|
+
value: originalIsTTY,
|
|
1040
|
+
configurable: true,
|
|
1041
|
+
});
|
|
1026
1042
|
}
|
|
1027
1043
|
});
|
|
1028
1044
|
});
|
|
@@ -1,198 +1,218 @@
|
|
|
1
|
-
import { Command } from 'commander';
|
|
2
1
|
import { createHash } from 'node:crypto';
|
|
2
|
+
import { Command } from 'commander';
|
|
3
3
|
import ora, { type Ora } from 'ora';
|
|
4
|
-
import { createServices, destroyServices } from '../../services/index.js';
|
|
5
4
|
import { IntelligentCrawler, type CrawlProgress } from '../../crawl/intelligent-crawler.js';
|
|
5
|
+
import { ChunkingService } from '../../services/chunking.service.js';
|
|
6
|
+
import { createServices, destroyServices } from '../../services/index.js';
|
|
7
|
+
import { classifyWebContentType } from '../../services/index.service.js';
|
|
6
8
|
import { createDocumentId } from '../../types/brands.js';
|
|
7
|
-
import type { GlobalOptions } from '../program.js';
|
|
8
9
|
import type { Document } from '../../types/document.js';
|
|
9
10
|
import type { WebStore } from '../../types/store.js';
|
|
10
|
-
import {
|
|
11
|
-
import { classifyWebContentType } from '../../services/index.service.js';
|
|
11
|
+
import type { GlobalOptions } from '../program.js';
|
|
12
12
|
|
|
13
13
|
export function createCrawlCommand(getOptions: () => GlobalOptions): Command {
|
|
14
14
|
return new Command('crawl')
|
|
15
15
|
.description('Crawl web pages with natural language control and index into store')
|
|
16
16
|
.argument('<url>', 'URL to crawl')
|
|
17
17
|
.argument('<store>', 'Target web store to add crawled content to')
|
|
18
|
-
.option(
|
|
19
|
-
|
|
18
|
+
.option(
|
|
19
|
+
'--crawl <instruction>',
|
|
20
|
+
'Natural language instruction for what to crawl (e.g., "all Getting Started pages")'
|
|
21
|
+
)
|
|
22
|
+
.option(
|
|
23
|
+
'--extract <instruction>',
|
|
24
|
+
'Natural language instruction for what to extract (e.g., "extract API references")'
|
|
25
|
+
)
|
|
20
26
|
.option('--simple', 'Use simple BFS mode instead of intelligent crawling')
|
|
21
27
|
.option('--max-pages <number>', 'Maximum number of pages to crawl', '50')
|
|
22
28
|
.option('--headless', 'Use headless browser for JavaScript-rendered sites')
|
|
23
|
-
.action(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
// Look up or auto-create web store
|
|
34
|
-
let store: WebStore;
|
|
35
|
-
let storeCreated = false;
|
|
36
|
-
const existingStore = await services.store.getByIdOrName(storeIdOrName);
|
|
37
|
-
|
|
38
|
-
if (!existingStore) {
|
|
39
|
-
// Auto-create web store
|
|
40
|
-
const result = await services.store.create({
|
|
41
|
-
name: storeIdOrName,
|
|
42
|
-
type: 'web',
|
|
43
|
-
url,
|
|
44
|
-
});
|
|
45
|
-
if (!result.success) {
|
|
46
|
-
await destroyServices(services);
|
|
47
|
-
throw new Error(`Failed to create store: ${result.error.message}`);
|
|
48
|
-
}
|
|
49
|
-
// Type narrowing: success check above ensures result.data is Store
|
|
50
|
-
// We know it's a WebStore because we created it with type: 'web'
|
|
51
|
-
const createdStore = result.data;
|
|
52
|
-
if (createdStore.type !== 'web') {
|
|
53
|
-
throw new Error('Unexpected store type after creation');
|
|
29
|
+
.action(
|
|
30
|
+
async (
|
|
31
|
+
url: string,
|
|
32
|
+
storeIdOrName: string,
|
|
33
|
+
cmdOptions: {
|
|
34
|
+
crawl?: string;
|
|
35
|
+
extract?: string;
|
|
36
|
+
simple?: boolean;
|
|
37
|
+
maxPages?: string;
|
|
38
|
+
headless?: boolean;
|
|
54
39
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
40
|
+
) => {
|
|
41
|
+
const globalOpts = getOptions();
|
|
42
|
+
const services = await createServices(globalOpts.config, globalOpts.dataDir);
|
|
43
|
+
|
|
44
|
+
// Look up or auto-create web store
|
|
45
|
+
let store: WebStore;
|
|
46
|
+
let storeCreated = false;
|
|
47
|
+
const existingStore = await services.store.getByIdOrName(storeIdOrName);
|
|
48
|
+
|
|
49
|
+
if (!existingStore) {
|
|
50
|
+
// Auto-create web store
|
|
51
|
+
const result = await services.store.create({
|
|
52
|
+
name: storeIdOrName,
|
|
53
|
+
type: 'web',
|
|
54
|
+
url,
|
|
55
|
+
});
|
|
56
|
+
if (!result.success) {
|
|
57
|
+
await destroyServices(services);
|
|
58
|
+
throw new Error(`Failed to create store: ${result.error.message}`);
|
|
59
|
+
}
|
|
60
|
+
// Type narrowing: success check above ensures result.data is Store
|
|
61
|
+
// We know it's a WebStore because we created it with type: 'web'
|
|
62
|
+
const createdStore = result.data;
|
|
63
|
+
if (createdStore.type !== 'web') {
|
|
64
|
+
throw new Error('Unexpected store type after creation');
|
|
65
|
+
}
|
|
66
|
+
store = createdStore;
|
|
67
|
+
storeCreated = true;
|
|
68
|
+
if (globalOpts.quiet !== true && globalOpts.format !== 'json') {
|
|
69
|
+
console.log(`Created web store: ${store.name}`);
|
|
70
|
+
}
|
|
71
|
+
} else if (existingStore.type !== 'web') {
|
|
72
|
+
await destroyServices(services);
|
|
73
|
+
throw new Error(
|
|
74
|
+
`Store "${storeIdOrName}" exists but is not a web store (type: ${existingStore.type})`
|
|
75
|
+
);
|
|
76
|
+
} else {
|
|
77
|
+
store = existingStore;
|
|
59
78
|
}
|
|
60
|
-
} else if (existingStore.type !== 'web') {
|
|
61
|
-
await destroyServices(services);
|
|
62
|
-
throw new Error(`Store "${storeIdOrName}" exists but is not a web store (type: ${existingStore.type})`);
|
|
63
|
-
} else {
|
|
64
|
-
store = existingStore;
|
|
65
|
-
}
|
|
66
79
|
|
|
67
|
-
|
|
80
|
+
const maxPages = cmdOptions.maxPages !== undefined ? parseInt(cmdOptions.maxPages) : 50;
|
|
68
81
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
82
|
+
// Use spinner in interactive mode
|
|
83
|
+
const isInteractive =
|
|
84
|
+
process.stdout.isTTY && globalOpts.quiet !== true && globalOpts.format !== 'json';
|
|
85
|
+
let spinner: Ora | undefined;
|
|
72
86
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
87
|
+
if (isInteractive) {
|
|
88
|
+
const mode = cmdOptions.simple === true ? 'simple' : 'intelligent';
|
|
89
|
+
spinner = ora(`Crawling ${url} (${mode} mode)`).start();
|
|
90
|
+
} else if (globalOpts.quiet !== true && globalOpts.format !== 'json') {
|
|
91
|
+
console.log(`Crawling ${url}`);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
const crawler = new IntelligentCrawler();
|
|
95
|
+
// Use web preset for larger prose-friendly chunks
|
|
96
|
+
const webChunker = ChunkingService.forContentType('web');
|
|
97
|
+
let pagesIndexed = 0;
|
|
98
|
+
let chunksCreated = 0;
|
|
99
|
+
let exitCode = 0;
|
|
79
100
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
spinner.text = `Crawling ${String(progress.pagesVisited + 1)}/${String(maxPages)} - ${url}`;
|
|
95
|
-
} else if (progress.type === 'extraction') {
|
|
96
|
-
const url = progress.currentUrl !== undefined ? progress.currentUrl : 'unknown';
|
|
97
|
-
spinner.text = `Extracting from ${url}...`;
|
|
98
|
-
} else if (progress.type === 'error' && progress.message !== undefined) {
|
|
99
|
-
spinner.warn(progress.message);
|
|
101
|
+
// Listen for progress events
|
|
102
|
+
crawler.on('progress', (progress: CrawlProgress) => {
|
|
103
|
+
if (spinner) {
|
|
104
|
+
if (progress.type === 'strategy') {
|
|
105
|
+
spinner.text = progress.message ?? 'Analyzing crawl strategy...';
|
|
106
|
+
} else if (progress.type === 'page') {
|
|
107
|
+
const url = progress.currentUrl ?? 'unknown';
|
|
108
|
+
spinner.text = `Crawling ${String(progress.pagesVisited + 1)}/${String(maxPages)} - ${url}`;
|
|
109
|
+
} else if (progress.type === 'extraction') {
|
|
110
|
+
const url = progress.currentUrl ?? 'unknown';
|
|
111
|
+
spinner.text = `Extracting from ${url}...`;
|
|
112
|
+
} else if (progress.type === 'error' && progress.message !== undefined) {
|
|
113
|
+
spinner.warn(progress.message);
|
|
114
|
+
}
|
|
100
115
|
}
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
try {
|
|
119
|
+
await services.lance.initialize(store.id);
|
|
120
|
+
const docs: Document[] = [];
|
|
121
|
+
|
|
122
|
+
// Crawl pages using IntelligentCrawler
|
|
123
|
+
for await (const result of crawler.crawl(url, {
|
|
124
|
+
...(cmdOptions.crawl !== undefined && { crawlInstruction: cmdOptions.crawl }),
|
|
125
|
+
...(cmdOptions.extract !== undefined && { extractInstruction: cmdOptions.extract }),
|
|
126
|
+
maxPages,
|
|
127
|
+
...(cmdOptions.simple !== undefined && { simple: cmdOptions.simple }),
|
|
128
|
+
useHeadless: cmdOptions.headless ?? false,
|
|
129
|
+
})) {
|
|
130
|
+
// Use extracted content if available, otherwise markdown
|
|
131
|
+
const contentToProcess = result.extracted ?? result.markdown;
|
|
132
|
+
|
|
133
|
+
// Chunk the content using markdown-aware chunking (web content is converted to markdown)
|
|
134
|
+
const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
|
|
135
|
+
const fileType = classifyWebContentType(result.url, result.title);
|
|
136
|
+
const urlHash = createHash('md5').update(result.url).digest('hex');
|
|
137
|
+
|
|
138
|
+
for (const chunk of chunks) {
|
|
139
|
+
const chunkId =
|
|
140
|
+
chunks.length > 1
|
|
141
|
+
? `${store.id}-${urlHash}-${String(chunk.chunkIndex)}`
|
|
142
|
+
: `${store.id}-${urlHash}`;
|
|
143
|
+
const vector = await services.embeddings.embed(chunk.content);
|
|
144
|
+
|
|
145
|
+
docs.push({
|
|
146
|
+
id: createDocumentId(chunkId),
|
|
147
|
+
content: chunk.content,
|
|
148
|
+
vector,
|
|
149
|
+
metadata: {
|
|
150
|
+
type: chunks.length > 1 ? 'chunk' : 'web',
|
|
151
|
+
storeId: store.id,
|
|
152
|
+
url: result.url,
|
|
153
|
+
title: result.title,
|
|
154
|
+
extracted: result.extracted !== undefined,
|
|
155
|
+
depth: result.depth,
|
|
156
|
+
indexedAt: new Date(),
|
|
157
|
+
fileType,
|
|
158
|
+
chunkIndex: chunk.chunkIndex,
|
|
159
|
+
totalChunks: chunk.totalChunks,
|
|
160
|
+
sectionHeader: chunk.sectionHeader,
|
|
161
|
+
},
|
|
162
|
+
});
|
|
163
|
+
chunksCreated++;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
pagesIndexed++;
|
|
149
167
|
}
|
|
150
168
|
|
|
151
|
-
|
|
152
|
-
|
|
169
|
+
// Index all documents
|
|
170
|
+
if (docs.length > 0) {
|
|
171
|
+
if (spinner) {
|
|
172
|
+
spinner.text = 'Indexing documents...';
|
|
173
|
+
}
|
|
174
|
+
await services.lance.addDocuments(store.id, docs);
|
|
175
|
+
}
|
|
153
176
|
|
|
154
|
-
|
|
155
|
-
|
|
177
|
+
const crawlResult = {
|
|
178
|
+
success: true,
|
|
179
|
+
store: store.name,
|
|
180
|
+
storeCreated,
|
|
181
|
+
url,
|
|
182
|
+
pagesCrawled: pagesIndexed,
|
|
183
|
+
chunksCreated,
|
|
184
|
+
mode: cmdOptions.simple === true ? 'simple' : 'intelligent',
|
|
185
|
+
hadCrawlInstruction: cmdOptions.crawl !== undefined,
|
|
186
|
+
hadExtractInstruction: cmdOptions.extract !== undefined,
|
|
187
|
+
};
|
|
188
|
+
|
|
189
|
+
if (globalOpts.format === 'json') {
|
|
190
|
+
console.log(JSON.stringify(crawlResult, null, 2));
|
|
191
|
+
} else if (spinner !== undefined) {
|
|
192
|
+
spinner.succeed(
|
|
193
|
+
`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`
|
|
194
|
+
);
|
|
195
|
+
} else if (globalOpts.quiet !== true) {
|
|
196
|
+
console.log(
|
|
197
|
+
`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`
|
|
198
|
+
);
|
|
199
|
+
}
|
|
200
|
+
} catch (error) {
|
|
201
|
+
const message = `Crawl failed: ${error instanceof Error ? error.message : String(error)}`;
|
|
156
202
|
if (spinner) {
|
|
157
|
-
spinner.
|
|
203
|
+
spinner.fail(message);
|
|
204
|
+
} else {
|
|
205
|
+
console.error(`Error: ${message}`);
|
|
158
206
|
}
|
|
159
|
-
|
|
207
|
+
exitCode = 6;
|
|
208
|
+
} finally {
|
|
209
|
+
await crawler.stop();
|
|
210
|
+
await destroyServices(services);
|
|
160
211
|
}
|
|
161
212
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
store: store.name,
|
|
165
|
-
storeCreated,
|
|
166
|
-
url,
|
|
167
|
-
pagesCrawled: pagesIndexed,
|
|
168
|
-
chunksCreated,
|
|
169
|
-
mode: cmdOptions.simple === true ? 'simple' : 'intelligent',
|
|
170
|
-
hadCrawlInstruction: cmdOptions.crawl !== undefined,
|
|
171
|
-
hadExtractInstruction: cmdOptions.extract !== undefined,
|
|
172
|
-
};
|
|
173
|
-
|
|
174
|
-
if (globalOpts.format === 'json') {
|
|
175
|
-
console.log(JSON.stringify(crawlResult, null, 2));
|
|
176
|
-
} else if (spinner !== undefined) {
|
|
177
|
-
spinner.succeed(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
|
|
178
|
-
} else if (globalOpts.quiet !== true) {
|
|
179
|
-
console.log(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
|
|
213
|
+
if (exitCode !== 0) {
|
|
214
|
+
process.exit(exitCode);
|
|
180
215
|
}
|
|
181
|
-
} catch (error) {
|
|
182
|
-
const message = `Crawl failed: ${error instanceof Error ? error.message : String(error)}`;
|
|
183
|
-
if (spinner) {
|
|
184
|
-
spinner.fail(message);
|
|
185
|
-
} else {
|
|
186
|
-
console.error(`Error: ${message}`);
|
|
187
|
-
}
|
|
188
|
-
exitCode = 6;
|
|
189
|
-
} finally {
|
|
190
|
-
await crawler.stop();
|
|
191
|
-
await destroyServices(services);
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
if (exitCode !== 0) {
|
|
195
|
-
process.exit(exitCode);
|
|
196
216
|
}
|
|
197
|
-
|
|
217
|
+
);
|
|
198
218
|
}
|