bluera-knowledge 0.9.32 → 0.9.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/hooks/post-edit-check.sh +5 -3
- package/.claude/skills/atomic-commits/SKILL.md +3 -1
- package/.husky/pre-commit +3 -2
- package/.prettierrc +9 -0
- package/.versionrc.json +1 -1
- package/CHANGELOG.md +70 -0
- package/CLAUDE.md +6 -0
- package/README.md +25 -13
- package/bun.lock +277 -33
- package/dist/{chunk-L2YVNC63.js → chunk-6FHWC36B.js} +9 -1
- package/dist/chunk-6FHWC36B.js.map +1 -0
- package/dist/{chunk-RST4XGRL.js → chunk-DC7CGSGT.js} +288 -241
- package/dist/chunk-DC7CGSGT.js.map +1 -0
- package/dist/{chunk-6PBP5DVD.js → chunk-WFNPNAAP.js} +3212 -3054
- package/dist/chunk-WFNPNAAP.js.map +1 -0
- package/dist/{chunk-WT2DAEO7.js → chunk-Z2KKVH45.js} +548 -482
- package/dist/chunk-Z2KKVH45.js.map +1 -0
- package/dist/index.js +871 -758
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +3 -3
- package/dist/watch.service-BJV3TI3F.js +7 -0
- package/dist/workers/background-worker-cli.js +97 -71
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/eslint.config.js +43 -1
- package/package.json +18 -11
- package/plugin.json +8 -0
- package/python/requirements.txt +1 -1
- package/src/analysis/ast-parser.test.ts +12 -11
- package/src/analysis/ast-parser.ts +28 -22
- package/src/analysis/code-graph.test.ts +52 -62
- package/src/analysis/code-graph.ts +9 -13
- package/src/analysis/dependency-usage-analyzer.test.ts +91 -271
- package/src/analysis/dependency-usage-analyzer.ts +52 -24
- package/src/analysis/go-ast-parser.test.ts +22 -22
- package/src/analysis/go-ast-parser.ts +18 -25
- package/src/analysis/parser-factory.test.ts +9 -9
- package/src/analysis/parser-factory.ts +3 -3
- package/src/analysis/python-ast-parser.test.ts +27 -27
- package/src/analysis/python-ast-parser.ts +2 -2
- package/src/analysis/repo-url-resolver.test.ts +82 -82
- package/src/analysis/rust-ast-parser.test.ts +19 -19
- package/src/analysis/rust-ast-parser.ts +17 -27
- package/src/analysis/tree-sitter-parser.test.ts +3 -3
- package/src/analysis/tree-sitter-parser.ts +10 -16
- package/src/cli/commands/crawl.test.ts +40 -24
- package/src/cli/commands/crawl.ts +186 -166
- package/src/cli/commands/index-cmd.test.ts +90 -90
- package/src/cli/commands/index-cmd.ts +52 -36
- package/src/cli/commands/mcp.test.ts +6 -6
- package/src/cli/commands/mcp.ts +2 -2
- package/src/cli/commands/plugin-api.test.ts +16 -18
- package/src/cli/commands/plugin-api.ts +9 -6
- package/src/cli/commands/search.test.ts +16 -7
- package/src/cli/commands/search.ts +124 -87
- package/src/cli/commands/serve.test.ts +67 -25
- package/src/cli/commands/serve.ts +18 -3
- package/src/cli/commands/setup.test.ts +176 -101
- package/src/cli/commands/setup.ts +140 -117
- package/src/cli/commands/store.test.ts +82 -53
- package/src/cli/commands/store.ts +56 -37
- package/src/cli/program.ts +2 -2
- package/src/crawl/article-converter.test.ts +4 -1
- package/src/crawl/article-converter.ts +46 -31
- package/src/crawl/bridge.test.ts +240 -132
- package/src/crawl/bridge.ts +87 -30
- package/src/crawl/claude-client.test.ts +124 -56
- package/src/crawl/claude-client.ts +7 -15
- package/src/crawl/intelligent-crawler.test.ts +65 -22
- package/src/crawl/intelligent-crawler.ts +86 -53
- package/src/crawl/markdown-utils.ts +1 -4
- package/src/db/embeddings.ts +4 -6
- package/src/db/lance.test.ts +4 -4
- package/src/db/lance.ts +16 -12
- package/src/index.ts +26 -17
- package/src/logging/index.ts +1 -5
- package/src/logging/logger.ts +3 -5
- package/src/logging/payload.test.ts +1 -1
- package/src/logging/payload.ts +3 -5
- package/src/mcp/commands/index.ts +2 -2
- package/src/mcp/commands/job.commands.ts +12 -18
- package/src/mcp/commands/meta.commands.ts +13 -13
- package/src/mcp/commands/registry.ts +5 -8
- package/src/mcp/commands/store.commands.ts +19 -19
- package/src/mcp/handlers/execute.handler.test.ts +10 -10
- package/src/mcp/handlers/execute.handler.ts +4 -5
- package/src/mcp/handlers/index.ts +10 -14
- package/src/mcp/handlers/job.handler.test.ts +10 -10
- package/src/mcp/handlers/job.handler.ts +22 -25
- package/src/mcp/handlers/search.handler.test.ts +36 -65
- package/src/mcp/handlers/search.handler.ts +135 -104
- package/src/mcp/handlers/store.handler.test.ts +41 -52
- package/src/mcp/handlers/store.handler.ts +108 -88
- package/src/mcp/schemas/index.test.ts +73 -68
- package/src/mcp/schemas/index.ts +18 -12
- package/src/mcp/server.test.ts +1 -1
- package/src/mcp/server.ts +59 -46
- package/src/plugin/commands.test.ts +230 -95
- package/src/plugin/commands.ts +24 -25
- package/src/plugin/dependency-analyzer.test.ts +52 -52
- package/src/plugin/dependency-analyzer.ts +85 -22
- package/src/plugin/git-clone.test.ts +24 -13
- package/src/plugin/git-clone.ts +3 -7
- package/src/server/app.test.ts +109 -109
- package/src/server/app.ts +32 -23
- package/src/server/index.test.ts +64 -66
- package/src/services/chunking.service.test.ts +32 -32
- package/src/services/chunking.service.ts +16 -9
- package/src/services/code-graph.service.test.ts +30 -36
- package/src/services/code-graph.service.ts +24 -10
- package/src/services/code-unit.service.test.ts +55 -11
- package/src/services/code-unit.service.ts +85 -11
- package/src/services/config.service.test.ts +37 -18
- package/src/services/config.service.ts +30 -7
- package/src/services/index.service.test.ts +49 -18
- package/src/services/index.service.ts +98 -48
- package/src/services/index.ts +6 -9
- package/src/services/job.service.test.ts +22 -22
- package/src/services/job.service.ts +18 -18
- package/src/services/project-root.service.test.ts +1 -3
- package/src/services/search.service.test.ts +248 -120
- package/src/services/search.service.ts +286 -156
- package/src/services/services.test.ts +1 -1
- package/src/services/snippet.service.test.ts +14 -6
- package/src/services/snippet.service.ts +7 -5
- package/src/services/store.service.test.ts +68 -29
- package/src/services/store.service.ts +41 -12
- package/src/services/watch.service.test.ts +34 -14
- package/src/services/watch.service.ts +11 -1
- package/src/types/brands.test.ts +3 -1
- package/src/types/index.ts +2 -13
- package/src/types/search.ts +10 -8
- package/src/utils/type-guards.test.ts +20 -15
- package/src/utils/type-guards.ts +1 -1
- package/src/workers/background-worker-cli.ts +28 -30
- package/src/workers/background-worker.test.ts +54 -40
- package/src/workers/background-worker.ts +76 -60
- package/src/workers/pid-file.test.ts +167 -0
- package/src/workers/pid-file.ts +82 -0
- package/src/workers/spawn-worker.test.ts +22 -10
- package/src/workers/spawn-worker.ts +6 -6
- package/tests/analysis/ast-parser.test.ts +3 -3
- package/tests/analysis/code-graph.test.ts +5 -5
- package/tests/fixtures/code-snippets/api/error-handling.ts +4 -15
- package/tests/fixtures/code-snippets/api/rest-controller.ts +3 -9
- package/tests/fixtures/code-snippets/auth/jwt-auth.ts +5 -21
- package/tests/fixtures/code-snippets/auth/oauth-flow.ts +4 -4
- package/tests/fixtures/code-snippets/database/repository-pattern.ts +11 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/adapter/aws-lambda/handler.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/adapter/cloudflare-pages/handler.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/adapter/cloudflare-workers/serve-static.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/client/client.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/client/types.ts +22 -20
- package/tests/fixtures/corpus/oss-repos/hono/src/context.ts +13 -10
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/accepts/accepts.ts +10 -7
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/adapter/index.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/css/index.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/factory/index.ts +16 -16
- package/tests/fixtures/corpus/oss-repos/hono/src/helper/ssg/ssg.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/hono-base.ts +3 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/hono.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/css.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/intrinsic-element/components.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/dom/render.ts +7 -7
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/hooks/index.ts +3 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/intrinsic-element/components.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/jsx/utils.ts +6 -6
- package/tests/fixtures/corpus/oss-repos/hono/src/middleware/jsx-renderer/index.ts +3 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/middleware/serve-static/index.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/preset/quick.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/preset/tiny.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/router/pattern-router/router.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/router/reg-exp-router/node.ts +4 -4
- package/tests/fixtures/corpus/oss-repos/hono/src/router/reg-exp-router/router.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/router/trie-router/node.ts +1 -1
- package/tests/fixtures/corpus/oss-repos/hono/src/types.ts +166 -169
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/body.ts +8 -8
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/color.ts +3 -3
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/cookie.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/encode.ts +2 -2
- package/tests/fixtures/corpus/oss-repos/hono/src/utils/types.ts +30 -33
- package/tests/fixtures/corpus/oss-repos/hono/src/validator/validator.ts +2 -2
- package/tests/fixtures/test-server.ts +3 -2
- package/tests/helpers/performance-metrics.ts +8 -25
- package/tests/helpers/search-relevance.ts +14 -69
- package/tests/integration/cli-consistency.test.ts +6 -5
- package/tests/integration/python-bridge.test.ts +13 -3
- package/tests/mcp/server.test.ts +1 -1
- package/tests/services/code-unit.service.test.ts +48 -0
- package/tests/services/job.service.test.ts +124 -0
- package/tests/services/search.progressive-context.test.ts +2 -2
- package/.claude-plugin/plugin.json +0 -13
- package/dist/chunk-6PBP5DVD.js.map +0 -1
- package/dist/chunk-L2YVNC63.js.map +0 -1
- package/dist/chunk-RST4XGRL.js.map +0 -1
- package/dist/chunk-WT2DAEO7.js.map +0 -1
- package/dist/watch.service-YAIKKDCF.js +0 -7
- package/skills/atomic-commits/SKILL.md +0 -77
- package/dist/{watch.service-YAIKKDCF.js.map → watch.service-BJV3TI3F.js.map} +0 -0
package/src/crawl/bridge.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { spawn, type ChildProcess } from 'node:child_process';
|
|
2
|
-
import { createInterface } from 'node:readline';
|
|
3
2
|
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { createInterface } from 'node:readline';
|
|
4
4
|
import { ZodError } from 'zod';
|
|
5
5
|
import {
|
|
6
6
|
type CrawlResult,
|
|
@@ -70,7 +70,8 @@ export class PythonBridge {
|
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
if (this.process.stdout === null) {
|
|
73
|
-
this.process
|
|
73
|
+
this.process.kill(); // Kill process to prevent zombie
|
|
74
|
+
this.process = null; // Clean up reference
|
|
74
75
|
return Promise.reject(new Error('Python bridge process stdout is null'));
|
|
75
76
|
}
|
|
76
77
|
const rl = createInterface({ input: this.process.stdout });
|
|
@@ -111,11 +112,16 @@ export class PythonBridge {
|
|
|
111
112
|
} catch (error: unknown) {
|
|
112
113
|
// Log validation failure with original response for debugging
|
|
113
114
|
if (error instanceof ZodError) {
|
|
114
|
-
logger.error(
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
115
|
+
logger.error(
|
|
116
|
+
{
|
|
117
|
+
issues: error.issues,
|
|
118
|
+
response: JSON.stringify(response.result),
|
|
119
|
+
},
|
|
120
|
+
'Python bridge response validation failed'
|
|
121
|
+
);
|
|
122
|
+
pending.reject(
|
|
123
|
+
new Error(`Invalid response format from Python bridge: ${error.message}`)
|
|
124
|
+
);
|
|
119
125
|
} else {
|
|
120
126
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
121
127
|
logger.error({ error: errorMessage }, 'Response validation error');
|
|
@@ -126,10 +132,13 @@ export class PythonBridge {
|
|
|
126
132
|
// If neither result nor error, leave pending (will timeout)
|
|
127
133
|
}
|
|
128
134
|
} catch (err) {
|
|
129
|
-
logger.error(
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
135
|
+
logger.error(
|
|
136
|
+
{
|
|
137
|
+
error: err instanceof Error ? err.message : String(err),
|
|
138
|
+
line,
|
|
139
|
+
},
|
|
140
|
+
'Failed to parse JSON response from Python bridge'
|
|
141
|
+
);
|
|
133
142
|
}
|
|
134
143
|
});
|
|
135
144
|
|
|
@@ -157,13 +166,18 @@ export class PythonBridge {
|
|
|
157
166
|
}
|
|
158
167
|
}, timeoutMs);
|
|
159
168
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
169
|
+
this.pending.set(id, {
|
|
170
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions -- Promise resolve type narrowing
|
|
171
|
+
resolve: resolve as (v: PendingResult) => void,
|
|
172
|
+
reject,
|
|
173
|
+
timeout,
|
|
174
|
+
method: 'crawl',
|
|
175
|
+
});
|
|
176
|
+
if (!this.process?.stdin) {
|
|
163
177
|
reject(new Error('Python bridge process not available'));
|
|
164
178
|
return;
|
|
165
179
|
}
|
|
166
|
-
this.process.stdin.write(JSON.stringify(request)
|
|
180
|
+
this.process.stdin.write(`${JSON.stringify(request)}\n`);
|
|
167
181
|
});
|
|
168
182
|
}
|
|
169
183
|
|
|
@@ -187,17 +201,26 @@ export class PythonBridge {
|
|
|
187
201
|
}
|
|
188
202
|
}, timeoutMs);
|
|
189
203
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
204
|
+
this.pending.set(id, {
|
|
205
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions -- Promise resolve type narrowing
|
|
206
|
+
resolve: resolve as (v: PendingResult) => void,
|
|
207
|
+
reject,
|
|
208
|
+
timeout,
|
|
209
|
+
method: 'fetch_headless',
|
|
210
|
+
});
|
|
211
|
+
if (!this.process?.stdin) {
|
|
193
212
|
reject(new Error('Python bridge process not available'));
|
|
194
213
|
return;
|
|
195
214
|
}
|
|
196
|
-
this.process.stdin.write(JSON.stringify(request)
|
|
215
|
+
this.process.stdin.write(`${JSON.stringify(request)}\n`);
|
|
197
216
|
});
|
|
198
217
|
}
|
|
199
218
|
|
|
200
|
-
async parsePython(
|
|
219
|
+
async parsePython(
|
|
220
|
+
code: string,
|
|
221
|
+
filePath: string,
|
|
222
|
+
timeoutMs: number = 10000
|
|
223
|
+
): Promise<ParsePythonResult> {
|
|
201
224
|
if (!this.process) await this.start();
|
|
202
225
|
|
|
203
226
|
const id = randomUUID();
|
|
@@ -213,28 +236,62 @@ export class PythonBridge {
|
|
|
213
236
|
const pending = this.pending.get(id);
|
|
214
237
|
if (pending) {
|
|
215
238
|
this.pending.delete(id);
|
|
216
|
-
reject(
|
|
239
|
+
reject(
|
|
240
|
+
new Error(`Python parsing timeout after ${String(timeoutMs)}ms for file: ${filePath}`)
|
|
241
|
+
);
|
|
217
242
|
}
|
|
218
243
|
}, timeoutMs);
|
|
219
244
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
245
|
+
this.pending.set(id, {
|
|
246
|
+
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions -- Promise resolve type narrowing
|
|
247
|
+
resolve: resolve as (v: PendingResult) => void,
|
|
248
|
+
reject,
|
|
249
|
+
timeout,
|
|
250
|
+
method: 'parse_python',
|
|
251
|
+
});
|
|
252
|
+
if (!this.process?.stdin) {
|
|
223
253
|
reject(new Error('Python bridge process not available'));
|
|
224
254
|
return;
|
|
225
255
|
}
|
|
226
|
-
this.process.stdin.write(JSON.stringify(request)
|
|
256
|
+
this.process.stdin.write(`${JSON.stringify(request)}\n`);
|
|
227
257
|
});
|
|
228
258
|
}
|
|
229
259
|
|
|
230
260
|
stop(): Promise<void> {
|
|
231
|
-
if (this.process) {
|
|
261
|
+
if (!this.process) {
|
|
262
|
+
return Promise.resolve();
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
return new Promise((resolve) => {
|
|
232
266
|
this.stoppingIntentionally = true;
|
|
233
267
|
this.rejectAllPending(new Error('Python bridge stopped'));
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
268
|
+
|
|
269
|
+
// Wait for process to actually exit before resolving
|
|
270
|
+
const proc = this.process;
|
|
271
|
+
if (proc === null) {
|
|
272
|
+
resolve();
|
|
273
|
+
return;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
// Set up exit handler to resolve when process terminates
|
|
277
|
+
const onExit = (): void => {
|
|
278
|
+
resolve();
|
|
279
|
+
};
|
|
280
|
+
proc.once('exit', onExit);
|
|
281
|
+
|
|
282
|
+
// Send SIGTERM to gracefully stop
|
|
283
|
+
proc.kill();
|
|
284
|
+
|
|
285
|
+
// Safety timeout in case process doesn't exit within 1 second
|
|
286
|
+
setTimeout(() => {
|
|
287
|
+
proc.removeListener('exit', onExit);
|
|
288
|
+
if (this.process === proc) {
|
|
289
|
+
proc.kill('SIGKILL'); // Force kill
|
|
290
|
+
this.process = null;
|
|
291
|
+
}
|
|
292
|
+
resolve();
|
|
293
|
+
}, 1000);
|
|
294
|
+
});
|
|
238
295
|
}
|
|
239
296
|
|
|
240
297
|
private rejectAllPending(error: Error): void {
|
|
package/src/crawl/claude-client.test.ts
CHANGED
|
@@ -85,10 +85,15 @@ describe('ClaudeClient', () => {
|
|
|
85
85
|
|
|
86
86
|
// Simulate successful response
|
|
87
87
|
setTimeout(() => {
|
|
88
|
-
mockProcess.stdout.emit(
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
88
|
+
mockProcess.stdout.emit(
|
|
89
|
+
'data',
|
|
90
|
+
Buffer.from(
|
|
91
|
+
JSON.stringify({
|
|
92
|
+
urls: ['https://example.com/page1', 'https://example.com/page2'],
|
|
93
|
+
reasoning: 'Found documentation pages',
|
|
94
|
+
})
|
|
95
|
+
)
|
|
96
|
+
);
|
|
92
97
|
mockProcess.emit('close', 0);
|
|
93
98
|
}, 10);
|
|
94
99
|
|
|
@@ -101,10 +106,15 @@ describe('ClaudeClient', () => {
|
|
|
101
106
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all docs');
|
|
102
107
|
|
|
103
108
|
setTimeout(() => {
|
|
104
|
-
mockProcess.stdout.emit(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
109
|
+
mockProcess.stdout.emit(
|
|
110
|
+
'data',
|
|
111
|
+
Buffer.from(
|
|
112
|
+
JSON.stringify({
|
|
113
|
+
urls: ['https://example.com/page1'],
|
|
114
|
+
reasoning: 'Test',
|
|
115
|
+
})
|
|
116
|
+
)
|
|
117
|
+
);
|
|
108
118
|
mockProcess.emit('close', 0);
|
|
109
119
|
}, 10);
|
|
110
120
|
|
|
@@ -121,7 +131,7 @@ describe('ClaudeClient', () => {
|
|
|
121
131
|
]),
|
|
122
132
|
expect.objectContaining({
|
|
123
133
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
124
|
-
})
|
|
134
|
+
})
|
|
125
135
|
);
|
|
126
136
|
});
|
|
127
137
|
|
|
@@ -129,16 +139,23 @@ describe('ClaudeClient', () => {
|
|
|
129
139
|
const promise = client.determineCrawlUrls('<html><body>Test</body></html>', 'Find tutorials');
|
|
130
140
|
|
|
131
141
|
setTimeout(() => {
|
|
132
|
-
mockProcess.stdout.emit(
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
142
|
+
mockProcess.stdout.emit(
|
|
143
|
+
'data',
|
|
144
|
+
Buffer.from(
|
|
145
|
+
JSON.stringify({
|
|
146
|
+
urls: ['https://example.com/tutorial'],
|
|
147
|
+
reasoning: 'Found tutorial',
|
|
148
|
+
})
|
|
149
|
+
)
|
|
150
|
+
);
|
|
136
151
|
mockProcess.emit('close', 0);
|
|
137
152
|
}, 10);
|
|
138
153
|
|
|
139
154
|
await promise;
|
|
140
155
|
|
|
141
|
-
expect(mockProcess.stdin.write).toHaveBeenCalledWith(
|
|
156
|
+
expect(mockProcess.stdin.write).toHaveBeenCalledWith(
|
|
157
|
+
expect.stringContaining('Find tutorials')
|
|
158
|
+
);
|
|
142
159
|
expect(mockProcess.stdin.end).toHaveBeenCalled();
|
|
143
160
|
});
|
|
144
161
|
|
|
@@ -146,9 +163,14 @@ describe('ClaudeClient', () => {
|
|
|
146
163
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all');
|
|
147
164
|
|
|
148
165
|
setTimeout(() => {
|
|
149
|
-
mockProcess.stdout.emit(
|
|
150
|
-
|
|
151
|
-
|
|
166
|
+
mockProcess.stdout.emit(
|
|
167
|
+
'data',
|
|
168
|
+
Buffer.from(
|
|
169
|
+
JSON.stringify({
|
|
170
|
+
reasoning: 'No URLs found',
|
|
171
|
+
})
|
|
172
|
+
)
|
|
173
|
+
);
|
|
152
174
|
mockProcess.emit('close', 0);
|
|
153
175
|
}, 10);
|
|
154
176
|
|
|
@@ -159,10 +181,15 @@ describe('ClaudeClient', () => {
|
|
|
159
181
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all');
|
|
160
182
|
|
|
161
183
|
setTimeout(() => {
|
|
162
|
-
mockProcess.stdout.emit(
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
184
|
+
mockProcess.stdout.emit(
|
|
185
|
+
'data',
|
|
186
|
+
Buffer.from(
|
|
187
|
+
JSON.stringify({
|
|
188
|
+
urls: [],
|
|
189
|
+
reasoning: 'No matching pages',
|
|
190
|
+
})
|
|
191
|
+
)
|
|
192
|
+
);
|
|
166
193
|
mockProcess.emit('close', 0);
|
|
167
194
|
}, 10);
|
|
168
195
|
|
|
@@ -173,9 +200,14 @@ describe('ClaudeClient', () => {
|
|
|
173
200
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all');
|
|
174
201
|
|
|
175
202
|
setTimeout(() => {
|
|
176
|
-
mockProcess.stdout.emit(
|
|
177
|
-
|
|
178
|
-
|
|
203
|
+
mockProcess.stdout.emit(
|
|
204
|
+
'data',
|
|
205
|
+
Buffer.from(
|
|
206
|
+
JSON.stringify({
|
|
207
|
+
urls: ['https://example.com/page1'],
|
|
208
|
+
})
|
|
209
|
+
)
|
|
210
|
+
);
|
|
179
211
|
mockProcess.emit('close', 0);
|
|
180
212
|
}, 10);
|
|
181
213
|
|
|
@@ -186,10 +218,15 @@ describe('ClaudeClient', () => {
|
|
|
186
218
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all');
|
|
187
219
|
|
|
188
220
|
setTimeout(() => {
|
|
189
|
-
mockProcess.stdout.emit(
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
221
|
+
mockProcess.stdout.emit(
|
|
222
|
+
'data',
|
|
223
|
+
Buffer.from(
|
|
224
|
+
JSON.stringify({
|
|
225
|
+
urls: ['https://example.com/page1', 123, null],
|
|
226
|
+
reasoning: 'Mixed types',
|
|
227
|
+
})
|
|
228
|
+
)
|
|
229
|
+
);
|
|
193
230
|
mockProcess.emit('close', 0);
|
|
194
231
|
}, 10);
|
|
195
232
|
|
|
@@ -223,10 +260,15 @@ describe('ClaudeClient', () => {
|
|
|
223
260
|
const promise = client.determineCrawlUrls(longHtml, 'Find all');
|
|
224
261
|
|
|
225
262
|
setTimeout(() => {
|
|
226
|
-
mockProcess.stdout.emit(
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
263
|
+
mockProcess.stdout.emit(
|
|
264
|
+
'data',
|
|
265
|
+
Buffer.from(
|
|
266
|
+
JSON.stringify({
|
|
267
|
+
urls: ['https://example.com/page1'],
|
|
268
|
+
reasoning: 'Test',
|
|
269
|
+
})
|
|
270
|
+
)
|
|
271
|
+
);
|
|
230
272
|
mockProcess.emit('close', 0);
|
|
231
273
|
}, 10);
|
|
232
274
|
|
|
@@ -242,10 +284,15 @@ describe('ClaudeClient', () => {
|
|
|
242
284
|
const promise = client.determineCrawlUrls(shortHtml, 'Find all');
|
|
243
285
|
|
|
244
286
|
setTimeout(() => {
|
|
245
|
-
mockProcess.stdout.emit(
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
287
|
+
mockProcess.stdout.emit(
|
|
288
|
+
'data',
|
|
289
|
+
Buffer.from(
|
|
290
|
+
JSON.stringify({
|
|
291
|
+
urls: ['https://example.com/page1'],
|
|
292
|
+
reasoning: 'Test',
|
|
293
|
+
})
|
|
294
|
+
)
|
|
295
|
+
);
|
|
249
296
|
mockProcess.emit('close', 0);
|
|
250
297
|
}, 10);
|
|
251
298
|
|
|
@@ -259,7 +306,10 @@ describe('ClaudeClient', () => {
|
|
|
259
306
|
|
|
260
307
|
describe('extractContent', () => {
|
|
261
308
|
it('should successfully extract content', async () => {
|
|
262
|
-
const promise = client.extractContent(
|
|
309
|
+
const promise = client.extractContent(
|
|
310
|
+
'# Documentation\n\nPricing: $10/month',
|
|
311
|
+
'Extract pricing info'
|
|
312
|
+
);
|
|
263
313
|
|
|
264
314
|
setTimeout(() => {
|
|
265
315
|
mockProcess.stdout.emit('data', Buffer.from('The pricing is $10/month\n'));
|
|
@@ -285,7 +335,7 @@ describe('ClaudeClient', () => {
|
|
|
285
335
|
['-p'],
|
|
286
336
|
expect.objectContaining({
|
|
287
337
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
288
|
-
})
|
|
338
|
+
})
|
|
289
339
|
);
|
|
290
340
|
});
|
|
291
341
|
|
|
@@ -515,12 +565,15 @@ describe('ClaudeClient', () => {
|
|
|
515
565
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all');
|
|
516
566
|
|
|
517
567
|
setTimeout(() => {
|
|
518
|
-
mockProcess.stdout.emit(
|
|
568
|
+
mockProcess.stdout.emit(
|
|
569
|
+
'data',
|
|
570
|
+
Buffer.from(`
|
|
519
571
|
{
|
|
520
572
|
"urls": ["https://example.com/page1"],
|
|
521
573
|
"reasoning": "Found page"
|
|
522
574
|
}
|
|
523
|
-
`)
|
|
575
|
+
`)
|
|
576
|
+
);
|
|
524
577
|
mockProcess.emit('close', 0);
|
|
525
578
|
}, 10);
|
|
526
579
|
|
|
@@ -556,10 +609,15 @@ describe('ClaudeClient', () => {
|
|
|
556
609
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all');
|
|
557
610
|
|
|
558
611
|
setTimeout(() => {
|
|
559
|
-
mockProcess.stdout.emit(
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
612
|
+
mockProcess.stdout.emit(
|
|
613
|
+
'data',
|
|
614
|
+
Buffer.from(
|
|
615
|
+
JSON.stringify({
|
|
616
|
+
urls: 'not an array',
|
|
617
|
+
reasoning: 'Test',
|
|
618
|
+
})
|
|
619
|
+
)
|
|
620
|
+
);
|
|
563
621
|
mockProcess.emit('close', 0);
|
|
564
622
|
}, 10);
|
|
565
623
|
|
|
@@ -570,10 +628,15 @@ describe('ClaudeClient', () => {
|
|
|
570
628
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all');
|
|
571
629
|
|
|
572
630
|
setTimeout(() => {
|
|
573
|
-
mockProcess.stdout.emit(
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
631
|
+
mockProcess.stdout.emit(
|
|
632
|
+
'data',
|
|
633
|
+
Buffer.from(
|
|
634
|
+
JSON.stringify({
|
|
635
|
+
urls: ['https://example.com/page1'],
|
|
636
|
+
reasoning: 123,
|
|
637
|
+
})
|
|
638
|
+
)
|
|
639
|
+
);
|
|
577
640
|
mockProcess.emit('close', 0);
|
|
578
641
|
}, 10);
|
|
579
642
|
|
|
@@ -584,14 +647,19 @@ describe('ClaudeClient', () => {
|
|
|
584
647
|
const promise = client.determineCrawlUrls('<html>test</html>', 'Find all');
|
|
585
648
|
|
|
586
649
|
setTimeout(() => {
|
|
587
|
-
mockProcess.stdout.emit(
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
650
|
+
mockProcess.stdout.emit(
|
|
651
|
+
'data',
|
|
652
|
+
Buffer.from(
|
|
653
|
+
JSON.stringify({
|
|
654
|
+
urls: [
|
|
655
|
+
'https://example.com/page1',
|
|
656
|
+
'https://example.com/page2',
|
|
657
|
+
'https://example.com/page3',
|
|
658
|
+
],
|
|
659
|
+
reasoning: 'Found 3 documentation pages',
|
|
660
|
+
})
|
|
661
|
+
)
|
|
662
|
+
);
|
|
595
663
|
mockProcess.emit('close', 0);
|
|
596
664
|
}, 10);
|
|
597
665
|
|
|
package/src/crawl/claude-client.ts
CHANGED
|
@@ -73,10 +73,7 @@ export class ClaudeClient {
|
|
|
73
73
|
* @param instruction - Natural language crawl instruction (e.g., "scrape all Getting Started pages")
|
|
74
74
|
* @returns List of URLs to crawl with reasoning
|
|
75
75
|
*/
|
|
76
|
-
async determineCrawlUrls(
|
|
77
|
-
seedHtml: string,
|
|
78
|
-
instruction: string,
|
|
79
|
-
): Promise<CrawlStrategy> {
|
|
76
|
+
async determineCrawlUrls(seedHtml: string, instruction: string): Promise<CrawlStrategy> {
|
|
80
77
|
const prompt = `You are analyzing a webpage to determine which pages to crawl based on the user's instruction.
|
|
81
78
|
|
|
82
79
|
Instruction: ${instruction}
|
|
@@ -110,7 +107,7 @@ Return only URLs that are relevant to the instruction. If the instruction mentio
|
|
|
110
107
|
return { urls: parsed.urls, reasoning: parsed.reasoning };
|
|
111
108
|
} catch (error) {
|
|
112
109
|
throw new Error(
|
|
113
|
-
`Failed to determine crawl strategy: ${error instanceof Error ? error.message : String(error)}
|
|
110
|
+
`Failed to determine crawl strategy: ${error instanceof Error ? error.message : String(error)}`
|
|
114
111
|
);
|
|
115
112
|
}
|
|
116
113
|
}
|
|
@@ -133,7 +130,7 @@ ${this.truncateMarkdown(markdown, 100000)}`;
|
|
|
133
130
|
return result.trim();
|
|
134
131
|
} catch (error) {
|
|
135
132
|
throw new Error(
|
|
136
|
-
`Failed to extract content: ${error instanceof Error ? error.message : String(error)}
|
|
133
|
+
`Failed to extract content: ${error instanceof Error ? error.message : String(error)}`
|
|
137
134
|
);
|
|
138
135
|
}
|
|
139
136
|
}
|
|
@@ -145,10 +142,7 @@ ${this.truncateMarkdown(markdown, 100000)}`;
|
|
|
145
142
|
* @param jsonSchema - Optional JSON schema for structured output
|
|
146
143
|
* @returns Claude's response as a string
|
|
147
144
|
*/
|
|
148
|
-
private async callClaude(
|
|
149
|
-
prompt: string,
|
|
150
|
-
jsonSchema?: Record<string, unknown>,
|
|
151
|
-
): Promise<string> {
|
|
145
|
+
private async callClaude(prompt: string, jsonSchema?: Record<string, unknown>): Promise<string> {
|
|
152
146
|
return new Promise<string>((resolve, reject) => {
|
|
153
147
|
const args = ['-p'];
|
|
154
148
|
|
|
@@ -193,9 +187,7 @@ ${this.truncateMarkdown(markdown, 100000)}`;
|
|
|
193
187
|
resolve(stdout.trim());
|
|
194
188
|
} else {
|
|
195
189
|
reject(
|
|
196
|
-
new Error(
|
|
197
|
-
`Claude CLI exited with code ${String(code)}${stderr ? `: ${stderr}` : ''}`,
|
|
198
|
-
),
|
|
190
|
+
new Error(`Claude CLI exited with code ${String(code)}${stderr ? `: ${stderr}` : ''}`)
|
|
199
191
|
);
|
|
200
192
|
}
|
|
201
193
|
});
|
|
@@ -220,7 +212,7 @@ ${this.truncateMarkdown(markdown, 100000)}`;
|
|
|
220
212
|
if (html.length <= maxLength) return html;
|
|
221
213
|
|
|
222
214
|
// Try to keep the beginning (usually has navigation)
|
|
223
|
-
return html.substring(0, maxLength)
|
|
215
|
+
return `${html.substring(0, maxLength)}\n\n[... HTML truncated ...]`;
|
|
224
216
|
}
|
|
225
217
|
|
|
226
218
|
/**
|
|
@@ -229,6 +221,6 @@ ${this.truncateMarkdown(markdown, 100000)}`;
|
|
|
229
221
|
private truncateMarkdown(markdown: string, maxLength: number): string {
|
|
230
222
|
if (markdown.length <= maxLength) return markdown;
|
|
231
223
|
|
|
232
|
-
return markdown.substring(0, maxLength)
|
|
224
|
+
return `${markdown.substring(0, maxLength)}\n\n[... content truncated ...]`;
|
|
233
225
|
}
|
|
234
226
|
}
|