bluera-knowledge 0.18.2 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +19 -28
- package/dist/{chunk-NYRKKRRA.js → chunk-27Y4ENUD.js} +2 -2
- package/dist/{chunk-JSCOGKNU.js → chunk-EQYSYRQJ.js} +129 -50
- package/dist/chunk-EQYSYRQJ.js.map +1 -0
- package/dist/{chunk-YMSMKOMF.js → chunk-KQLTWB4T.js} +5 -112
- package/dist/{chunk-YMSMKOMF.js.map → chunk-KQLTWB4T.js.map} +1 -1
- package/dist/index.js +3 -3
- package/dist/mcp/server.d.ts +0 -29
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/package.json +3 -1
- package/python/ast_worker.py +209 -0
- package/dist/chunk-JSCOGKNU.js.map +0 -1
- package/python/crawl_worker.py +0 -280
- /package/dist/{chunk-NYRKKRRA.js.map → chunk-27Y4ENUD.js.map} +0 -0
|
@@ -5184,39 +5184,6 @@ import { ZodError } from "zod";
|
|
|
5184
5184
|
|
|
5185
5185
|
// src/crawl/schemas.ts
|
|
5186
5186
|
import { z as z4 } from "zod";
|
|
5187
|
-
var CrawledLinkSchema = z4.object({
|
|
5188
|
-
href: z4.string(),
|
|
5189
|
-
text: z4.string(),
|
|
5190
|
-
title: z4.string().optional(),
|
|
5191
|
-
base_domain: z4.string().optional(),
|
|
5192
|
-
head_data: z4.unknown().optional(),
|
|
5193
|
-
head_extraction_status: z4.unknown().optional(),
|
|
5194
|
-
head_extraction_error: z4.unknown().optional(),
|
|
5195
|
-
intrinsic_score: z4.number().optional(),
|
|
5196
|
-
contextual_score: z4.unknown().optional(),
|
|
5197
|
-
total_score: z4.unknown().optional()
|
|
5198
|
-
});
|
|
5199
|
-
var CrawlPageSchema = z4.object({
|
|
5200
|
-
url: z4.string(),
|
|
5201
|
-
title: z4.string(),
|
|
5202
|
-
content: z4.string(),
|
|
5203
|
-
links: z4.array(z4.string()),
|
|
5204
|
-
crawledAt: z4.string()
|
|
5205
|
-
});
|
|
5206
|
-
var CrawlResultSchema = z4.object({
|
|
5207
|
-
pages: z4.array(CrawlPageSchema)
|
|
5208
|
-
});
|
|
5209
|
-
var HeadlessResultSchema = z4.object({
|
|
5210
|
-
html: z4.string(),
|
|
5211
|
-
markdown: z4.string(),
|
|
5212
|
-
links: z4.array(z4.union([CrawledLinkSchema, z4.string()]))
|
|
5213
|
-
});
|
|
5214
|
-
function validateHeadlessResult(data) {
|
|
5215
|
-
return HeadlessResultSchema.parse(data);
|
|
5216
|
-
}
|
|
5217
|
-
function validateCrawlResult(data) {
|
|
5218
|
-
return CrawlResultSchema.parse(data);
|
|
5219
|
-
}
|
|
5220
5187
|
var MethodInfoSchema = z4.object({
|
|
5221
5188
|
name: z4.string(),
|
|
5222
5189
|
async: z4.boolean(),
|
|
@@ -5276,13 +5243,13 @@ var PythonBridge = class {
|
|
|
5276
5243
|
if (isProduction) {
|
|
5277
5244
|
const distIndex = currentFilePath.indexOf(distPattern);
|
|
5278
5245
|
const pluginRoot = currentFilePath.substring(0, distIndex);
|
|
5279
|
-
pythonWorkerPath = path3.join(pluginRoot, "python", "crawl_worker.py");
|
|
5246
|
+
pythonWorkerPath = path3.join(pluginRoot, "python", "ast_worker.py");
|
|
5280
5247
|
const venvPython = getVenvPythonPath(pluginRoot);
|
|
5281
5248
|
pythonPath = existsSync4(venvPython) ? venvPython : getPythonExecutable();
|
|
5282
5249
|
} else {
|
|
5283
5250
|
const srcDir = path3.dirname(path3.dirname(currentFilePath));
|
|
5284
5251
|
const projectRoot = path3.dirname(srcDir);
|
|
5285
|
-
pythonWorkerPath = path3.join(projectRoot, "python", "crawl_worker.py");
|
|
5252
|
+
pythonWorkerPath = path3.join(projectRoot, "python", "ast_worker.py");
|
|
5286
5253
|
pythonPath = getPythonExecutable();
|
|
5287
5254
|
}
|
|
5288
5255
|
logger3.debug(
|
|
@@ -5335,14 +5302,7 @@ var PythonBridge = class {
|
|
|
5335
5302
|
clearTimeout(pending.timeout);
|
|
5336
5303
|
this.pending.delete(response.id);
|
|
5337
5304
|
try {
|
|
5338
|
-
let validated;
|
5339
|
-
if (pending.method === "crawl") {
|
|
5340
|
-
validated = validateCrawlResult(response.result);
|
|
5341
|
-
} else if (pending.method === "fetch_headless") {
|
|
5342
|
-
validated = validateHeadlessResult(response.result);
|
|
5343
|
-
} else {
|
|
5344
|
-
validated = validateParsePythonResult(response.result);
|
|
5345
|
-
}
|
|
5305
|
+
const validated = validateParsePythonResult(response.result);
|
|
5346
5306
|
pending.resolve(validated);
|
|
5347
5307
|
} catch (error) {
|
|
5348
5308
|
if (error instanceof ZodError) {
|
|
@@ -5376,70 +5336,6 @@ var PythonBridge = class {
|
|
|
5376
5336
|
});
|
|
5377
5337
|
return Promise.resolve();
|
|
5378
5338
|
}
|
|
5379
|
-
async crawl(url, timeoutMs = 3e4) {
|
|
5380
|
-
if (!this.process) await this.start();
|
|
5381
|
-
const id = randomUUID3();
|
|
5382
|
-
const request = {
|
|
5383
|
-
jsonrpc: "2.0",
|
|
5384
|
-
id,
|
|
5385
|
-
method: "crawl",
|
|
5386
|
-
params: { url }
|
|
5387
|
-
};
|
|
5388
|
-
return new Promise((resolve4, reject) => {
|
|
5389
|
-
const timeout = setTimeout(() => {
|
|
5390
|
-
const pending = this.pending.get(id);
|
|
5391
|
-
if (pending) {
|
|
5392
|
-
this.pending.delete(id);
|
|
5393
|
-
reject(new Error(`Crawl timeout after ${String(timeoutMs)}ms for URL: ${url}`));
|
|
5394
|
-
}
|
|
5395
|
-
}, timeoutMs);
|
|
5396
|
-
this.pending.set(id, {
|
|
5397
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions -- Promise resolve type narrowing
|
|
5398
|
-
resolve: resolve4,
|
|
5399
|
-
reject,
|
|
5400
|
-
timeout,
|
|
5401
|
-
method: "crawl"
|
|
5402
|
-
});
|
|
5403
|
-
if (!this.process?.stdin) {
|
|
5404
|
-
reject(new Error("Python bridge process not available"));
|
|
5405
|
-
return;
|
|
5406
|
-
}
|
|
5407
|
-
this.process.stdin.write(`${JSON.stringify(request)}
|
|
5408
|
-
`);
|
|
5409
|
-
});
|
|
5410
|
-
}
|
|
5411
|
-
async fetchHeadless(url, timeoutMs = 6e4) {
|
|
5412
|
-
if (!this.process) await this.start();
|
|
5413
|
-
const id = randomUUID3();
|
|
5414
|
-
const request = {
|
|
5415
|
-
jsonrpc: "2.0",
|
|
5416
|
-
id,
|
|
5417
|
-
method: "fetch_headless",
|
|
5418
|
-
params: { url }
|
|
5419
|
-
};
|
|
5420
|
-
return new Promise((resolve4, reject) => {
|
|
5421
|
-
const timeout = setTimeout(() => {
|
|
5422
|
-
const pending = this.pending.get(id);
|
|
5423
|
-
if (pending) {
|
|
5424
|
-
this.pending.delete(id);
|
|
5425
|
-
reject(new Error(`Headless fetch timeout after ${String(timeoutMs)}ms for URL: ${url}`));
|
|
5426
|
-
}
|
|
5427
|
-
}, timeoutMs);
|
|
5428
|
-
this.pending.set(id, {
|
|
5429
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions -- Promise resolve type narrowing
|
|
5430
|
-
resolve: resolve4,
|
|
5431
|
-
reject,
|
|
5432
|
-
timeout,
|
|
5433
|
-
method: "fetch_headless"
|
|
5434
|
-
});
|
|
5435
|
-
if (!this.process?.stdin) {
|
|
5436
|
-
reject(new Error("Python bridge process not available"));
|
|
5437
|
-
return;
|
|
5438
|
-
}
|
|
5439
|
-
this.process.stdin.write(`${JSON.stringify(request)}
|
|
5440
|
-
`);
|
|
5441
|
-
});
|
|
5442
|
-
}
|
|
5443
5339
|
async parsePython(code, filePath, timeoutMs = 1e4) {
|
|
5444
5340
|
if (!this.process) await this.start();
|
|
5445
5341
|
const id = randomUUID3();
|
|
@@ -5460,11 +5356,9 @@ var PythonBridge = class {
|
|
|
5460
5356
|
}
|
|
5461
5357
|
}, timeoutMs);
|
|
5462
5358
|
this.pending.set(id, {
|
|
5463
|
-
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions -- Promise resolve type narrowing
|
|
5464
5359
|
resolve: resolve4,
|
|
5465
5360
|
reject,
|
|
5466
|
-
timeout,
|
|
5467
|
-
method: "parse_python"
|
|
5361
|
+
timeout
|
|
5468
5362
|
});
|
|
5469
5363
|
if (!this.process?.stdin) {
|
|
5470
5364
|
reject(new Error("Python bridge process not available"));
|
|
@@ -5990,7 +5884,6 @@ export {
|
|
|
5990
5884
|
shutdownLogger,
|
|
5991
5885
|
summarizePayload,
|
|
5992
5886
|
truncateForLog,
|
|
5993
|
-
PythonBridge,
|
|
5994
5887
|
ChunkingService,
|
|
5995
5888
|
ASTParser,
|
|
5996
5889
|
ok,
|
|
@@ -6007,4 +5900,4 @@ export {
|
|
|
6007
5900
|
createServices,
|
|
6008
5901
|
destroyServices
|
|
6009
5902
|
};
|
|
6010
|
-
//# sourceMappingURL=chunk-YMSMKOMF.js.map
|
|
5903
|
+
//# sourceMappingURL=chunk-KQLTWB4T.js.map
|