bluera-knowledge 0.9.26 → 0.9.31
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as published.
- package/.claude/commands/commit.md +4 -7
- package/.claude/hooks/post-edit-check.sh +21 -24
- package/.claude/skills/atomic-commits/SKILL.md +6 -0
- package/.claude-plugin/plugin.json +1 -1
- package/.env.example +4 -0
- package/.husky/pre-push +12 -2
- package/.versionrc.json +0 -4
- package/BUGS-FOUND.md +71 -0
- package/CHANGELOG.md +76 -0
- package/README.md +55 -20
- package/bun.lock +35 -1
- package/commands/crawl.md +2 -0
- package/dist/{chunk-BICFAWMN.js → chunk-2SJHNRXD.js} +73 -8
- package/dist/chunk-2SJHNRXD.js.map +1 -0
- package/dist/{chunk-J7J6LXOJ.js → chunk-OGEY66FZ.js} +106 -41
- package/dist/chunk-OGEY66FZ.js.map +1 -0
- package/dist/{chunk-5QMHZUC4.js → chunk-RWSXP3PQ.js} +482 -106
- package/dist/chunk-RWSXP3PQ.js.map +1 -0
- package/dist/index.js +73 -28
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +2 -2
- package/eslint.config.js +1 -1
- package/package.json +3 -1
- package/src/analysis/ast-parser.test.ts +46 -0
- package/src/cli/commands/crawl.test.ts +99 -12
- package/src/cli/commands/crawl.ts +76 -24
- package/src/cli/commands/store.test.ts +68 -1
- package/src/cli/commands/store.ts +9 -3
- package/src/crawl/article-converter.ts +36 -1
- package/src/crawl/bridge.ts +18 -7
- package/src/crawl/intelligent-crawler.ts +45 -4
- package/src/db/embeddings.test.ts +16 -0
- package/src/db/lance.test.ts +31 -0
- package/src/db/lance.ts +8 -0
- package/src/logging/index.ts +29 -0
- package/src/logging/logger.test.ts +75 -0
- package/src/logging/logger.ts +147 -0
- package/src/logging/payload.test.ts +152 -0
- package/src/logging/payload.ts +121 -0
- package/src/mcp/handlers/search.handler.test.ts +28 -9
- package/src/mcp/handlers/search.handler.ts +69 -29
- package/src/mcp/handlers/store.handler.test.ts +1 -0
- package/src/mcp/server.ts +44 -16
- package/src/services/chunking.service.ts +23 -0
- package/src/services/index.service.test.ts +921 -1
- package/src/services/index.service.ts +76 -1
- package/src/services/index.ts +20 -2
- package/src/services/search.service.test.ts +573 -21
- package/src/services/search.service.ts +257 -105
- package/src/services/services.test.ts +2 -2
- package/src/services/snippet.service.ts +28 -3
- package/src/services/store.service.test.ts +28 -0
- package/src/services/store.service.ts +4 -0
- package/src/services/token.service.test.ts +45 -0
- package/src/services/token.service.ts +33 -0
- package/src/types/result.test.ts +10 -0
- package/tests/integration/cli-consistency.test.ts +1 -4
- package/vitest.config.ts +4 -0
- package/dist/chunk-5QMHZUC4.js.map +0 -1
- package/dist/chunk-BICFAWMN.js.map +0 -1
- package/dist/chunk-J7J6LXOJ.js.map +0 -1
- package/scripts/readme-version-updater.cjs +0 -18
package/dist/index.js
CHANGED
@@ -1,12 +1,14 @@
 #!/usr/bin/env node
 import {
   runMCPServer
-} from "./chunk-J7J6LXOJ.js";
+} from "./chunk-OGEY66FZ.js";
 import {
   IntelligentCrawler
-} from "./chunk-BICFAWMN.js";
+} from "./chunk-2SJHNRXD.js";
 import {
   ASTParser,
+  ChunkingService,
+  classifyWebContentType,
   createDocumentId,
   createServices,
   createStoreId,
@@ -14,7 +16,7 @@ import {
   err,
   extractRepoName,
   ok
-} from "./chunk-5QMHZUC4.js";
+} from "./chunk-RWSXP3PQ.js";
 import "./chunk-L2YVNC63.js";
 
 // src/index.ts
@@ -85,12 +87,14 @@ function createStoreCommand(getOptions) {
   store.command("create <name>").description("Create a new store pointing to a local path or URL").requiredOption("-t, --type <type>", "Store type: file (local dir), repo (git), web (crawled site)").requiredOption("-s, --source <path>", "Local path for file/repo stores, URL for web stores").option("-d, --description <desc>", "Optional description for the store").option("--tags <tags>", "Comma-separated tags for filtering").action(async (name, options) => {
     const globalOpts = getOptions();
     const services = await createServices(globalOpts.config, globalOpts.dataDir);
+    let exitCode = 0;
     try {
+      const isUrl = options.source.startsWith("http://") || options.source.startsWith("https://");
       const result = await services.store.create({
         name,
         type: options.type,
-        path: options.type …
-        url: options.type === "web" ? options.source : void 0,
+        path: options.type === "file" || options.type === "repo" && !isUrl ? options.source : void 0,
+        url: options.type === "web" || options.type === "repo" && isUrl ? options.source : void 0,
         description: options.description,
         tags: options.tags?.split(",").map((t) => t.trim())
       });
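The rewritten `path`/`url` routing above leans on `&&` binding tighter than `||`: a repo store's `--source` is treated as a local path unless it looks like an http(s) URL. A minimal standalone sketch of that rule (`routeSource` is a hypothetical helper, not part of the package's exports):

```ts
// Sketch of the source-routing rule from the hunk above.
type StoreType = "file" | "repo" | "web";

function routeSource(type: StoreType, source: string): { path?: string; url?: string } {
  const isUrl = source.startsWith("http://") || source.startsWith("https://");
  return {
    // `a || b && c` parses as `a || (b && c)`, so this matches the dist code.
    path: type === "file" || (type === "repo" && !isUrl) ? source : undefined,
    url: type === "web" || (type === "repo" && isUrl) ? source : undefined,
  };
}

// routeSource("repo", "https://github.com/org/repo") -> { path: undefined, url: "https://…" }
// routeSource("repo", "./vendor/repo")               -> { path: "./vendor/repo", url: undefined }
```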
@@ -104,11 +108,14 @@ Created store: ${result.data.name} (${result.data.id})
         }
       } else {
         console.error(`Error: ${result.error.message}`);
-        …
+        exitCode = 1;
       }
     } finally {
       await destroyServices(services);
     }
+    if (exitCode !== 0) {
+      process.exit(exitCode);
+    }
   });
   store.command("info <store>").description("Show store details: ID, type, path/URL, timestamps").action(async (storeIdOrName) => {
     const globalOpts = getOptions();
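The `exitCode` plumbing exists because calling `process.exit()` inside the `try` would skip or truncate the async `finally` cleanup (`destroyServices`); the exit is deferred until teardown completes. A minimal sketch of the pattern, with hypothetical `run`/`cleanup` stand-ins for the command action and `destroyServices()`:

```ts
// Defer process.exit until cleanup has run; `run` and `cleanup` are
// hypothetical stand-ins, not the package's API.
async function runThenExit(run: () => Promise<number>, cleanup: () => Promise<void>): Promise<void> {
  let exitCode = 0;
  try {
    exitCode = await run(); // record failure instead of exiting mid-flight
  } finally {
    await cleanup(); // an early process.exit() would cut this short
  }
  if (exitCode !== 0) {
    process.exit(exitCode); // exit only after cleanup has finished
  }
}
```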
@@ -467,10 +474,33 @@ function createCrawlCommand(getOptions) {
   return new Command6("crawl").description("Crawl web pages with natural language control and index into store").argument("<url>", "URL to crawl").argument("<store>", "Target web store to add crawled content to").option("--crawl <instruction>", 'Natural language instruction for what to crawl (e.g., "all Getting Started pages")').option("--extract <instruction>", 'Natural language instruction for what to extract (e.g., "extract API references")').option("--simple", "Use simple BFS mode instead of intelligent crawling").option("--max-pages <number>", "Maximum number of pages to crawl", "50").option("--headless", "Use headless browser for JavaScript-rendered sites").action(async (url, storeIdOrName, cmdOptions) => {
     const globalOpts = getOptions();
     const services = await createServices(globalOpts.config, globalOpts.dataDir);
-    …
-    …
-    …
-    …
+    let store;
+    let storeCreated = false;
+    const existingStore = await services.store.getByIdOrName(storeIdOrName);
+    if (!existingStore) {
+      const result = await services.store.create({
+        name: storeIdOrName,
+        type: "web",
+        url
+      });
+      if (!result.success) {
+        await destroyServices(services);
+        throw new Error(`Failed to create store: ${result.error.message}`);
+      }
+      const createdStore = result.data;
+      if (createdStore.type !== "web") {
+        throw new Error("Unexpected store type after creation");
+      }
+      store = createdStore;
+      storeCreated = true;
+      if (globalOpts.quiet !== true && globalOpts.format !== "json") {
+        console.log(`Created web store: ${store.name}`);
+      }
+    } else if (existingStore.type !== "web") {
+      await destroyServices(services);
+      throw new Error(`Store "${storeIdOrName}" exists but is not a web store (type: ${existingStore.type})`);
+    } else {
+      store = existingStore;
     }
     const maxPages = cmdOptions.maxPages !== void 0 ? parseInt(cmdOptions.maxPages) : 50;
     const isInteractive = process.stdout.isTTY && globalOpts.quiet !== true && globalOpts.format !== "json";
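One non-obvious detail in the new get-or-create branch: the `createdStore.type !== "web"` guard looks redundant right after creating a store with `type: "web"`, but if the store type is a discriminated union, the runtime check is what lets TypeScript narrow `result.data` to the web variant before assigning it. A sketch under that assumption (the union shape is illustrative, not the package's real types):

```ts
// Illustrative union; the real Store types live in the package's src/.
type Store =
  | { type: "file"; name: string; path: string }
  | { type: "repo"; name: string; path?: string; url?: string }
  | { type: "web"; name: string; url: string };

// Mirror of the guard in the diff: fail loudly, or narrow to the web variant.
function assertWebStore(store: Store): Extract<Store, { type: "web" }> {
  if (store.type !== "web") {
    throw new Error(`Store "${store.name}" is not a web store (type: ${store.type})`);
  }
  return store; // narrowed by the check above
}
```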
@@ -482,7 +512,9 @@ function createCrawlCommand(getOptions) {
       console.log(`Crawling ${url}`);
     }
     const crawler = new IntelligentCrawler();
+    const webChunker = ChunkingService.forContentType("web");
     let pagesIndexed = 0;
+    let chunksCreated = 0;
     crawler.on("progress", (progress) => {
       if (spinner) {
         if (progress.type === "strategy") {
@@ -508,22 +540,33 @@ function createCrawlCommand(getOptions) {
       ...cmdOptions.simple !== void 0 && { simple: cmdOptions.simple },
       useHeadless: cmdOptions.headless ?? false
     })) {
-      const …
-      const …
-      … (old lines 513–526 not captured in the diff view)
+      const contentToProcess = result.extracted !== void 0 ? result.extracted : result.markdown;
+      const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
+      const fileType = classifyWebContentType(result.url, result.title);
+      const urlHash = createHash("md5").update(result.url).digest("hex");
+      for (const chunk of chunks) {
+        const chunkId = chunks.length > 1 ? `${store.id}-${urlHash}-${String(chunk.chunkIndex)}` : `${store.id}-${urlHash}`;
+        const vector = await services.embeddings.embed(chunk.content);
+        docs.push({
+          id: createDocumentId(chunkId),
+          content: chunk.content,
+          vector,
+          metadata: {
+            type: chunks.length > 1 ? "chunk" : "web",
+            storeId: store.id,
+            url: result.url,
+            title: result.title,
+            extracted: result.extracted !== void 0,
+            depth: result.depth,
+            indexedAt: /* @__PURE__ */ new Date(),
+            fileType,
+            chunkIndex: chunk.chunkIndex,
+            totalChunks: chunk.totalChunks,
+            sectionHeader: chunk.sectionHeader
+          }
+        });
+        chunksCreated++;
+      }
       pagesIndexed++;
     }
     if (docs.length > 0) {
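Document IDs in the loop above are deterministic: an MD5 hash of the page URL, scoped by store ID and suffixed with the chunk index only when a page splits into multiple chunks, so re-crawling the same page yields the same IDs. A self-contained sketch of the scheme (`makeChunkId` is a hypothetical helper mirroring the inline expression):

```ts
import { createHash } from "node:crypto";

// Hypothetical helper mirroring the chunk-ID expression in the hunk above.
function makeChunkId(storeId: string, url: string, chunkIndex: number, totalChunks: number): string {
  const urlHash = createHash("md5").update(url).digest("hex");
  return totalChunks > 1
    ? `${storeId}-${urlHash}-${String(chunkIndex)}`
    : `${storeId}-${urlHash}`;
}

// makeChunkId("store-1", "https://example.com/guide", 0, 3)
//   -> "store-1-<32 hex chars>-0"
```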
@@ -535,8 +578,10 @@ function createCrawlCommand(getOptions) {
     const crawlResult = {
       success: true,
       store: store.name,
+      storeCreated,
       url,
       pagesCrawled: pagesIndexed,
+      chunksCreated,
       mode: cmdOptions.simple === true ? "simple" : "intelligent",
       hadCrawlInstruction: cmdOptions.crawl !== void 0,
       hadExtractInstruction: cmdOptions.extract !== void 0
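For `--format json` consumers, the summary object gains `storeCreated` and `chunksCreated`. An interface inferred from the `crawlResult` literal above (not an exported type; field types are read off the assignments):

```ts
// Inferred from the crawlResult literal in the diff; illustrative only.
interface CrawlResult {
  success: boolean;
  store: string;            // store name
  storeCreated: boolean;    // true when the crawl auto-created the web store
  url: string;
  pagesCrawled: number;
  chunksCreated: number;
  mode: "simple" | "intelligent";
  hadCrawlInstruction: boolean;
  hadExtractInstruction: boolean;
}
```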
@@ -544,9 +589,9 @@ function createCrawlCommand(getOptions) {
     if (globalOpts.format === "json") {
       console.log(JSON.stringify(crawlResult, null, 2));
     } else if (spinner !== void 0) {
-      spinner.succeed(`Crawled …
+      spinner.succeed(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
     } else if (globalOpts.quiet !== true) {
-      console.log(`Crawled …
+      console.log(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
     }
   } catch (error) {
     const message = `Crawl failed: ${error instanceof Error ? error.message : String(error)}`;