bluera-knowledge 0.9.26 → 0.9.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/.claude/commands/commit.md +4 -7
  2. package/.claude/hooks/post-edit-check.sh +21 -24
  3. package/.claude/skills/atomic-commits/SKILL.md +6 -0
  4. package/.claude-plugin/plugin.json +1 -1
  5. package/.env.example +4 -0
  6. package/.husky/pre-push +12 -2
  7. package/.versionrc.json +0 -4
  8. package/BUGS-FOUND.md +71 -0
  9. package/CHANGELOG.md +76 -0
  10. package/README.md +55 -20
  11. package/bun.lock +35 -1
  12. package/commands/crawl.md +2 -0
  13. package/dist/{chunk-BICFAWMN.js → chunk-2SJHNRXD.js} +73 -8
  14. package/dist/chunk-2SJHNRXD.js.map +1 -0
  15. package/dist/{chunk-J7J6LXOJ.js → chunk-OGEY66FZ.js} +106 -41
  16. package/dist/chunk-OGEY66FZ.js.map +1 -0
  17. package/dist/{chunk-5QMHZUC4.js → chunk-RWSXP3PQ.js} +482 -106
  18. package/dist/chunk-RWSXP3PQ.js.map +1 -0
  19. package/dist/index.js +73 -28
  20. package/dist/index.js.map +1 -1
  21. package/dist/mcp/server.js +2 -2
  22. package/dist/workers/background-worker-cli.js +2 -2
  23. package/eslint.config.js +1 -1
  24. package/package.json +3 -1
  25. package/src/analysis/ast-parser.test.ts +46 -0
  26. package/src/cli/commands/crawl.test.ts +99 -12
  27. package/src/cli/commands/crawl.ts +76 -24
  28. package/src/cli/commands/store.test.ts +68 -1
  29. package/src/cli/commands/store.ts +9 -3
  30. package/src/crawl/article-converter.ts +36 -1
  31. package/src/crawl/bridge.ts +18 -7
  32. package/src/crawl/intelligent-crawler.ts +45 -4
  33. package/src/db/embeddings.test.ts +16 -0
  34. package/src/db/lance.test.ts +31 -0
  35. package/src/db/lance.ts +8 -0
  36. package/src/logging/index.ts +29 -0
  37. package/src/logging/logger.test.ts +75 -0
  38. package/src/logging/logger.ts +147 -0
  39. package/src/logging/payload.test.ts +152 -0
  40. package/src/logging/payload.ts +121 -0
  41. package/src/mcp/handlers/search.handler.test.ts +28 -9
  42. package/src/mcp/handlers/search.handler.ts +69 -29
  43. package/src/mcp/handlers/store.handler.test.ts +1 -0
  44. package/src/mcp/server.ts +44 -16
  45. package/src/services/chunking.service.ts +23 -0
  46. package/src/services/index.service.test.ts +921 -1
  47. package/src/services/index.service.ts +76 -1
  48. package/src/services/index.ts +20 -2
  49. package/src/services/search.service.test.ts +573 -21
  50. package/src/services/search.service.ts +257 -105
  51. package/src/services/services.test.ts +2 -2
  52. package/src/services/snippet.service.ts +28 -3
  53. package/src/services/store.service.test.ts +28 -0
  54. package/src/services/store.service.ts +4 -0
  55. package/src/services/token.service.test.ts +45 -0
  56. package/src/services/token.service.ts +33 -0
  57. package/src/types/result.test.ts +10 -0
  58. package/tests/integration/cli-consistency.test.ts +1 -4
  59. package/vitest.config.ts +4 -0
  60. package/dist/chunk-5QMHZUC4.js.map +0 -1
  61. package/dist/chunk-BICFAWMN.js.map +0 -1
  62. package/dist/chunk-J7J6LXOJ.js.map +0 -1
  63. package/scripts/readme-version-updater.cjs +0 -18
package/dist/index.js CHANGED
@@ -1,12 +1,14 @@
 #!/usr/bin/env node
 import {
   runMCPServer
-} from "./chunk-J7J6LXOJ.js";
+} from "./chunk-OGEY66FZ.js";
 import {
   IntelligentCrawler
-} from "./chunk-BICFAWMN.js";
+} from "./chunk-2SJHNRXD.js";
 import {
   ASTParser,
+  ChunkingService,
+  classifyWebContentType,
   createDocumentId,
   createServices,
   createStoreId,
@@ -14,7 +16,7 @@ import {
   err,
   extractRepoName,
   ok
-} from "./chunk-5QMHZUC4.js";
+} from "./chunk-RWSXP3PQ.js";
 import "./chunk-L2YVNC63.js";
 
 // src/index.ts
@@ -85,12 +87,14 @@ function createStoreCommand(getOptions) {
   store.command("create <name>").description("Create a new store pointing to a local path or URL").requiredOption("-t, --type <type>", "Store type: file (local dir), repo (git), web (crawled site)").requiredOption("-s, --source <path>", "Local path for file/repo stores, URL for web stores").option("-d, --description <desc>", "Optional description for the store").option("--tags <tags>", "Comma-separated tags for filtering").action(async (name, options) => {
     const globalOpts = getOptions();
     const services = await createServices(globalOpts.config, globalOpts.dataDir);
+    let exitCode = 0;
     try {
+      const isUrl = options.source.startsWith("http://") || options.source.startsWith("https://");
       const result = await services.store.create({
         name,
         type: options.type,
-        path: options.type !== "web" ? options.source : void 0,
-        url: options.type === "web" ? options.source : void 0,
+        path: options.type === "file" || options.type === "repo" && !isUrl ? options.source : void 0,
+        url: options.type === "web" || options.type === "repo" && isUrl ? options.source : void 0,
         description: options.description,
         tags: options.tags?.split(",").map((t) => t.trim())
       });
@@ -104,11 +108,14 @@ Created store: ${result.data.name} (${result.data.id})
         }
       } else {
         console.error(`Error: ${result.error.message}`);
-        process.exit(1);
+        exitCode = 1;
       }
     } finally {
       await destroyServices(services);
     }
+    if (exitCode !== 0) {
+      process.exit(exitCode);
+    }
   });
   store.command("info <store>").description("Show store details: ID, type, path/URL, timestamps").action(async (storeIdOrName) => {
     const globalOpts = getOptions();
@@ -467,10 +474,33 @@ function createCrawlCommand(getOptions) {
   return new Command6("crawl").description("Crawl web pages with natural language control and index into store").argument("<url>", "URL to crawl").argument("<store>", "Target web store to add crawled content to").option("--crawl <instruction>", 'Natural language instruction for what to crawl (e.g., "all Getting Started pages")').option("--extract <instruction>", 'Natural language instruction for what to extract (e.g., "extract API references")').option("--simple", "Use simple BFS mode instead of intelligent crawling").option("--max-pages <number>", "Maximum number of pages to crawl", "50").option("--headless", "Use headless browser for JavaScript-rendered sites").action(async (url, storeIdOrName, cmdOptions) => {
     const globalOpts = getOptions();
     const services = await createServices(globalOpts.config, globalOpts.dataDir);
-    const store = await services.store.getByIdOrName(storeIdOrName);
-    if (!store || store.type !== "web") {
-      console.error(`Error: Web store not found: ${storeIdOrName}`);
-      process.exit(3);
+    let store;
+    let storeCreated = false;
+    const existingStore = await services.store.getByIdOrName(storeIdOrName);
+    if (!existingStore) {
+      const result = await services.store.create({
+        name: storeIdOrName,
+        type: "web",
+        url
+      });
+      if (!result.success) {
+        await destroyServices(services);
+        throw new Error(`Failed to create store: ${result.error.message}`);
+      }
+      const createdStore = result.data;
+      if (createdStore.type !== "web") {
+        throw new Error("Unexpected store type after creation");
+      }
+      store = createdStore;
+      storeCreated = true;
+      if (globalOpts.quiet !== true && globalOpts.format !== "json") {
+        console.log(`Created web store: ${store.name}`);
+      }
+    } else if (existingStore.type !== "web") {
+      await destroyServices(services);
+      throw new Error(`Store "${storeIdOrName}" exists but is not a web store (type: ${existingStore.type})`);
+    } else {
+      store = existingStore;
     }
     const maxPages = cmdOptions.maxPages !== void 0 ? parseInt(cmdOptions.maxPages) : 50;
     const isInteractive = process.stdout.isTTY && globalOpts.quiet !== true && globalOpts.format !== "json";
@@ -482,7 +512,9 @@ function createCrawlCommand(getOptions) {
       console.log(`Crawling ${url}`);
     }
     const crawler = new IntelligentCrawler();
+    const webChunker = ChunkingService.forContentType("web");
     let pagesIndexed = 0;
+    let chunksCreated = 0;
     crawler.on("progress", (progress) => {
       if (spinner) {
         if (progress.type === "strategy") {
@@ -508,22 +540,33 @@ function createCrawlCommand(getOptions) {
        ...cmdOptions.simple !== void 0 && { simple: cmdOptions.simple },
        useHeadless: cmdOptions.headless ?? false
      })) {
-       const contentToEmbed = result.extracted !== void 0 ? result.extracted : result.markdown;
-       const vector = await services.embeddings.embed(contentToEmbed);
-       docs.push({
-         id: createDocumentId(`${store.id}-${createHash("md5").update(result.url).digest("hex")}`),
-         content: contentToEmbed,
-         vector,
-         metadata: {
-           type: "web",
-           storeId: store.id,
-           url: result.url,
-           title: result.title,
-           extracted: result.extracted !== void 0,
-           depth: result.depth,
-           indexedAt: /* @__PURE__ */ new Date()
-         }
-       });
+       const contentToProcess = result.extracted !== void 0 ? result.extracted : result.markdown;
+       const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
+       const fileType = classifyWebContentType(result.url, result.title);
+       const urlHash = createHash("md5").update(result.url).digest("hex");
+       for (const chunk of chunks) {
+         const chunkId = chunks.length > 1 ? `${store.id}-${urlHash}-${String(chunk.chunkIndex)}` : `${store.id}-${urlHash}`;
+         const vector = await services.embeddings.embed(chunk.content);
+         docs.push({
+           id: createDocumentId(chunkId),
+           content: chunk.content,
+           vector,
+           metadata: {
+             type: chunks.length > 1 ? "chunk" : "web",
+             storeId: store.id,
+             url: result.url,
+             title: result.title,
+             extracted: result.extracted !== void 0,
+             depth: result.depth,
+             indexedAt: /* @__PURE__ */ new Date(),
+             fileType,
+             chunkIndex: chunk.chunkIndex,
+             totalChunks: chunk.totalChunks,
+             sectionHeader: chunk.sectionHeader
+           }
+         });
+         chunksCreated++;
+       }
       pagesIndexed++;
     }
     if (docs.length > 0) {
@@ -535,8 +578,10 @@ function createCrawlCommand(getOptions) {
     const crawlResult = {
       success: true,
       store: store.name,
+      storeCreated,
       url,
       pagesCrawled: pagesIndexed,
+      chunksCreated,
       mode: cmdOptions.simple === true ? "simple" : "intelligent",
       hadCrawlInstruction: cmdOptions.crawl !== void 0,
       hadExtractInstruction: cmdOptions.extract !== void 0
@@ -544,9 +589,9 @@ function createCrawlCommand(getOptions) {
     if (globalOpts.format === "json") {
       console.log(JSON.stringify(crawlResult, null, 2));
     } else if (spinner !== void 0) {
-      spinner.succeed(`Crawled and indexed ${String(pagesIndexed)} pages`);
+      spinner.succeed(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
     } else if (globalOpts.quiet !== true) {
-      console.log(`Crawled and indexed ${String(pagesIndexed)} pages`);
+      console.log(`Crawled ${String(pagesIndexed)} pages, indexed ${String(chunksCreated)} chunks`);
     }
   } catch (error) {
     const message = `Crawl failed: ${error instanceof Error ? error.message : String(error)}`;
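The crawl loop above now splits each page into chunks before embedding, keying documents by an MD5 hash of the page URL plus a chunk index when a page produces more than one chunk. A minimal sketch of that ID scheme follows; the helper name chunkDocumentId and the standalone-script framing are illustrative, only the ID shape is taken from the bundled output above.

// id-scheme-sketch.ts - illustrative only; not part of the package
import { createHash } from "node:crypto";

function chunkDocumentId(storeId: string, url: string, chunkIndex: number, totalChunks: number): string {
  // Pages are keyed by an MD5 hash of their URL, as in the crawl loop above.
  const urlHash = createHash("md5").update(url).digest("hex");
  // Single-chunk pages keep the flat `${storeId}-${urlHash}` ID, so re-crawling the
  // same page overwrites one document; multi-chunk pages get an index suffix per chunk.
  return totalChunks > 1 ? `${storeId}-${urlHash}-${String(chunkIndex)}` : `${storeId}-${urlHash}`;
}

// Example: a page split into three chunks yields three stable, ordered IDs.
for (let i = 0; i < 3; i++) {
  console.log(chunkDocumentId("store_abc", "https://example.com/docs/intro", i, 3));
}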