bluera-knowledge 0.33.1 → 0.33.2

This diff shows the changes between publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -3,11 +3,11 @@ import {
3
3
  ZilAdapter,
4
4
  runMCPServer,
5
5
  spawnBackgroundWorker
6
- } from "./chunk-YDTTD53Y.js";
6
+ } from "./chunk-OPLZTNKK.js";
7
7
  import {
8
8
  IntelligentCrawler,
9
9
  getCrawlStrategy
10
- } from "./chunk-KDZDLJUY.js";
10
+ } from "./chunk-OMXQBWCR.js";
11
11
  import {
12
12
  ASTParser,
13
13
  AdapterRegistry,
@@ -25,7 +25,7 @@ import {
25
25
  isRepoStoreDefinition,
26
26
  isWebStoreDefinition,
27
27
  ok
28
- } from "./chunk-3TB7TDVF.js";
28
+ } from "./chunk-SROFPHRA.js";
29
29
  import {
30
30
  createDocumentId
31
31
  } from "./chunk-CLIMKLTW.js";
@@ -46,7 +46,7 @@ function createCrawlCommand(getOptions) {
46
46
  ).option(
47
47
  "--extract <instruction>",
48
48
  'Natural language instruction for what to extract (e.g., "extract API references")'
49
- ).option("--simple", "Use simple BFS mode instead of intelligent crawling").option("--max-pages <number>", "Maximum number of pages to crawl", "50").option("--fast", "Use fast axios-only mode (may fail on JavaScript-heavy sites)").allowUnknownOption().action(
49
+ ).option("--max-pages <number>", "Maximum number of pages to crawl", "50").option("--fast", "Use fast axios-only mode (may fail on JavaScript-heavy sites)").allowUnknownOption().action(
50
50
  async (url, storeIdOrName, cmdOptions) => {
51
51
  const storeArg = process.argv.find((arg) => arg.startsWith("--store"));
52
52
  if (storeArg !== void 0) {
@@ -65,19 +65,16 @@ Example: bluera-knowledge crawl https://docs.example.com my-docs
65
65
  }
66
66
  const globalOpts = getOptions();
67
67
  const useHeadless = !(cmdOptions.fast ?? false);
68
- let preComputedStrategy;
69
- const useIntelligentMode = cmdOptions.simple !== true && cmdOptions.crawl !== void 0 && cmdOptions.crawl !== "";
70
- if (useIntelligentMode && cmdOptions.crawl !== void 0) {
71
- if (globalOpts.quiet !== true && globalOpts.format !== "json") {
72
- console.log(`Crawling ${url}`);
73
- console.log("Analyzing page structure with Claude...");
74
- }
75
- preComputedStrategy = await getCrawlStrategy(url, cmdOptions.crawl, useHeadless);
76
- if (globalOpts.quiet !== true && globalOpts.format !== "json") {
77
- console.log(
78
- `Claude identified ${String(preComputedStrategy.urls.length)} URLs: ${preComputedStrategy.reasoning}`
79
- );
80
- }
68
+ const crawlInstruction = cmdOptions.crawl ?? "crawl all pages linked from this URL";
69
+ if (globalOpts.quiet !== true && globalOpts.format !== "json") {
70
+ console.log(`Crawling ${url}`);
71
+ console.log("Analyzing page structure with Claude...");
72
+ }
73
+ const preComputedStrategy = await getCrawlStrategy(url, crawlInstruction, useHeadless);
74
+ if (globalOpts.quiet !== true && globalOpts.format !== "json") {
75
+ console.log(
76
+ `Claude identified ${String(preComputedStrategy.urls.length)} URLs: ${preComputedStrategy.reasoning}`
77
+ );
81
78
  }
82
79
  const services = await createServices(
83
80
  globalOpts.config,
@@ -123,8 +120,7 @@ Example: bluera-knowledge crawl https://docs.example.com my-docs
123
120
  const isInteractive = process.stdout.isTTY && globalOpts.quiet !== true && globalOpts.format !== "json";
124
121
  let spinner;
125
122
  if (isInteractive) {
126
- const mode = cmdOptions.simple === true ? "simple" : "intelligent";
127
- spinner = ora(`Crawling ${url} (${mode} mode)`).start();
123
+ spinner = ora(`Crawling ${url} (intelligent mode)`).start();
128
124
  } else if (globalOpts.quiet !== true && globalOpts.format !== "json") {
129
125
  console.log(`Crawling ${url}`);
130
126
  }
@@ -154,12 +150,11 @@ Example: bluera-knowledge crawl https://docs.example.com my-docs
154
150
  await services.lance.initialize(store.id);
155
151
  const docs = [];
156
152
  for await (const result of crawler.crawl(url, {
157
- ...cmdOptions.crawl !== void 0 && { crawlInstruction: cmdOptions.crawl },
153
+ crawlInstruction,
158
154
  ...cmdOptions.extract !== void 0 && { extractInstruction: cmdOptions.extract },
159
155
  maxPages,
160
- ...cmdOptions.simple !== void 0 && { simple: cmdOptions.simple },
161
156
  useHeadless,
162
- ...preComputedStrategy !== void 0 && { preComputedStrategy }
157
+ preComputedStrategy
163
158
  })) {
164
159
  const contentToProcess = result.extracted ?? result.markdown;
165
160
  const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
@@ -205,7 +200,7 @@ Example: bluera-knowledge crawl https://docs.example.com my-docs
205
200
  url,
206
201
  pagesCrawled: pagesIndexed,
207
202
  chunksCreated,
208
- mode: cmdOptions.simple === true ? "simple" : "intelligent",
203
+ mode: "intelligent",
209
204
  hadCrawlInstruction: cmdOptions.crawl !== void 0,
210
205
  hadExtractInstruction: cmdOptions.extract !== void 0
211
206
  };