bluera-knowledge 0.33.1 → 0.33.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-KDZDLJUY.js → chunk-OMXQBWCR.js} +21 -139
- package/dist/chunk-OMXQBWCR.js.map +1 -0
- package/dist/{chunk-YDTTD53Y.js → chunk-OPLZTNKK.js} +2 -2
- package/dist/{chunk-3TB7TDVF.js → chunk-SROFPHRA.js} +1 -2
- package/dist/chunk-SROFPHRA.js.map +1 -0
- package/dist/index.js +18 -23
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +5 -8
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/package.json +1 -1
- package/scripts/validate-local.sh +0 -16
- package/skills/crawl/SKILL.md +1 -7
- package/dist/chunk-3TB7TDVF.js.map +0 -1
- package/dist/chunk-KDZDLJUY.js.map +0 -1
- package/dist/{chunk-YDTTD53Y.js.map → chunk-OPLZTNKK.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -3,11 +3,11 @@ import {
|
|
|
3
3
|
ZilAdapter,
|
|
4
4
|
runMCPServer,
|
|
5
5
|
spawnBackgroundWorker
|
|
6
|
-
} from "./chunk-YDTTD53Y.js";
|
|
6
|
+
} from "./chunk-OPLZTNKK.js";
|
|
7
7
|
import {
|
|
8
8
|
IntelligentCrawler,
|
|
9
9
|
getCrawlStrategy
|
|
10
|
-
} from "./chunk-KDZDLJUY.js";
|
|
10
|
+
} from "./chunk-OMXQBWCR.js";
|
|
11
11
|
import {
|
|
12
12
|
ASTParser,
|
|
13
13
|
AdapterRegistry,
|
|
@@ -25,7 +25,7 @@ import {
|
|
|
25
25
|
isRepoStoreDefinition,
|
|
26
26
|
isWebStoreDefinition,
|
|
27
27
|
ok
|
|
28
|
-
} from "./chunk-3TB7TDVF.js";
|
|
28
|
+
} from "./chunk-SROFPHRA.js";
|
|
29
29
|
import {
|
|
30
30
|
createDocumentId
|
|
31
31
|
} from "./chunk-CLIMKLTW.js";
|
|
@@ -46,7 +46,7 @@ function createCrawlCommand(getOptions) {
|
|
|
46
46
|
).option(
|
|
47
47
|
"--extract <instruction>",
|
|
48
48
|
'Natural language instruction for what to extract (e.g., "extract API references")'
|
|
49
|
-
).option("--
|
|
49
|
+
).option("--max-pages <number>", "Maximum number of pages to crawl", "50").option("--fast", "Use fast axios-only mode (may fail on JavaScript-heavy sites)").allowUnknownOption().action(
|
|
50
50
|
async (url, storeIdOrName, cmdOptions) => {
|
|
51
51
|
const storeArg = process.argv.find((arg) => arg.startsWith("--store"));
|
|
52
52
|
if (storeArg !== void 0) {
|
|
@@ -65,19 +65,16 @@ Example: bluera-knowledge crawl https://docs.example.com my-docs
|
|
|
65
65
|
}
|
|
66
66
|
const globalOpts = getOptions();
|
|
67
67
|
const useHeadless = !(cmdOptions.fast ?? false);
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
`Claude identified ${String(preComputedStrategy.urls.length)} URLs: ${preComputedStrategy.reasoning}`
|
|
79
|
-
);
|
|
80
|
-
}
|
|
68
|
+
const crawlInstruction = cmdOptions.crawl ?? "crawl all pages linked from this URL";
|
|
69
|
+
if (globalOpts.quiet !== true && globalOpts.format !== "json") {
|
|
70
|
+
console.log(`Crawling ${url}`);
|
|
71
|
+
console.log("Analyzing page structure with Claude...");
|
|
72
|
+
}
|
|
73
|
+
const preComputedStrategy = await getCrawlStrategy(url, crawlInstruction, useHeadless);
|
|
74
|
+
if (globalOpts.quiet !== true && globalOpts.format !== "json") {
|
|
75
|
+
console.log(
|
|
76
|
+
`Claude identified ${String(preComputedStrategy.urls.length)} URLs: ${preComputedStrategy.reasoning}`
|
|
77
|
+
);
|
|
81
78
|
}
|
|
82
79
|
const services = await createServices(
|
|
83
80
|
globalOpts.config,
|
|
@@ -123,8 +120,7 @@ Example: bluera-knowledge crawl https://docs.example.com my-docs
|
|
|
123
120
|
const isInteractive = process.stdout.isTTY && globalOpts.quiet !== true && globalOpts.format !== "json";
|
|
124
121
|
let spinner;
|
|
125
122
|
if (isInteractive) {
|
|
126
|
-
|
|
127
|
-
spinner = ora(`Crawling ${url} (${mode} mode)`).start();
|
|
123
|
+
spinner = ora(`Crawling ${url} (intelligent mode)`).start();
|
|
128
124
|
} else if (globalOpts.quiet !== true && globalOpts.format !== "json") {
|
|
129
125
|
console.log(`Crawling ${url}`);
|
|
130
126
|
}
|
|
@@ -154,12 +150,11 @@ Example: bluera-knowledge crawl https://docs.example.com my-docs
|
|
|
154
150
|
await services.lance.initialize(store.id);
|
|
155
151
|
const docs = [];
|
|
156
152
|
for await (const result of crawler.crawl(url, {
|
|
157
|
-
|
|
153
|
+
crawlInstruction,
|
|
158
154
|
...cmdOptions.extract !== void 0 && { extractInstruction: cmdOptions.extract },
|
|
159
155
|
maxPages,
|
|
160
|
-
...cmdOptions.simple !== void 0 && { simple: cmdOptions.simple },
|
|
161
156
|
useHeadless,
|
|
162
|
-
|
|
157
|
+
preComputedStrategy
|
|
163
158
|
})) {
|
|
164
159
|
const contentToProcess = result.extracted ?? result.markdown;
|
|
165
160
|
const chunks = webChunker.chunk(contentToProcess, `${result.url}.md`);
|
|
@@ -205,7 +200,7 @@ Example: bluera-knowledge crawl https://docs.example.com my-docs
|
|
|
205
200
|
url,
|
|
206
201
|
pagesCrawled: pagesIndexed,
|
|
207
202
|
chunksCreated,
|
|
208
|
-
mode:
|
|
203
|
+
mode: "intelligent",
|
|
209
204
|
hadCrawlInstruction: cmdOptions.crawl !== void 0,
|
|
210
205
|
hadExtractInstruction: cmdOptions.extract !== void 0
|
|
211
206
|
};
|