@botpress/runtime 1.10.4 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.d.ts +9 -10
- package/dist/constants.d.ts.map +1 -1
- package/dist/definition.js +336 -239
- package/dist/definition.js.map +4 -4
- package/dist/internal.js +166 -53
- package/dist/internal.js.map +4 -4
- package/dist/library.js +166 -53
- package/dist/library.js.map +4 -4
- package/dist/primitives/data-sources/source-base.d.ts +12 -6
- package/dist/primitives/data-sources/source-base.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-directory.d.ts +6 -3
- package/dist/primitives/data-sources/source-directory.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-table.d.ts +6 -3
- package/dist/primitives/data-sources/source-table.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-website.d.ts +10 -4
- package/dist/primitives/data-sources/source-website.d.ts.map +1 -1
- package/dist/primitives/index.d.ts +6 -3
- package/dist/primitives/index.d.ts.map +1 -1
- package/dist/primitives/knowledge.d.ts +6 -2
- package/dist/primitives/knowledge.d.ts.map +1 -1
- package/dist/runtime/autonomous.d.ts.map +1 -1
- package/dist/runtime/tracked-state.d.ts +2 -0
- package/dist/runtime/tracked-state.d.ts.map +1 -1
- package/dist/runtime/workflows/knowledge-indexing.d.ts +3 -0
- package/dist/runtime/workflows/knowledge-indexing.d.ts.map +1 -1
- package/dist/runtime.js +144 -47
- package/dist/runtime.js.map +4 -4
- package/package.json +1 -1
package/dist/internal.js
CHANGED
|
@@ -48,7 +48,7 @@ var init_define_BUILD = __esm({
|
|
|
48
48
|
var define_PACKAGE_VERSIONS_default;
|
|
49
49
|
var init_define_PACKAGE_VERSIONS = __esm({
|
|
50
50
|
"<define:__PACKAGE_VERSIONS__>"() {
|
|
51
|
-
define_PACKAGE_VERSIONS_default = { runtime: "1.
|
|
51
|
+
define_PACKAGE_VERSIONS_default = { runtime: "1.11.0", adk: "1.11.0", sdk: "4.20.2", llmz: "0.0.33", zai: "2.5.0", cognitive: "0.2.0" };
|
|
52
52
|
}
|
|
53
53
|
});
|
|
54
54
|
|
|
@@ -16702,9 +16702,9 @@ var require_combined_stream = __commonJS({
|
|
|
16702
16702
|
}
|
|
16703
16703
|
});
|
|
16704
16704
|
|
|
16705
|
-
// ../../node_modules/mime-db/db.json
|
|
16705
|
+
// ../../node_modules/form-data/node_modules/mime-types/node_modules/mime-db/db.json
|
|
16706
16706
|
var require_db = __commonJS({
|
|
16707
|
-
"../../node_modules/mime-db/db.json"(exports2, module) {
|
|
16707
|
+
"../../node_modules/form-data/node_modules/mime-types/node_modules/mime-db/db.json"(exports2, module) {
|
|
16708
16708
|
module.exports = {
|
|
16709
16709
|
"application/1d-interleaved-parityfec": {
|
|
16710
16710
|
source: "iana"
|
|
@@ -25227,18 +25227,18 @@ var require_db = __commonJS({
|
|
|
25227
25227
|
}
|
|
25228
25228
|
});
|
|
25229
25229
|
|
|
25230
|
-
// ../../node_modules/mime-db/index.js
|
|
25230
|
+
// ../../node_modules/form-data/node_modules/mime-types/node_modules/mime-db/index.js
|
|
25231
25231
|
var require_mime_db = __commonJS({
|
|
25232
|
-
"../../node_modules/mime-db/index.js"(exports2, module) {
|
|
25232
|
+
"../../node_modules/form-data/node_modules/mime-types/node_modules/mime-db/index.js"(exports2, module) {
|
|
25233
25233
|
init_define_BUILD();
|
|
25234
25234
|
init_define_PACKAGE_VERSIONS();
|
|
25235
25235
|
module.exports = require_db();
|
|
25236
25236
|
}
|
|
25237
25237
|
});
|
|
25238
25238
|
|
|
25239
|
-
// ../../node_modules/mime-types/index.js
|
|
25239
|
+
// ../../node_modules/form-data/node_modules/mime-types/index.js
|
|
25240
25240
|
var require_mime_types = __commonJS({
|
|
25241
|
-
"../../node_modules/mime-types/index.js"(exports2) {
|
|
25241
|
+
"../../node_modules/form-data/node_modules/mime-types/index.js"(exports2) {
|
|
25242
25242
|
"use strict";
|
|
25243
25243
|
init_define_BUILD();
|
|
25244
25244
|
init_define_PACKAGE_VERSIONS();
|
|
@@ -34134,15 +34134,14 @@ var init_constants = __esm({
|
|
|
34134
34134
|
WellKnownTags = {
|
|
34135
34135
|
knowledge: {
|
|
34136
34136
|
/**
|
|
34137
|
-
* All knowledge base have this tag (with value "
|
|
34138
|
-
* @example "
|
|
34137
|
+
* All knowledge base have this tag (with value "knowledge-base") to identify them as knowledge-related records.
|
|
34138
|
+
* @example "source": "knowledge-base"
|
|
34139
34139
|
*/
|
|
34140
|
-
KNOWLEDGE: "
|
|
34140
|
+
KNOWLEDGE: "source",
|
|
34141
34141
|
/**
|
|
34142
34142
|
* The ID of the knowledge base the record belongs to.
|
|
34143
|
-
* This is the ID of the Knowledge Base primitive
|
|
34143
|
+
* This is the ID of the Knowledge Base primitive from Botpress.
|
|
34144
34144
|
* @example "kbId": "kb_01K6RT9T39KF7K0A7R7D71TDZ1"
|
|
34145
|
-
* @deprecated Use KNOWLEDGE_BASE_NAME for now, as we will be moving to IDs later.
|
|
34146
34145
|
*/
|
|
34147
34146
|
KNOWLEDGE_BASE_ID: "kbId",
|
|
34148
34147
|
/**
|
|
@@ -34152,15 +34151,15 @@ var init_constants = __esm({
|
|
|
34152
34151
|
KNOWLEDGE_BASE_NAME: "kbName",
|
|
34153
34152
|
/**
|
|
34154
34153
|
* The ID of the Data Source the record was ingested from.
|
|
34155
|
-
* @example "
|
|
34154
|
+
* @example "dsId": "docs"
|
|
34156
34155
|
*/
|
|
34157
|
-
KNOWLEDGE_SOURCE_ID: "
|
|
34156
|
+
KNOWLEDGE_SOURCE_ID: "dsId",
|
|
34158
34157
|
/**
|
|
34159
34158
|
* The type of the Data Source the record was ingested from.
|
|
34160
|
-
* Possible values are: "
|
|
34161
|
-
* @example "
|
|
34159
|
+
* Possible values are: "document", "rich-text", "web-page", etc.
|
|
34160
|
+
* @example "dsType": "document"
|
|
34162
34161
|
*/
|
|
34163
|
-
KNOWLEDGE_SOURCE_TYPE: "
|
|
34162
|
+
KNOWLEDGE_SOURCE_TYPE: "dsType"
|
|
34164
34163
|
}
|
|
34165
34164
|
};
|
|
34166
34165
|
WellKnownMetadata = {
|
|
@@ -34257,7 +34256,7 @@ If the question is not related to the knowledge bases, do NOT use this tool.`.tr
|
|
|
34257
34256
|
includeBreadcrumb: true,
|
|
34258
34257
|
contextDepth: 4,
|
|
34259
34258
|
tags: {
|
|
34260
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
34259
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
34261
34260
|
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: kbNames
|
|
34262
34261
|
}
|
|
34263
34262
|
});
|
|
@@ -34292,11 +34291,14 @@ If the question is not related to the knowledge bases, do NOT use this tool.`.tr
|
|
|
34292
34291
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]) {
|
|
34293
34292
|
citationMetadata.knowledgeBase = tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME];
|
|
34294
34293
|
}
|
|
34294
|
+
if (tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]) {
|
|
34295
|
+
citationMetadata.knowledgeBaseId = tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID];
|
|
34296
|
+
}
|
|
34295
34297
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]) {
|
|
34296
|
-
citationMetadata.
|
|
34298
|
+
citationMetadata.dsType = tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE];
|
|
34297
34299
|
}
|
|
34298
34300
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]) {
|
|
34299
|
-
citationMetadata.
|
|
34301
|
+
citationMetadata.dsId = tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID];
|
|
34300
34302
|
}
|
|
34301
34303
|
for (const key in citationMetadata) {
|
|
34302
34304
|
let value = citationMetadata[key];
|
|
@@ -35300,8 +35302,9 @@ var init_source_base = __esm({
|
|
|
35300
35302
|
size: z7.number()
|
|
35301
35303
|
});
|
|
35302
35304
|
SyncInput = z7.object({
|
|
35303
|
-
|
|
35305
|
+
dsId: z7.string(),
|
|
35304
35306
|
kbName: z7.string(),
|
|
35307
|
+
kbId: z7.string(),
|
|
35305
35308
|
force: z7.boolean().optional().describe("Force re-indexing even if files haven't changed").default(false)
|
|
35306
35309
|
});
|
|
35307
35310
|
SyncOutput = z7.object({
|
|
@@ -35318,18 +35321,16 @@ var init_source_base = __esm({
|
|
|
35318
35321
|
state: props.state,
|
|
35319
35322
|
timeout: "120m",
|
|
35320
35323
|
async handler(execProps) {
|
|
35321
|
-
const { kbName,
|
|
35324
|
+
const { kbName, kbId, dsId } = execProps.input;
|
|
35322
35325
|
const kb = adk.project.knowledge.find((kb2) => kb2.name === kbName);
|
|
35323
35326
|
if (!kb) {
|
|
35324
35327
|
throw new Error(`Knowledge base '${kbName}' not found`);
|
|
35325
35328
|
}
|
|
35326
|
-
const source = kb.sources.find((s) => s.id ===
|
|
35329
|
+
const source = kb.sources.find((s) => s.id === dsId);
|
|
35327
35330
|
if (!source) {
|
|
35328
|
-
throw new Error(`Data source with ID '${
|
|
35331
|
+
throw new Error(`Data source with ID '${dsId}' not found in knowledge base '${kbName}'`);
|
|
35329
35332
|
}
|
|
35330
|
-
console.log(
|
|
35331
|
-
`\u{1F504} Starting sync for data source '${sourceId}' of type '${source.type}' in knowledge base '${kbName}'`
|
|
35332
|
-
);
|
|
35333
|
+
console.log(`\u{1F504} Starting sync for data source '${dsId}' of type '${source.type}' in knowledge base '${kbName}' (${kbId})`);
|
|
35333
35334
|
return await props.handler.bind(source)(execProps);
|
|
35334
35335
|
}
|
|
35335
35336
|
});
|
|
@@ -35360,12 +35361,13 @@ var init_knowledge_indexing = __esm({
|
|
|
35360
35361
|
description: "Built-in workflow to re-index all data sources in a knowledge base",
|
|
35361
35362
|
input: z8.object({
|
|
35362
35363
|
kbName: z8.string(),
|
|
35364
|
+
kbId: z8.string(),
|
|
35363
35365
|
force: z8.boolean().optional().describe("Force re-indexing even if files haven't changed").default(false)
|
|
35364
35366
|
}),
|
|
35365
35367
|
timeout: "180m",
|
|
35366
35368
|
output: SyncOutput,
|
|
35367
35369
|
handler: async ({ input, step: step2 }) => {
|
|
35368
|
-
const { kbName } = input;
|
|
35370
|
+
const { kbName, kbId } = input;
|
|
35369
35371
|
const kb = adk.project.knowledge.find((x) => x.name === kbName);
|
|
35370
35372
|
if (!kb) {
|
|
35371
35373
|
throw new Error(`Knowledge base '${kbName}' not found`);
|
|
@@ -35381,7 +35383,8 @@ var init_knowledge_indexing = __esm({
|
|
|
35381
35383
|
statuses: ["in_progress", "listening", "pending", "paused"],
|
|
35382
35384
|
input: {
|
|
35383
35385
|
kbName,
|
|
35384
|
-
|
|
35386
|
+
kbId,
|
|
35387
|
+
dsId: source.id,
|
|
35385
35388
|
force: input.force || false
|
|
35386
35389
|
}
|
|
35387
35390
|
}).then((x) => x.id)
|
|
@@ -35779,7 +35782,9 @@ var init_tracked_state = __esm({
|
|
|
35779
35782
|
/** Workflow-specific state (persists across workflow executions) */
|
|
35780
35783
|
workflowState: "workflowState",
|
|
35781
35784
|
/** Workflow cached steps executions */
|
|
35782
|
-
workflowSteps: "workflowSteps"
|
|
35785
|
+
workflowSteps: "workflowSteps",
|
|
35786
|
+
/** Data source metadata for dashboard visibility */
|
|
35787
|
+
dsData: "dsData"
|
|
35783
35788
|
};
|
|
35784
35789
|
TrackedState = class _TrackedState {
|
|
35785
35790
|
type;
|
|
@@ -40268,6 +40273,7 @@ var init_source_website = __esm({
|
|
|
40268
40273
|
init_constants();
|
|
40269
40274
|
init_fxp();
|
|
40270
40275
|
init_library();
|
|
40276
|
+
init_context();
|
|
40271
40277
|
init_html_fetch();
|
|
40272
40278
|
State = z24.object({
|
|
40273
40279
|
urls: z24.array(
|
|
@@ -40284,6 +40290,7 @@ var init_source_website = __esm({
|
|
|
40284
40290
|
mode;
|
|
40285
40291
|
baseUrl;
|
|
40286
40292
|
sitemapUrl;
|
|
40293
|
+
llmsTxtUrl;
|
|
40287
40294
|
urls;
|
|
40288
40295
|
filterFn;
|
|
40289
40296
|
customFetch;
|
|
@@ -40292,10 +40299,11 @@ var init_source_website = __esm({
|
|
|
40292
40299
|
maxDepth;
|
|
40293
40300
|
transformFn;
|
|
40294
40301
|
constructor(id, mode, options) {
|
|
40295
|
-
super(id, "
|
|
40302
|
+
super(id, "web-page");
|
|
40296
40303
|
this.mode = mode;
|
|
40297
40304
|
this.baseUrl = options.baseUrl ?? void 0;
|
|
40298
40305
|
this.sitemapUrl = options.sitemapUrl ?? void 0;
|
|
40306
|
+
this.llmsTxtUrl = options.llmsTxtUrl ?? void 0;
|
|
40299
40307
|
this.urls = options.urls ?? void 0;
|
|
40300
40308
|
this.filterFn = "filter" in options ? options.filter : void 0;
|
|
40301
40309
|
if (typeof options.fetch === "string") {
|
|
@@ -40324,6 +40332,9 @@ var init_source_website = __esm({
|
|
|
40324
40332
|
} else if (this.mode === "sitemap") {
|
|
40325
40333
|
config.sitemapUrl = this.sitemapUrl;
|
|
40326
40334
|
config.maxPages = this.maxPages;
|
|
40335
|
+
} else if (this.mode === "llms-txt") {
|
|
40336
|
+
config.llmsTxtUrl = this.llmsTxtUrl;
|
|
40337
|
+
config.maxPages = this.maxPages;
|
|
40327
40338
|
} else if (this.mode === "urls") {
|
|
40328
40339
|
config.urls = this.urls;
|
|
40329
40340
|
}
|
|
@@ -40375,7 +40386,7 @@ var init_source_website = __esm({
|
|
|
40375
40386
|
/**
|
|
40376
40387
|
* Fetch content from a URL for sitemap parsing (raw content needed)
|
|
40377
40388
|
*/
|
|
40378
|
-
async
|
|
40389
|
+
async fetchRaw(url2) {
|
|
40379
40390
|
if (this.customFetch) {
|
|
40380
40391
|
try {
|
|
40381
40392
|
return await this.customFetch(url2);
|
|
@@ -40493,6 +40504,21 @@ var init_source_website = __esm({
|
|
|
40493
40504
|
}
|
|
40494
40505
|
return { urls };
|
|
40495
40506
|
}
|
|
40507
|
+
parseLlmsTxt(content) {
|
|
40508
|
+
const urls = [];
|
|
40509
|
+
const lines = content.split("\n");
|
|
40510
|
+
const urlRegex = /https?:\/\/[^\s)]+\.md/g;
|
|
40511
|
+
for (const line of lines) {
|
|
40512
|
+
const matches = line.matchAll(urlRegex);
|
|
40513
|
+
for (const match2 of matches) {
|
|
40514
|
+
const [url2] = match2;
|
|
40515
|
+
if (url2) {
|
|
40516
|
+
urls.push(url2);
|
|
40517
|
+
}
|
|
40518
|
+
}
|
|
40519
|
+
}
|
|
40520
|
+
return { urls };
|
|
40521
|
+
}
|
|
40496
40522
|
/**
|
|
40497
40523
|
* Discover URLs from a website using browser integration
|
|
40498
40524
|
*/
|
|
@@ -40550,6 +40576,28 @@ var init_source_website = __esm({
|
|
|
40550
40576
|
if (this.mode === "website") {
|
|
40551
40577
|
return this.discoverUrlsFromWebsite(step2);
|
|
40552
40578
|
}
|
|
40579
|
+
if (this.mode === "llms-txt") {
|
|
40580
|
+
if (!this.llmsTxtUrl) {
|
|
40581
|
+
throw new Error("No llms.txt URL provided");
|
|
40582
|
+
}
|
|
40583
|
+
const { content } = await step2("fetch llms.txt", () => this.fetchRaw(this.llmsTxtUrl));
|
|
40584
|
+
const { urls } = this.parseLlmsTxt(content);
|
|
40585
|
+
console.log(`Parsed ${urls.length} URLs from llms.txt`);
|
|
40586
|
+
const filteredUrls = [];
|
|
40587
|
+
for (const url2 of urls) {
|
|
40588
|
+
if (filteredUrls.length >= this.maxPages) {
|
|
40589
|
+
console.log(`Reached maxPages limit (${this.maxPages}), stopping`);
|
|
40590
|
+
break;
|
|
40591
|
+
}
|
|
40592
|
+
const filterContext = { url: url2 };
|
|
40593
|
+
if (!this.filterFn || this.filterFn(filterContext)) {
|
|
40594
|
+
filteredUrls.push({ loc: url2 });
|
|
40595
|
+
} else {
|
|
40596
|
+
console.log(`Skipped URL (filtered): ${url2}`);
|
|
40597
|
+
}
|
|
40598
|
+
}
|
|
40599
|
+
return filteredUrls;
|
|
40600
|
+
}
|
|
40553
40601
|
if (!this.sitemapUrl) {
|
|
40554
40602
|
throw new Error("No sitemap URL provided");
|
|
40555
40603
|
}
|
|
@@ -40563,7 +40611,7 @@ var init_source_website = __esm({
|
|
|
40563
40611
|
}
|
|
40564
40612
|
await step2(`processing sitemap ${item.url}`, async () => {
|
|
40565
40613
|
try {
|
|
40566
|
-
const { content, contentType } = await this.
|
|
40614
|
+
const { content, contentType } = await this.fetchRaw(item.url);
|
|
40567
40615
|
console.log(`Fetched sitemap ${item.url} (${content.length} bytes), processing... ${contentType}`);
|
|
40568
40616
|
console.log(content, contentType);
|
|
40569
40617
|
try {
|
|
@@ -40632,15 +40680,16 @@ var init_source_website = __esm({
|
|
|
40632
40680
|
console.log(
|
|
40633
40681
|
`Starting sync for WebsiteSource [${this.id}] in mode [${this.mode}, maxPages=${this.maxPages}, maxDepth=${this.maxDepth}, baseUrl=${this.baseUrl}, sitemapUrl=${this.sitemapUrl}]`
|
|
40634
40682
|
);
|
|
40635
|
-
console.log(`Using knowledge base: ${input.kbName}, force reindex: ${!!input.force}, ${input.
|
|
40683
|
+
console.log(`Using knowledge base: ${input.kbName}, force reindex: ${!!input.force}, ${input.dsId}`);
|
|
40636
40684
|
if (input.force) {
|
|
40637
40685
|
console.log("\u{1F504} FORCE MODE: Re-indexing all files regardless of changes");
|
|
40638
40686
|
}
|
|
40639
40687
|
const tags = {
|
|
40640
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
40688
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
40689
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]: input.kbId,
|
|
40690
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName,
|
|
40641
40691
|
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]: this.id,
|
|
40642
|
-
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
40643
|
-
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName
|
|
40692
|
+
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
40644
40693
|
};
|
|
40645
40694
|
const discoveredUrls = await step2("discover urls from sitemap", () => this.discoverUrls(step2, state));
|
|
40646
40695
|
console.log(`Discovered ${discoveredUrls.length} URLs from sitemap`);
|
|
@@ -40686,6 +40735,7 @@ var init_source_website = __esm({
|
|
|
40686
40735
|
console.log(
|
|
40687
40736
|
`To fetch: ${toFetch.length}, To remove: ${toRemove.length}, Skipped (unchanged): ${skippedUnchanged}${input.force ? " [FORCE MODE]" : ""}`
|
|
40688
40737
|
);
|
|
40738
|
+
console.log(`[SYNC DEBUG] Starting 'deleting removed urls' step...`);
|
|
40689
40739
|
const deleted = await step2.map(
|
|
40690
40740
|
"deleting removed urls",
|
|
40691
40741
|
toRemove,
|
|
@@ -40699,6 +40749,7 @@ var init_source_website = __esm({
|
|
|
40699
40749
|
),
|
|
40700
40750
|
{ concurrency: 5 }
|
|
40701
40751
|
);
|
|
40752
|
+
console.log(`[SYNC DEBUG] Deleted ${deleted.length} URLs, starting fetch phase...`);
|
|
40702
40753
|
const fetchAndIndex = async (sitemapUrl) => {
|
|
40703
40754
|
try {
|
|
40704
40755
|
const {
|
|
@@ -40745,8 +40796,8 @@ var init_source_website = __esm({
|
|
|
40745
40796
|
...fetchMetadata?.[WellKnownMetadata.knowledge.FAVICON] && {
|
|
40746
40797
|
[WellKnownMetadata.knowledge.FAVICON]: fetchMetadata[WellKnownMetadata.knowledge.FAVICON]
|
|
40747
40798
|
},
|
|
40748
|
-
|
|
40749
|
-
|
|
40799
|
+
dsId: this.id,
|
|
40800
|
+
dsType: this.type
|
|
40750
40801
|
}
|
|
40751
40802
|
});
|
|
40752
40803
|
return {
|
|
@@ -40760,13 +40811,53 @@ var init_source_website = __esm({
|
|
|
40760
40811
|
return null;
|
|
40761
40812
|
}
|
|
40762
40813
|
};
|
|
40814
|
+
console.log(`[SYNC DEBUG] Starting 'fetching and indexing pages' for ${toFetch.slice(0, this.maxPages).length} URLs...`);
|
|
40763
40815
|
const indexed = await step2.map(
|
|
40764
40816
|
"fetching and indexing pages",
|
|
40765
40817
|
toFetch.slice(0, this.maxPages),
|
|
40766
40818
|
(url2) => fetchAndIndex(url2),
|
|
40767
40819
|
{ concurrency: 20, maxAttempts: 2 }
|
|
40768
40820
|
);
|
|
40821
|
+
console.log(`[SYNC DEBUG] Fetch complete. ${indexed.length} attempted, ${indexed.filter((f) => f !== null).length} successful`);
|
|
40769
40822
|
const successful = indexed.filter((f) => f !== null);
|
|
40823
|
+
console.log(`[SYNC DEBUG] Starting dsData registration...`);
|
|
40824
|
+
await step2("register web page source", async () => {
|
|
40825
|
+
try {
|
|
40826
|
+
const botId = context.get("botId");
|
|
40827
|
+
const getStateResult = await client._inner.getState({ id: botId, type: "bot", name: "dsData" }).catch(() => ({ state: null }));
|
|
40828
|
+
const freshState = getStateResult.state;
|
|
40829
|
+
const existingPayload = freshState?.payload || {};
|
|
40830
|
+
const kbPayload = existingPayload[input.kbId] || {};
|
|
40831
|
+
const websiteUrl = this.baseUrl || this.sitemapUrl || this.urls?.[0] || "";
|
|
40832
|
+
const getTitle = () => {
|
|
40833
|
+
if (!websiteUrl) return "Website";
|
|
40834
|
+
try {
|
|
40835
|
+
const urlWithProtocol = websiteUrl.includes("://") ? websiteUrl : `https://${websiteUrl}`;
|
|
40836
|
+
return new URL(urlWithProtocol).hostname;
|
|
40837
|
+
} catch {
|
|
40838
|
+
return websiteUrl;
|
|
40839
|
+
}
|
|
40840
|
+
};
|
|
40841
|
+
kbPayload[this.id] = {
|
|
40842
|
+
type: "web-page",
|
|
40843
|
+
title: getTitle(),
|
|
40844
|
+
createdOn: kbPayload[this.id]?.createdOn || Date.now(),
|
|
40845
|
+
// Preserve original creation time
|
|
40846
|
+
data: { websiteUrl, pages: [], indexingJobs: [] }
|
|
40847
|
+
};
|
|
40848
|
+
existingPayload[input.kbId] = kbPayload;
|
|
40849
|
+
await client._inner.setState({
|
|
40850
|
+
id: botId,
|
|
40851
|
+
type: "bot",
|
|
40852
|
+
name: "dsData",
|
|
40853
|
+
payload: existingPayload
|
|
40854
|
+
});
|
|
40855
|
+
console.log(`Registered web page source "${this.id}" in dsData for KB ${input.kbId}`);
|
|
40856
|
+
} catch (err) {
|
|
40857
|
+
console.warn("Failed to register web page source in dsData state:", err);
|
|
40858
|
+
}
|
|
40859
|
+
});
|
|
40860
|
+
console.log(`[SYNC DEBUG] \u2705 WebsiteSource sync complete for "${this.id}". Processed: ${discoveredUrls.length}, Added: ${successful.length}, Deleted: ${deleted.length}`);
|
|
40770
40861
|
return {
|
|
40771
40862
|
processed: discoveredUrls.length,
|
|
40772
40863
|
deleted,
|
|
@@ -40785,6 +40876,10 @@ var init_source_website = __esm({
|
|
|
40785
40876
|
const id = options.id || `sitemap_${sitemapUrl.replace(/https?:\/\//, "").replace(/\//g, "_")}`;
|
|
40786
40877
|
return new _WebsiteSource(id, "sitemap", { ...options, sitemapUrl });
|
|
40787
40878
|
}
|
|
40879
|
+
static fromLlmsTxt(llmsTxtUrl, options = {}) {
|
|
40880
|
+
const id = options.id || `llmstxt_${llmsTxtUrl.replace(/https?:\/\//, "").replace(/\//g, "_")}`;
|
|
40881
|
+
return new _WebsiteSource(id, "llms-txt", { ...options, llmsTxtUrl });
|
|
40882
|
+
}
|
|
40788
40883
|
static fromUrls(urls, options = {}) {
|
|
40789
40884
|
let defaultId = `urls_${urls.length}_pages`;
|
|
40790
40885
|
if (urls.length > 0) {
|
|
@@ -47604,7 +47699,7 @@ var init_source_directory = __esm({
|
|
|
47604
47699
|
_directoryPath;
|
|
47605
47700
|
_filterFn;
|
|
47606
47701
|
constructor(id, directoryPath, options = {}) {
|
|
47607
|
-
super(id, "
|
|
47702
|
+
super(id, "document");
|
|
47608
47703
|
this._directoryPath = directoryPath;
|
|
47609
47704
|
this._filterFn = options.filter ?? void 0;
|
|
47610
47705
|
}
|
|
@@ -47649,10 +47744,11 @@ var init_source_directory = __esm({
|
|
|
47649
47744
|
const crypto3 = await import("crypto");
|
|
47650
47745
|
const directory = path5.resolve(adk.environment.agent.directory, this.directoryPath);
|
|
47651
47746
|
const tags = {
|
|
47652
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
47747
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
47748
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]: input.kbId,
|
|
47749
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName,
|
|
47653
47750
|
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]: this.id,
|
|
47654
|
-
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
47655
|
-
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName
|
|
47751
|
+
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
47656
47752
|
};
|
|
47657
47753
|
if (!directory.startsWith(adk.environment.agent.directory)) {
|
|
47658
47754
|
throw new Error("Directory path must be within the agent's directory");
|
|
@@ -47736,8 +47832,8 @@ var init_source_directory = __esm({
|
|
|
47736
47832
|
},
|
|
47737
47833
|
metadata: {
|
|
47738
47834
|
hash,
|
|
47739
|
-
|
|
47740
|
-
|
|
47835
|
+
dsId: this.id,
|
|
47836
|
+
dsType: this.type,
|
|
47741
47837
|
relPath: local.rel,
|
|
47742
47838
|
[WellKnownMetadata.knowledge.TITLE]: title
|
|
47743
47839
|
}
|
|
@@ -47774,10 +47870,10 @@ var init_source_directory = __esm({
|
|
|
47774
47870
|
|
|
47775
47871
|
// src/primitives/data-sources/index.ts
|
|
47776
47872
|
function isDirectorySource(source) {
|
|
47777
|
-
return source.type === "
|
|
47873
|
+
return source.type === "document";
|
|
47778
47874
|
}
|
|
47779
47875
|
function isWebsiteSource(source) {
|
|
47780
|
-
return source.type === "
|
|
47876
|
+
return source.type === "web-page";
|
|
47781
47877
|
}
|
|
47782
47878
|
function isTableSource(source) {
|
|
47783
47879
|
return source.type === "table";
|
|
@@ -47802,6 +47898,7 @@ var init_knowledge = __esm({
|
|
|
47802
47898
|
init_define_BUILD();
|
|
47803
47899
|
init_define_PACKAGE_VERSIONS();
|
|
47804
47900
|
init_knowledge_indexing();
|
|
47901
|
+
init_runtime();
|
|
47805
47902
|
((Typings8) => {
|
|
47806
47903
|
Typings8.Primitive = "knowledge";
|
|
47807
47904
|
})(Typings5 || (Typings5 = {}));
|
|
@@ -47823,6 +47920,18 @@ var init_knowledge = __esm({
|
|
|
47823
47920
|
sources: this.sources
|
|
47824
47921
|
};
|
|
47825
47922
|
}
|
|
47923
|
+
/**
|
|
47924
|
+
* Look up the KB ID from Botpress
|
|
47925
|
+
*/
|
|
47926
|
+
async getKbId() {
|
|
47927
|
+
const client = context.get("client")._inner;
|
|
47928
|
+
const kbs = await client.list.knowledgeBases({}).collect();
|
|
47929
|
+
const remoteKb = kbs.find((k) => k.name === this.name);
|
|
47930
|
+
if (!remoteKb) {
|
|
47931
|
+
throw new Error(`KB '${this.name}' not found in Botpress - run 'adk deploy' or approve KB sync in 'adk dev'`);
|
|
47932
|
+
}
|
|
47933
|
+
return remoteKb.id;
|
|
47934
|
+
}
|
|
47826
47935
|
/**
|
|
47827
47936
|
* Refresh the knowledge base by triggering the built-in indexing workflow for all sources
|
|
47828
47937
|
* This will fetch data from all data sources and update the knowledge base
|
|
@@ -47830,10 +47939,12 @@ var init_knowledge = __esm({
|
|
|
47830
47939
|
* @returns Promise that resolves when the indexing workflow has been started
|
|
47831
47940
|
*/
|
|
47832
47941
|
async refresh(options) {
|
|
47942
|
+
const kbId = await this.getKbId();
|
|
47833
47943
|
await KnowledgeIndexingWorkflow.getOrCreate({
|
|
47834
47944
|
key: `kb:${this.name}`,
|
|
47835
47945
|
input: {
|
|
47836
47946
|
kbName: this.name,
|
|
47947
|
+
kbId,
|
|
47837
47948
|
force: options?.force || false
|
|
47838
47949
|
}
|
|
47839
47950
|
});
|
|
@@ -47841,20 +47952,22 @@ var init_knowledge = __esm({
|
|
|
47841
47952
|
/**
|
|
47842
47953
|
* Refresh a specific data source by its ID
|
|
47843
47954
|
*
|
|
47844
|
-
* @param
|
|
47955
|
+
* @param dsId - The ID of the data source to refresh
|
|
47845
47956
|
* @param force - If true, forces re-indexing of all data even if unchanged
|
|
47846
47957
|
* @returns Promise that resolves when the source has been queued for indexing
|
|
47847
47958
|
*/
|
|
47848
|
-
async refreshSource(
|
|
47849
|
-
const source = this.sources.find((s) => s.id ===
|
|
47959
|
+
async refreshSource(dsId, options) {
|
|
47960
|
+
const source = this.sources.find((s) => s.id === dsId);
|
|
47850
47961
|
if (!source) {
|
|
47851
|
-
throw new Error(`Data source with id "${
|
|
47962
|
+
throw new Error(`Data source with id "${dsId}" not found in knowledge base "${this.name}"`);
|
|
47852
47963
|
}
|
|
47964
|
+
const kbId = await this.getKbId();
|
|
47853
47965
|
await source.syncWorkflow.getOrCreate({
|
|
47854
|
-
key: `${this.name}:${
|
|
47966
|
+
key: `${this.name}:${dsId}`,
|
|
47855
47967
|
input: {
|
|
47856
47968
|
kbName: this.name,
|
|
47857
|
-
|
|
47969
|
+
kbId,
|
|
47970
|
+
dsId,
|
|
47858
47971
|
force: options?.force || false
|
|
47859
47972
|
}
|
|
47860
47973
|
});
|