@botpress/runtime 1.10.4 → 1.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.d.ts +9 -10
- package/dist/constants.d.ts.map +1 -1
- package/dist/definition.js +330 -233
- package/dist/definition.js.map +4 -4
- package/dist/internal.js +160 -47
- package/dist/internal.js.map +3 -3
- package/dist/library.js +160 -47
- package/dist/library.js.map +3 -3
- package/dist/primitives/data-sources/source-base.d.ts +12 -6
- package/dist/primitives/data-sources/source-base.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-directory.d.ts +6 -3
- package/dist/primitives/data-sources/source-directory.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-table.d.ts +6 -3
- package/dist/primitives/data-sources/source-table.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-website.d.ts +10 -4
- package/dist/primitives/data-sources/source-website.d.ts.map +1 -1
- package/dist/primitives/index.d.ts +6 -3
- package/dist/primitives/index.d.ts.map +1 -1
- package/dist/primitives/knowledge.d.ts +6 -2
- package/dist/primitives/knowledge.d.ts.map +1 -1
- package/dist/runtime/autonomous.d.ts.map +1 -1
- package/dist/runtime/tracked-state.d.ts +2 -0
- package/dist/runtime/tracked-state.d.ts.map +1 -1
- package/dist/runtime/workflows/knowledge-indexing.d.ts +3 -0
- package/dist/runtime/workflows/knowledge-indexing.d.ts.map +1 -1
- package/dist/runtime.js +138 -41
- package/dist/runtime.js.map +3 -3
- package/package.json +1 -1
package/dist/library.js
CHANGED
|
@@ -48,7 +48,7 @@ var init_define_BUILD = __esm({
|
|
|
48
48
|
var define_PACKAGE_VERSIONS_default;
|
|
49
49
|
var init_define_PACKAGE_VERSIONS = __esm({
|
|
50
50
|
"<define:__PACKAGE_VERSIONS__>"() {
|
|
51
|
-
define_PACKAGE_VERSIONS_default = { runtime: "1.10.
|
|
51
|
+
define_PACKAGE_VERSIONS_default = { runtime: "1.10.5", adk: "1.10.5", sdk: "4.20.2", llmz: "0.0.33", zai: "2.5.0", cognitive: "0.2.0" };
|
|
52
52
|
}
|
|
53
53
|
});
|
|
54
54
|
|
|
@@ -33934,15 +33934,14 @@ var init_constants = __esm({
|
|
|
33934
33934
|
WellKnownTags = {
|
|
33935
33935
|
knowledge: {
|
|
33936
33936
|
/**
|
|
33937
|
-
* All knowledge base have this tag (with value "
|
|
33938
|
-
* @example "
|
|
33937
|
+
* All knowledge base have this tag (with value "knowledge-base") to identify them as knowledge-related records.
|
|
33938
|
+
* @example "source": "knowledge-base"
|
|
33939
33939
|
*/
|
|
33940
|
-
KNOWLEDGE: "
|
|
33940
|
+
KNOWLEDGE: "source",
|
|
33941
33941
|
/**
|
|
33942
33942
|
* The ID of the knowledge base the record belongs to.
|
|
33943
|
-
* This is the ID of the Knowledge Base primitive
|
|
33943
|
+
* This is the ID of the Knowledge Base primitive from Botpress.
|
|
33944
33944
|
* @example "kbId": "kb_01K6RT9T39KF7K0A7R7D71TDZ1"
|
|
33945
|
-
* @deprecated Use KNOWLEDGE_BASE_NAME for now, as we will be moving to IDs later.
|
|
33946
33945
|
*/
|
|
33947
33946
|
KNOWLEDGE_BASE_ID: "kbId",
|
|
33948
33947
|
/**
|
|
@@ -33952,15 +33951,15 @@ var init_constants = __esm({
|
|
|
33952
33951
|
KNOWLEDGE_BASE_NAME: "kbName",
|
|
33953
33952
|
/**
|
|
33954
33953
|
* The ID of the Data Source the record was ingested from.
|
|
33955
|
-
* @example "
|
|
33954
|
+
* @example "dsId": "docs"
|
|
33956
33955
|
*/
|
|
33957
|
-
KNOWLEDGE_SOURCE_ID: "
|
|
33956
|
+
KNOWLEDGE_SOURCE_ID: "dsId",
|
|
33958
33957
|
/**
|
|
33959
33958
|
* The type of the Data Source the record was ingested from.
|
|
33960
|
-
* Possible values are: "
|
|
33961
|
-
* @example "
|
|
33959
|
+
* Possible values are: "document", "rich-text", "web-page", etc.
|
|
33960
|
+
* @example "dsType": "document"
|
|
33962
33961
|
*/
|
|
33963
|
-
KNOWLEDGE_SOURCE_TYPE: "
|
|
33962
|
+
KNOWLEDGE_SOURCE_TYPE: "dsType"
|
|
33964
33963
|
}
|
|
33965
33964
|
};
|
|
33966
33965
|
WellKnownMetadata = {
|
|
@@ -34057,7 +34056,7 @@ If the question is not related to the knowledge bases, do NOT use this tool.`.tr
|
|
|
34057
34056
|
includeBreadcrumb: true,
|
|
34058
34057
|
contextDepth: 4,
|
|
34059
34058
|
tags: {
|
|
34060
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
34059
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
34061
34060
|
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: kbNames
|
|
34062
34061
|
}
|
|
34063
34062
|
});
|
|
@@ -34092,11 +34091,14 @@ If the question is not related to the knowledge bases, do NOT use this tool.`.tr
|
|
|
34092
34091
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]) {
|
|
34093
34092
|
citationMetadata.knowledgeBase = tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME];
|
|
34094
34093
|
}
|
|
34094
|
+
if (tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]) {
|
|
34095
|
+
citationMetadata.knowledgeBaseId = tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID];
|
|
34096
|
+
}
|
|
34095
34097
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]) {
|
|
34096
|
-
citationMetadata.
|
|
34098
|
+
citationMetadata.dsType = tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE];
|
|
34097
34099
|
}
|
|
34098
34100
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]) {
|
|
34099
|
-
citationMetadata.
|
|
34101
|
+
citationMetadata.dsId = tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID];
|
|
34100
34102
|
}
|
|
34101
34103
|
for (const key in citationMetadata) {
|
|
34102
34104
|
let value = citationMetadata[key];
|
|
@@ -35245,8 +35247,9 @@ var init_source_base = __esm({
|
|
|
35245
35247
|
size: z6.number()
|
|
35246
35248
|
});
|
|
35247
35249
|
SyncInput = z6.object({
|
|
35248
|
-
|
|
35250
|
+
dsId: z6.string(),
|
|
35249
35251
|
kbName: z6.string(),
|
|
35252
|
+
kbId: z6.string(),
|
|
35250
35253
|
force: z6.boolean().optional().describe("Force re-indexing even if files haven't changed").default(false)
|
|
35251
35254
|
});
|
|
35252
35255
|
SyncOutput = z6.object({
|
|
@@ -35263,18 +35266,16 @@ var init_source_base = __esm({
|
|
|
35263
35266
|
state: props.state,
|
|
35264
35267
|
timeout: "120m",
|
|
35265
35268
|
async handler(execProps) {
|
|
35266
|
-
const { kbName,
|
|
35269
|
+
const { kbName, kbId, dsId } = execProps.input;
|
|
35267
35270
|
const kb = adk.project.knowledge.find((kb2) => kb2.name === kbName);
|
|
35268
35271
|
if (!kb) {
|
|
35269
35272
|
throw new Error(`Knowledge base '${kbName}' not found`);
|
|
35270
35273
|
}
|
|
35271
|
-
const source = kb.sources.find((s) => s.id ===
|
|
35274
|
+
const source = kb.sources.find((s) => s.id === dsId);
|
|
35272
35275
|
if (!source) {
|
|
35273
|
-
throw new Error(`Data source with ID '${
|
|
35276
|
+
throw new Error(`Data source with ID '${dsId}' not found in knowledge base '${kbName}'`);
|
|
35274
35277
|
}
|
|
35275
|
-
console.log(
|
|
35276
|
-
`\u{1F504} Starting sync for data source '${sourceId}' of type '${source.type}' in knowledge base '${kbName}'`
|
|
35277
|
-
);
|
|
35278
|
+
console.log(`\u{1F504} Starting sync for data source '${dsId}' of type '${source.type}' in knowledge base '${kbName}' (${kbId})`);
|
|
35278
35279
|
return await props.handler.bind(source)(execProps);
|
|
35279
35280
|
}
|
|
35280
35281
|
});
|
|
@@ -35305,12 +35306,13 @@ var init_knowledge_indexing = __esm({
|
|
|
35305
35306
|
description: "Built-in workflow to re-index all data sources in a knowledge base",
|
|
35306
35307
|
input: z7.object({
|
|
35307
35308
|
kbName: z7.string(),
|
|
35309
|
+
kbId: z7.string(),
|
|
35308
35310
|
force: z7.boolean().optional().describe("Force re-indexing even if files haven't changed").default(false)
|
|
35309
35311
|
}),
|
|
35310
35312
|
timeout: "180m",
|
|
35311
35313
|
output: SyncOutput,
|
|
35312
35314
|
handler: async ({ input, step: step2 }) => {
|
|
35313
|
-
const { kbName } = input;
|
|
35315
|
+
const { kbName, kbId } = input;
|
|
35314
35316
|
const kb = adk.project.knowledge.find((x) => x.name === kbName);
|
|
35315
35317
|
if (!kb) {
|
|
35316
35318
|
throw new Error(`Knowledge base '${kbName}' not found`);
|
|
@@ -35326,7 +35328,8 @@ var init_knowledge_indexing = __esm({
|
|
|
35326
35328
|
statuses: ["in_progress", "listening", "pending", "paused"],
|
|
35327
35329
|
input: {
|
|
35328
35330
|
kbName,
|
|
35329
|
-
|
|
35331
|
+
kbId,
|
|
35332
|
+
dsId: source.id,
|
|
35330
35333
|
force: input.force || false
|
|
35331
35334
|
}
|
|
35332
35335
|
}).then((x) => x.id)
|
|
@@ -36757,7 +36760,9 @@ var init_tracked_state = __esm({
|
|
|
36757
36760
|
/** Workflow-specific state (persists across workflow executions) */
|
|
36758
36761
|
workflowState: "workflowState",
|
|
36759
36762
|
/** Workflow cached steps executions */
|
|
36760
|
-
workflowSteps: "workflowSteps"
|
|
36763
|
+
workflowSteps: "workflowSteps",
|
|
36764
|
+
/** Data source metadata for dashboard visibility */
|
|
36765
|
+
dsData: "dsData"
|
|
36761
36766
|
};
|
|
36762
36767
|
TrackedState = class _TrackedState {
|
|
36763
36768
|
type;
|
|
@@ -40054,6 +40059,7 @@ var init_source_website = __esm({
|
|
|
40054
40059
|
init_constants();
|
|
40055
40060
|
init_fxp();
|
|
40056
40061
|
init_library();
|
|
40062
|
+
init_context();
|
|
40057
40063
|
init_html_fetch();
|
|
40058
40064
|
State = z23.object({
|
|
40059
40065
|
urls: z23.array(
|
|
@@ -40070,6 +40076,7 @@ var init_source_website = __esm({
|
|
|
40070
40076
|
mode;
|
|
40071
40077
|
baseUrl;
|
|
40072
40078
|
sitemapUrl;
|
|
40079
|
+
llmsTxtUrl;
|
|
40073
40080
|
urls;
|
|
40074
40081
|
filterFn;
|
|
40075
40082
|
customFetch;
|
|
@@ -40078,10 +40085,11 @@ var init_source_website = __esm({
|
|
|
40078
40085
|
maxDepth;
|
|
40079
40086
|
transformFn;
|
|
40080
40087
|
constructor(id, mode, options) {
|
|
40081
|
-
super(id, "
|
|
40088
|
+
super(id, "web-page");
|
|
40082
40089
|
this.mode = mode;
|
|
40083
40090
|
this.baseUrl = options.baseUrl ?? void 0;
|
|
40084
40091
|
this.sitemapUrl = options.sitemapUrl ?? void 0;
|
|
40092
|
+
this.llmsTxtUrl = options.llmsTxtUrl ?? void 0;
|
|
40085
40093
|
this.urls = options.urls ?? void 0;
|
|
40086
40094
|
this.filterFn = "filter" in options ? options.filter : void 0;
|
|
40087
40095
|
if (typeof options.fetch === "string") {
|
|
@@ -40110,6 +40118,9 @@ var init_source_website = __esm({
|
|
|
40110
40118
|
} else if (this.mode === "sitemap") {
|
|
40111
40119
|
config.sitemapUrl = this.sitemapUrl;
|
|
40112
40120
|
config.maxPages = this.maxPages;
|
|
40121
|
+
} else if (this.mode === "llms-txt") {
|
|
40122
|
+
config.llmsTxtUrl = this.llmsTxtUrl;
|
|
40123
|
+
config.maxPages = this.maxPages;
|
|
40113
40124
|
} else if (this.mode === "urls") {
|
|
40114
40125
|
config.urls = this.urls;
|
|
40115
40126
|
}
|
|
@@ -40161,7 +40172,7 @@ var init_source_website = __esm({
|
|
|
40161
40172
|
/**
|
|
40162
40173
|
* Fetch content from a URL for sitemap parsing (raw content needed)
|
|
40163
40174
|
*/
|
|
40164
|
-
async
|
|
40175
|
+
async fetchRaw(url2) {
|
|
40165
40176
|
if (this.customFetch) {
|
|
40166
40177
|
try {
|
|
40167
40178
|
return await this.customFetch(url2);
|
|
@@ -40279,6 +40290,21 @@ var init_source_website = __esm({
|
|
|
40279
40290
|
}
|
|
40280
40291
|
return { urls };
|
|
40281
40292
|
}
|
|
40293
|
+
parseLlmsTxt(content) {
|
|
40294
|
+
const urls = [];
|
|
40295
|
+
const lines = content.split("\n");
|
|
40296
|
+
const urlRegex = /https?:\/\/[^\s)]+\.md/g;
|
|
40297
|
+
for (const line of lines) {
|
|
40298
|
+
const matches = line.matchAll(urlRegex);
|
|
40299
|
+
for (const match2 of matches) {
|
|
40300
|
+
const [url2] = match2;
|
|
40301
|
+
if (url2) {
|
|
40302
|
+
urls.push(url2);
|
|
40303
|
+
}
|
|
40304
|
+
}
|
|
40305
|
+
}
|
|
40306
|
+
return { urls };
|
|
40307
|
+
}
|
|
40282
40308
|
/**
|
|
40283
40309
|
* Discover URLs from a website using browser integration
|
|
40284
40310
|
*/
|
|
@@ -40336,6 +40362,28 @@ var init_source_website = __esm({
|
|
|
40336
40362
|
if (this.mode === "website") {
|
|
40337
40363
|
return this.discoverUrlsFromWebsite(step2);
|
|
40338
40364
|
}
|
|
40365
|
+
if (this.mode === "llms-txt") {
|
|
40366
|
+
if (!this.llmsTxtUrl) {
|
|
40367
|
+
throw new Error("No llms.txt URL provided");
|
|
40368
|
+
}
|
|
40369
|
+
const { content } = await step2("fetch llms.txt", () => this.fetchRaw(this.llmsTxtUrl));
|
|
40370
|
+
const { urls } = this.parseLlmsTxt(content);
|
|
40371
|
+
console.log(`Parsed ${urls.length} URLs from llms.txt`);
|
|
40372
|
+
const filteredUrls = [];
|
|
40373
|
+
for (const url2 of urls) {
|
|
40374
|
+
if (filteredUrls.length >= this.maxPages) {
|
|
40375
|
+
console.log(`Reached maxPages limit (${this.maxPages}), stopping`);
|
|
40376
|
+
break;
|
|
40377
|
+
}
|
|
40378
|
+
const filterContext = { url: url2 };
|
|
40379
|
+
if (!this.filterFn || this.filterFn(filterContext)) {
|
|
40380
|
+
filteredUrls.push({ loc: url2 });
|
|
40381
|
+
} else {
|
|
40382
|
+
console.log(`Skipped URL (filtered): ${url2}`);
|
|
40383
|
+
}
|
|
40384
|
+
}
|
|
40385
|
+
return filteredUrls;
|
|
40386
|
+
}
|
|
40339
40387
|
if (!this.sitemapUrl) {
|
|
40340
40388
|
throw new Error("No sitemap URL provided");
|
|
40341
40389
|
}
|
|
@@ -40349,7 +40397,7 @@ var init_source_website = __esm({
|
|
|
40349
40397
|
}
|
|
40350
40398
|
await step2(`processing sitemap ${item.url}`, async () => {
|
|
40351
40399
|
try {
|
|
40352
|
-
const { content, contentType } = await this.
|
|
40400
|
+
const { content, contentType } = await this.fetchRaw(item.url);
|
|
40353
40401
|
console.log(`Fetched sitemap ${item.url} (${content.length} bytes), processing... ${contentType}`);
|
|
40354
40402
|
console.log(content, contentType);
|
|
40355
40403
|
try {
|
|
@@ -40418,15 +40466,16 @@ var init_source_website = __esm({
|
|
|
40418
40466
|
console.log(
|
|
40419
40467
|
`Starting sync for WebsiteSource [${this.id}] in mode [${this.mode}, maxPages=${this.maxPages}, maxDepth=${this.maxDepth}, baseUrl=${this.baseUrl}, sitemapUrl=${this.sitemapUrl}]`
|
|
40420
40468
|
);
|
|
40421
|
-
console.log(`Using knowledge base: ${input.kbName}, force reindex: ${!!input.force}, ${input.
|
|
40469
|
+
console.log(`Using knowledge base: ${input.kbName}, force reindex: ${!!input.force}, ${input.dsId}`);
|
|
40422
40470
|
if (input.force) {
|
|
40423
40471
|
console.log("\u{1F504} FORCE MODE: Re-indexing all files regardless of changes");
|
|
40424
40472
|
}
|
|
40425
40473
|
const tags = {
|
|
40426
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
40474
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
40475
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]: input.kbId,
|
|
40476
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName,
|
|
40427
40477
|
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]: this.id,
|
|
40428
|
-
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
40429
|
-
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName
|
|
40478
|
+
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
40430
40479
|
};
|
|
40431
40480
|
const discoveredUrls = await step2("discover urls from sitemap", () => this.discoverUrls(step2, state));
|
|
40432
40481
|
console.log(`Discovered ${discoveredUrls.length} URLs from sitemap`);
|
|
@@ -40472,6 +40521,7 @@ var init_source_website = __esm({
|
|
|
40472
40521
|
console.log(
|
|
40473
40522
|
`To fetch: ${toFetch.length}, To remove: ${toRemove.length}, Skipped (unchanged): ${skippedUnchanged}${input.force ? " [FORCE MODE]" : ""}`
|
|
40474
40523
|
);
|
|
40524
|
+
console.log(`[SYNC DEBUG] Starting 'deleting removed urls' step...`);
|
|
40475
40525
|
const deleted = await step2.map(
|
|
40476
40526
|
"deleting removed urls",
|
|
40477
40527
|
toRemove,
|
|
@@ -40485,6 +40535,7 @@ var init_source_website = __esm({
|
|
|
40485
40535
|
),
|
|
40486
40536
|
{ concurrency: 5 }
|
|
40487
40537
|
);
|
|
40538
|
+
console.log(`[SYNC DEBUG] Deleted ${deleted.length} URLs, starting fetch phase...`);
|
|
40488
40539
|
const fetchAndIndex = async (sitemapUrl) => {
|
|
40489
40540
|
try {
|
|
40490
40541
|
const {
|
|
@@ -40531,8 +40582,8 @@ var init_source_website = __esm({
|
|
|
40531
40582
|
...fetchMetadata?.[WellKnownMetadata.knowledge.FAVICON] && {
|
|
40532
40583
|
[WellKnownMetadata.knowledge.FAVICON]: fetchMetadata[WellKnownMetadata.knowledge.FAVICON]
|
|
40533
40584
|
},
|
|
40534
|
-
|
|
40535
|
-
|
|
40585
|
+
dsId: this.id,
|
|
40586
|
+
dsType: this.type
|
|
40536
40587
|
}
|
|
40537
40588
|
});
|
|
40538
40589
|
return {
|
|
@@ -40546,13 +40597,53 @@ var init_source_website = __esm({
|
|
|
40546
40597
|
return null;
|
|
40547
40598
|
}
|
|
40548
40599
|
};
|
|
40600
|
+
console.log(`[SYNC DEBUG] Starting 'fetching and indexing pages' for ${toFetch.slice(0, this.maxPages).length} URLs...`);
|
|
40549
40601
|
const indexed = await step2.map(
|
|
40550
40602
|
"fetching and indexing pages",
|
|
40551
40603
|
toFetch.slice(0, this.maxPages),
|
|
40552
40604
|
(url2) => fetchAndIndex(url2),
|
|
40553
40605
|
{ concurrency: 20, maxAttempts: 2 }
|
|
40554
40606
|
);
|
|
40607
|
+
console.log(`[SYNC DEBUG] Fetch complete. ${indexed.length} attempted, ${indexed.filter((f) => f !== null).length} successful`);
|
|
40555
40608
|
const successful = indexed.filter((f) => f !== null);
|
|
40609
|
+
console.log(`[SYNC DEBUG] Starting dsData registration...`);
|
|
40610
|
+
await step2("register web page source", async () => {
|
|
40611
|
+
try {
|
|
40612
|
+
const botId = context.get("botId");
|
|
40613
|
+
const getStateResult = await client._inner.getState({ id: botId, type: "bot", name: "dsData" }).catch(() => ({ state: null }));
|
|
40614
|
+
const freshState = getStateResult.state;
|
|
40615
|
+
const existingPayload = freshState?.payload || {};
|
|
40616
|
+
const kbPayload = existingPayload[input.kbId] || {};
|
|
40617
|
+
const websiteUrl = this.baseUrl || this.sitemapUrl || this.urls?.[0] || "";
|
|
40618
|
+
const getTitle = () => {
|
|
40619
|
+
if (!websiteUrl) return "Website";
|
|
40620
|
+
try {
|
|
40621
|
+
const urlWithProtocol = websiteUrl.includes("://") ? websiteUrl : `https://${websiteUrl}`;
|
|
40622
|
+
return new URL(urlWithProtocol).hostname;
|
|
40623
|
+
} catch {
|
|
40624
|
+
return websiteUrl;
|
|
40625
|
+
}
|
|
40626
|
+
};
|
|
40627
|
+
kbPayload[this.id] = {
|
|
40628
|
+
type: "web-page",
|
|
40629
|
+
title: getTitle(),
|
|
40630
|
+
createdOn: kbPayload[this.id]?.createdOn || Date.now(),
|
|
40631
|
+
// Preserve original creation time
|
|
40632
|
+
data: { websiteUrl, pages: [], indexingJobs: [] }
|
|
40633
|
+
};
|
|
40634
|
+
existingPayload[input.kbId] = kbPayload;
|
|
40635
|
+
await client._inner.setState({
|
|
40636
|
+
id: botId,
|
|
40637
|
+
type: "bot",
|
|
40638
|
+
name: "dsData",
|
|
40639
|
+
payload: existingPayload
|
|
40640
|
+
});
|
|
40641
|
+
console.log(`Registered web page source "${this.id}" in dsData for KB ${input.kbId}`);
|
|
40642
|
+
} catch (err) {
|
|
40643
|
+
console.warn("Failed to register web page source in dsData state:", err);
|
|
40644
|
+
}
|
|
40645
|
+
});
|
|
40646
|
+
console.log(`[SYNC DEBUG] \u2705 WebsiteSource sync complete for "${this.id}". Processed: ${discoveredUrls.length}, Added: ${successful.length}, Deleted: ${deleted.length}`);
|
|
40556
40647
|
return {
|
|
40557
40648
|
processed: discoveredUrls.length,
|
|
40558
40649
|
deleted,
|
|
@@ -40571,6 +40662,10 @@ var init_source_website = __esm({
|
|
|
40571
40662
|
const id = options.id || `sitemap_${sitemapUrl.replace(/https?:\/\//, "").replace(/\//g, "_")}`;
|
|
40572
40663
|
return new _WebsiteSource(id, "sitemap", { ...options, sitemapUrl });
|
|
40573
40664
|
}
|
|
40665
|
+
static fromLlmsTxt(llmsTxtUrl, options = {}) {
|
|
40666
|
+
const id = options.id || `llmstxt_${llmsTxtUrl.replace(/https?:\/\//, "").replace(/\//g, "_")}`;
|
|
40667
|
+
return new _WebsiteSource(id, "llms-txt", { ...options, llmsTxtUrl });
|
|
40668
|
+
}
|
|
40574
40669
|
static fromUrls(urls, options = {}) {
|
|
40575
40670
|
let defaultId = `urls_${urls.length}_pages`;
|
|
40576
40671
|
if (urls.length > 0) {
|
|
@@ -47390,7 +47485,7 @@ var init_source_directory = __esm({
|
|
|
47390
47485
|
_directoryPath;
|
|
47391
47486
|
_filterFn;
|
|
47392
47487
|
constructor(id, directoryPath, options = {}) {
|
|
47393
|
-
super(id, "
|
|
47488
|
+
super(id, "document");
|
|
47394
47489
|
this._directoryPath = directoryPath;
|
|
47395
47490
|
this._filterFn = options.filter ?? void 0;
|
|
47396
47491
|
}
|
|
@@ -47435,10 +47530,11 @@ var init_source_directory = __esm({
|
|
|
47435
47530
|
const crypto3 = await import("crypto");
|
|
47436
47531
|
const directory = path4.resolve(adk.environment.agent.directory, this.directoryPath);
|
|
47437
47532
|
const tags = {
|
|
47438
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
47533
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
47534
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]: input.kbId,
|
|
47535
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName,
|
|
47439
47536
|
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]: this.id,
|
|
47440
|
-
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
47441
|
-
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName
|
|
47537
|
+
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
47442
47538
|
};
|
|
47443
47539
|
if (!directory.startsWith(adk.environment.agent.directory)) {
|
|
47444
47540
|
throw new Error("Directory path must be within the agent's directory");
|
|
@@ -47522,8 +47618,8 @@ var init_source_directory = __esm({
|
|
|
47522
47618
|
},
|
|
47523
47619
|
metadata: {
|
|
47524
47620
|
hash,
|
|
47525
|
-
|
|
47526
|
-
|
|
47621
|
+
dsId: this.id,
|
|
47622
|
+
dsType: this.type,
|
|
47527
47623
|
relPath: local.rel,
|
|
47528
47624
|
[WellKnownMetadata.knowledge.TITLE]: title
|
|
47529
47625
|
}
|
|
@@ -47560,10 +47656,10 @@ var init_source_directory = __esm({
|
|
|
47560
47656
|
|
|
47561
47657
|
// src/primitives/data-sources/index.ts
|
|
47562
47658
|
function isDirectorySource(source) {
|
|
47563
|
-
return source.type === "
|
|
47659
|
+
return source.type === "document";
|
|
47564
47660
|
}
|
|
47565
47661
|
function isWebsiteSource(source) {
|
|
47566
|
-
return source.type === "
|
|
47662
|
+
return source.type === "web-page";
|
|
47567
47663
|
}
|
|
47568
47664
|
function isTableSource(source) {
|
|
47569
47665
|
return source.type === "table";
|
|
@@ -47588,6 +47684,7 @@ var init_knowledge = __esm({
|
|
|
47588
47684
|
init_define_BUILD();
|
|
47589
47685
|
init_define_PACKAGE_VERSIONS();
|
|
47590
47686
|
init_knowledge_indexing();
|
|
47687
|
+
init_runtime();
|
|
47591
47688
|
((Typings8) => {
|
|
47592
47689
|
Typings8.Primitive = "knowledge";
|
|
47593
47690
|
})(Typings5 || (Typings5 = {}));
|
|
@@ -47609,6 +47706,18 @@ var init_knowledge = __esm({
|
|
|
47609
47706
|
sources: this.sources
|
|
47610
47707
|
};
|
|
47611
47708
|
}
|
|
47709
|
+
/**
|
|
47710
|
+
* Look up the KB ID from Botpress
|
|
47711
|
+
*/
|
|
47712
|
+
async getKbId() {
|
|
47713
|
+
const client = context.get("client")._inner;
|
|
47714
|
+
const kbs = await client.list.knowledgeBases({}).collect();
|
|
47715
|
+
const remoteKb = kbs.find((k) => k.name === this.name);
|
|
47716
|
+
if (!remoteKb) {
|
|
47717
|
+
throw new Error(`KB '${this.name}' not found in Botpress - run 'adk deploy' or approve KB sync in 'adk dev'`);
|
|
47718
|
+
}
|
|
47719
|
+
return remoteKb.id;
|
|
47720
|
+
}
|
|
47612
47721
|
/**
|
|
47613
47722
|
* Refresh the knowledge base by triggering the built-in indexing workflow for all sources
|
|
47614
47723
|
* This will fetch data from all data sources and update the knowledge base
|
|
@@ -47616,10 +47725,12 @@ var init_knowledge = __esm({
|
|
|
47616
47725
|
* @returns Promise that resolves when the indexing workflow has been started
|
|
47617
47726
|
*/
|
|
47618
47727
|
async refresh(options) {
|
|
47728
|
+
const kbId = await this.getKbId();
|
|
47619
47729
|
await KnowledgeIndexingWorkflow.getOrCreate({
|
|
47620
47730
|
key: `kb:${this.name}`,
|
|
47621
47731
|
input: {
|
|
47622
47732
|
kbName: this.name,
|
|
47733
|
+
kbId,
|
|
47623
47734
|
force: options?.force || false
|
|
47624
47735
|
}
|
|
47625
47736
|
});
|
|
@@ -47627,20 +47738,22 @@ var init_knowledge = __esm({
|
|
|
47627
47738
|
/**
|
|
47628
47739
|
* Refresh a specific data source by its ID
|
|
47629
47740
|
*
|
|
47630
|
-
* @param
|
|
47741
|
+
* @param dsId - The ID of the data source to refresh
|
|
47631
47742
|
* @param force - If true, forces re-indexing of all data even if unchanged
|
|
47632
47743
|
* @returns Promise that resolves when the source has been queued for indexing
|
|
47633
47744
|
*/
|
|
47634
|
-
async refreshSource(
|
|
47635
|
-
const source = this.sources.find((s) => s.id ===
|
|
47745
|
+
async refreshSource(dsId, options) {
|
|
47746
|
+
const source = this.sources.find((s) => s.id === dsId);
|
|
47636
47747
|
if (!source) {
|
|
47637
|
-
throw new Error(`Data source with id "${
|
|
47748
|
+
throw new Error(`Data source with id "${dsId}" not found in knowledge base "${this.name}"`);
|
|
47638
47749
|
}
|
|
47750
|
+
const kbId = await this.getKbId();
|
|
47639
47751
|
await source.syncWorkflow.getOrCreate({
|
|
47640
|
-
key: `${this.name}:${
|
|
47752
|
+
key: `${this.name}:${dsId}`,
|
|
47641
47753
|
input: {
|
|
47642
47754
|
kbName: this.name,
|
|
47643
|
-
|
|
47755
|
+
kbId,
|
|
47756
|
+
dsId,
|
|
47644
47757
|
force: options?.force || false
|
|
47645
47758
|
}
|
|
47646
47759
|
});
|