@botpress/runtime 1.10.4 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.d.ts +9 -10
- package/dist/constants.d.ts.map +1 -1
- package/dist/definition.js +336 -239
- package/dist/definition.js.map +4 -4
- package/dist/internal.js +166 -53
- package/dist/internal.js.map +4 -4
- package/dist/library.js +166 -53
- package/dist/library.js.map +4 -4
- package/dist/primitives/data-sources/source-base.d.ts +12 -6
- package/dist/primitives/data-sources/source-base.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-directory.d.ts +6 -3
- package/dist/primitives/data-sources/source-directory.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-table.d.ts +6 -3
- package/dist/primitives/data-sources/source-table.d.ts.map +1 -1
- package/dist/primitives/data-sources/source-website.d.ts +10 -4
- package/dist/primitives/data-sources/source-website.d.ts.map +1 -1
- package/dist/primitives/index.d.ts +6 -3
- package/dist/primitives/index.d.ts.map +1 -1
- package/dist/primitives/knowledge.d.ts +6 -2
- package/dist/primitives/knowledge.d.ts.map +1 -1
- package/dist/runtime/autonomous.d.ts.map +1 -1
- package/dist/runtime/tracked-state.d.ts +2 -0
- package/dist/runtime/tracked-state.d.ts.map +1 -1
- package/dist/runtime/workflows/knowledge-indexing.d.ts +3 -0
- package/dist/runtime/workflows/knowledge-indexing.d.ts.map +1 -1
- package/dist/runtime.js +144 -47
- package/dist/runtime.js.map +4 -4
- package/package.json +1 -1
package/dist/runtime.js
CHANGED
|
@@ -48,7 +48,7 @@ var init_define_BUILD = __esm({
|
|
|
48
48
|
var define_PACKAGE_VERSIONS_default;
|
|
49
49
|
var init_define_PACKAGE_VERSIONS = __esm({
|
|
50
50
|
"<define:__PACKAGE_VERSIONS__>"() {
|
|
51
|
-
define_PACKAGE_VERSIONS_default = { runtime: "1.
|
|
51
|
+
define_PACKAGE_VERSIONS_default = { runtime: "1.11.0", adk: "1.11.0", sdk: "4.20.2", llmz: "0.0.33", zai: "2.5.0", cognitive: "0.2.0" };
|
|
52
52
|
}
|
|
53
53
|
});
|
|
54
54
|
|
|
@@ -16000,9 +16000,9 @@ var require_combined_stream = __commonJS({
|
|
|
16000
16000
|
}
|
|
16001
16001
|
});
|
|
16002
16002
|
|
|
16003
|
-
// ../../node_modules/mime-db/db.json
|
|
16003
|
+
// ../../node_modules/form-data/node_modules/mime-types/node_modules/mime-db/db.json
|
|
16004
16004
|
var require_db = __commonJS({
|
|
16005
|
-
"../../node_modules/mime-db/db.json"(exports2, module) {
|
|
16005
|
+
"../../node_modules/form-data/node_modules/mime-types/node_modules/mime-db/db.json"(exports2, module) {
|
|
16006
16006
|
module.exports = {
|
|
16007
16007
|
"application/1d-interleaved-parityfec": {
|
|
16008
16008
|
source: "iana"
|
|
@@ -24525,18 +24525,18 @@ var require_db = __commonJS({
|
|
|
24525
24525
|
}
|
|
24526
24526
|
});
|
|
24527
24527
|
|
|
24528
|
-
// ../../node_modules/mime-db/index.js
|
|
24528
|
+
// ../../node_modules/form-data/node_modules/mime-types/node_modules/mime-db/index.js
|
|
24529
24529
|
var require_mime_db = __commonJS({
|
|
24530
|
-
"../../node_modules/mime-db/index.js"(exports2, module) {
|
|
24530
|
+
"../../node_modules/form-data/node_modules/mime-types/node_modules/mime-db/index.js"(exports2, module) {
|
|
24531
24531
|
init_define_BUILD();
|
|
24532
24532
|
init_define_PACKAGE_VERSIONS();
|
|
24533
24533
|
module.exports = require_db();
|
|
24534
24534
|
}
|
|
24535
24535
|
});
|
|
24536
24536
|
|
|
24537
|
-
// ../../node_modules/mime-types/index.js
|
|
24537
|
+
// ../../node_modules/form-data/node_modules/mime-types/index.js
|
|
24538
24538
|
var require_mime_types = __commonJS({
|
|
24539
|
-
"../../node_modules/mime-types/index.js"(exports2) {
|
|
24539
|
+
"../../node_modules/form-data/node_modules/mime-types/index.js"(exports2) {
|
|
24540
24540
|
"use strict";
|
|
24541
24541
|
init_define_BUILD();
|
|
24542
24542
|
init_define_PACKAGE_VERSIONS();
|
|
@@ -33751,15 +33751,14 @@ var init_constants = __esm({
|
|
|
33751
33751
|
WellKnownTags = {
|
|
33752
33752
|
knowledge: {
|
|
33753
33753
|
/**
|
|
33754
|
-
* All knowledge base have this tag (with value "
|
|
33755
|
-
* @example "
|
|
33754
|
+
* All knowledge base have this tag (with value "knowledge-base") to identify them as knowledge-related records.
|
|
33755
|
+
* @example "source": "knowledge-base"
|
|
33756
33756
|
*/
|
|
33757
|
-
KNOWLEDGE: "
|
|
33757
|
+
KNOWLEDGE: "source",
|
|
33758
33758
|
/**
|
|
33759
33759
|
* The ID of the knowledge base the record belongs to.
|
|
33760
|
-
* This is the ID of the Knowledge Base primitive
|
|
33760
|
+
* This is the ID of the Knowledge Base primitive from Botpress.
|
|
33761
33761
|
* @example "kbId": "kb_01K6RT9T39KF7K0A7R7D71TDZ1"
|
|
33762
|
-
* @deprecated Use KNOWLEDGE_BASE_NAME for now, as we will be moving to IDs later.
|
|
33763
33762
|
*/
|
|
33764
33763
|
KNOWLEDGE_BASE_ID: "kbId",
|
|
33765
33764
|
/**
|
|
@@ -33769,15 +33768,15 @@ var init_constants = __esm({
|
|
|
33769
33768
|
KNOWLEDGE_BASE_NAME: "kbName",
|
|
33770
33769
|
/**
|
|
33771
33770
|
* The ID of the Data Source the record was ingested from.
|
|
33772
|
-
* @example "
|
|
33771
|
+
* @example "dsId": "docs"
|
|
33773
33772
|
*/
|
|
33774
|
-
KNOWLEDGE_SOURCE_ID: "
|
|
33773
|
+
KNOWLEDGE_SOURCE_ID: "dsId",
|
|
33775
33774
|
/**
|
|
33776
33775
|
* The type of the Data Source the record was ingested from.
|
|
33777
|
-
* Possible values are: "
|
|
33778
|
-
* @example "
|
|
33776
|
+
* Possible values are: "document", "rich-text", "web-page", etc.
|
|
33777
|
+
* @example "dsType": "document"
|
|
33779
33778
|
*/
|
|
33780
|
-
KNOWLEDGE_SOURCE_TYPE: "
|
|
33779
|
+
KNOWLEDGE_SOURCE_TYPE: "dsType"
|
|
33781
33780
|
}
|
|
33782
33781
|
};
|
|
33783
33782
|
WellKnownMetadata = {
|
|
@@ -33874,7 +33873,7 @@ If the question is not related to the knowledge bases, do NOT use this tool.`.tr
|
|
|
33874
33873
|
includeBreadcrumb: true,
|
|
33875
33874
|
contextDepth: 4,
|
|
33876
33875
|
tags: {
|
|
33877
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
33876
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
33878
33877
|
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: kbNames
|
|
33879
33878
|
}
|
|
33880
33879
|
});
|
|
@@ -33909,11 +33908,14 @@ If the question is not related to the knowledge bases, do NOT use this tool.`.tr
|
|
|
33909
33908
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]) {
|
|
33910
33909
|
citationMetadata.knowledgeBase = tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME];
|
|
33911
33910
|
}
|
|
33911
|
+
if (tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]) {
|
|
33912
|
+
citationMetadata.knowledgeBaseId = tags[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID];
|
|
33913
|
+
}
|
|
33912
33914
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]) {
|
|
33913
|
-
citationMetadata.
|
|
33915
|
+
citationMetadata.dsType = tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE];
|
|
33914
33916
|
}
|
|
33915
33917
|
if (tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]) {
|
|
33916
|
-
citationMetadata.
|
|
33918
|
+
citationMetadata.dsId = tags[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID];
|
|
33917
33919
|
}
|
|
33918
33920
|
for (const key in citationMetadata) {
|
|
33919
33921
|
let value = citationMetadata[key];
|
|
@@ -34442,8 +34444,9 @@ var init_source_base = __esm({
|
|
|
34442
34444
|
size: z6.number()
|
|
34443
34445
|
});
|
|
34444
34446
|
SyncInput = z6.object({
|
|
34445
|
-
|
|
34447
|
+
dsId: z6.string(),
|
|
34446
34448
|
kbName: z6.string(),
|
|
34449
|
+
kbId: z6.string(),
|
|
34447
34450
|
force: z6.boolean().optional().describe("Force re-indexing even if files haven't changed").default(false)
|
|
34448
34451
|
});
|
|
34449
34452
|
SyncOutput = z6.object({
|
|
@@ -34460,18 +34463,16 @@ var init_source_base = __esm({
|
|
|
34460
34463
|
state: props.state,
|
|
34461
34464
|
timeout: "120m",
|
|
34462
34465
|
async handler(execProps) {
|
|
34463
|
-
const { kbName,
|
|
34466
|
+
const { kbName, kbId, dsId } = execProps.input;
|
|
34464
34467
|
const kb = adk.project.knowledge.find((kb2) => kb2.name === kbName);
|
|
34465
34468
|
if (!kb) {
|
|
34466
34469
|
throw new Error(`Knowledge base '${kbName}' not found`);
|
|
34467
34470
|
}
|
|
34468
|
-
const source = kb.sources.find((s) => s.id ===
|
|
34471
|
+
const source = kb.sources.find((s) => s.id === dsId);
|
|
34469
34472
|
if (!source) {
|
|
34470
|
-
throw new Error(`Data source with ID '${
|
|
34473
|
+
throw new Error(`Data source with ID '${dsId}' not found in knowledge base '${kbName}'`);
|
|
34471
34474
|
}
|
|
34472
|
-
console.log(
|
|
34473
|
-
`\u{1F504} Starting sync for data source '${sourceId}' of type '${source.type}' in knowledge base '${kbName}'`
|
|
34474
|
-
);
|
|
34475
|
+
console.log(`\u{1F504} Starting sync for data source '${dsId}' of type '${source.type}' in knowledge base '${kbName}' (${kbId})`);
|
|
34475
34476
|
return await props.handler.bind(source)(execProps);
|
|
34476
34477
|
}
|
|
34477
34478
|
});
|
|
@@ -36162,6 +36163,7 @@ var init_source_website = __esm({
|
|
|
36162
36163
|
init_constants();
|
|
36163
36164
|
init_fxp();
|
|
36164
36165
|
init_library();
|
|
36166
|
+
init_context3();
|
|
36165
36167
|
init_html_fetch();
|
|
36166
36168
|
State = z8.object({
|
|
36167
36169
|
urls: z8.array(
|
|
@@ -36178,6 +36180,7 @@ var init_source_website = __esm({
|
|
|
36178
36180
|
mode;
|
|
36179
36181
|
baseUrl;
|
|
36180
36182
|
sitemapUrl;
|
|
36183
|
+
llmsTxtUrl;
|
|
36181
36184
|
urls;
|
|
36182
36185
|
filterFn;
|
|
36183
36186
|
customFetch;
|
|
@@ -36186,10 +36189,11 @@ var init_source_website = __esm({
|
|
|
36186
36189
|
maxDepth;
|
|
36187
36190
|
transformFn;
|
|
36188
36191
|
constructor(id, mode, options) {
|
|
36189
|
-
super(id, "
|
|
36192
|
+
super(id, "web-page");
|
|
36190
36193
|
this.mode = mode;
|
|
36191
36194
|
this.baseUrl = options.baseUrl ?? void 0;
|
|
36192
36195
|
this.sitemapUrl = options.sitemapUrl ?? void 0;
|
|
36196
|
+
this.llmsTxtUrl = options.llmsTxtUrl ?? void 0;
|
|
36193
36197
|
this.urls = options.urls ?? void 0;
|
|
36194
36198
|
this.filterFn = "filter" in options ? options.filter : void 0;
|
|
36195
36199
|
if (typeof options.fetch === "string") {
|
|
@@ -36218,6 +36222,9 @@ var init_source_website = __esm({
|
|
|
36218
36222
|
} else if (this.mode === "sitemap") {
|
|
36219
36223
|
config.sitemapUrl = this.sitemapUrl;
|
|
36220
36224
|
config.maxPages = this.maxPages;
|
|
36225
|
+
} else if (this.mode === "llms-txt") {
|
|
36226
|
+
config.llmsTxtUrl = this.llmsTxtUrl;
|
|
36227
|
+
config.maxPages = this.maxPages;
|
|
36221
36228
|
} else if (this.mode === "urls") {
|
|
36222
36229
|
config.urls = this.urls;
|
|
36223
36230
|
}
|
|
@@ -36269,7 +36276,7 @@ var init_source_website = __esm({
|
|
|
36269
36276
|
/**
|
|
36270
36277
|
* Fetch content from a URL for sitemap parsing (raw content needed)
|
|
36271
36278
|
*/
|
|
36272
|
-
async
|
|
36279
|
+
async fetchRaw(url2) {
|
|
36273
36280
|
if (this.customFetch) {
|
|
36274
36281
|
try {
|
|
36275
36282
|
return await this.customFetch(url2);
|
|
@@ -36387,6 +36394,21 @@ var init_source_website = __esm({
|
|
|
36387
36394
|
}
|
|
36388
36395
|
return { urls };
|
|
36389
36396
|
}
|
|
36397
|
+
parseLlmsTxt(content) {
|
|
36398
|
+
const urls = [];
|
|
36399
|
+
const lines = content.split("\n");
|
|
36400
|
+
const urlRegex = /https?:\/\/[^\s)]+\.md/g;
|
|
36401
|
+
for (const line of lines) {
|
|
36402
|
+
const matches = line.matchAll(urlRegex);
|
|
36403
|
+
for (const match2 of matches) {
|
|
36404
|
+
const [url2] = match2;
|
|
36405
|
+
if (url2) {
|
|
36406
|
+
urls.push(url2);
|
|
36407
|
+
}
|
|
36408
|
+
}
|
|
36409
|
+
}
|
|
36410
|
+
return { urls };
|
|
36411
|
+
}
|
|
36390
36412
|
/**
|
|
36391
36413
|
* Discover URLs from a website using browser integration
|
|
36392
36414
|
*/
|
|
@@ -36444,6 +36466,28 @@ var init_source_website = __esm({
|
|
|
36444
36466
|
if (this.mode === "website") {
|
|
36445
36467
|
return this.discoverUrlsFromWebsite(step2);
|
|
36446
36468
|
}
|
|
36469
|
+
if (this.mode === "llms-txt") {
|
|
36470
|
+
if (!this.llmsTxtUrl) {
|
|
36471
|
+
throw new Error("No llms.txt URL provided");
|
|
36472
|
+
}
|
|
36473
|
+
const { content } = await step2("fetch llms.txt", () => this.fetchRaw(this.llmsTxtUrl));
|
|
36474
|
+
const { urls } = this.parseLlmsTxt(content);
|
|
36475
|
+
console.log(`Parsed ${urls.length} URLs from llms.txt`);
|
|
36476
|
+
const filteredUrls = [];
|
|
36477
|
+
for (const url2 of urls) {
|
|
36478
|
+
if (filteredUrls.length >= this.maxPages) {
|
|
36479
|
+
console.log(`Reached maxPages limit (${this.maxPages}), stopping`);
|
|
36480
|
+
break;
|
|
36481
|
+
}
|
|
36482
|
+
const filterContext = { url: url2 };
|
|
36483
|
+
if (!this.filterFn || this.filterFn(filterContext)) {
|
|
36484
|
+
filteredUrls.push({ loc: url2 });
|
|
36485
|
+
} else {
|
|
36486
|
+
console.log(`Skipped URL (filtered): ${url2}`);
|
|
36487
|
+
}
|
|
36488
|
+
}
|
|
36489
|
+
return filteredUrls;
|
|
36490
|
+
}
|
|
36447
36491
|
if (!this.sitemapUrl) {
|
|
36448
36492
|
throw new Error("No sitemap URL provided");
|
|
36449
36493
|
}
|
|
@@ -36457,7 +36501,7 @@ var init_source_website = __esm({
|
|
|
36457
36501
|
}
|
|
36458
36502
|
await step2(`processing sitemap ${item.url}`, async () => {
|
|
36459
36503
|
try {
|
|
36460
|
-
const { content, contentType } = await this.
|
|
36504
|
+
const { content, contentType } = await this.fetchRaw(item.url);
|
|
36461
36505
|
console.log(`Fetched sitemap ${item.url} (${content.length} bytes), processing... ${contentType}`);
|
|
36462
36506
|
console.log(content, contentType);
|
|
36463
36507
|
try {
|
|
@@ -36526,15 +36570,16 @@ var init_source_website = __esm({
|
|
|
36526
36570
|
console.log(
|
|
36527
36571
|
`Starting sync for WebsiteSource [${this.id}] in mode [${this.mode}, maxPages=${this.maxPages}, maxDepth=${this.maxDepth}, baseUrl=${this.baseUrl}, sitemapUrl=${this.sitemapUrl}]`
|
|
36528
36572
|
);
|
|
36529
|
-
console.log(`Using knowledge base: ${input.kbName}, force reindex: ${!!input.force}, ${input.
|
|
36573
|
+
console.log(`Using knowledge base: ${input.kbName}, force reindex: ${!!input.force}, ${input.dsId}`);
|
|
36530
36574
|
if (input.force) {
|
|
36531
36575
|
console.log("\u{1F504} FORCE MODE: Re-indexing all files regardless of changes");
|
|
36532
36576
|
}
|
|
36533
36577
|
const tags = {
|
|
36534
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
36578
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
36579
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]: input.kbId,
|
|
36580
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName,
|
|
36535
36581
|
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]: this.id,
|
|
36536
|
-
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
36537
|
-
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName
|
|
36582
|
+
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
36538
36583
|
};
|
|
36539
36584
|
const discoveredUrls = await step2("discover urls from sitemap", () => this.discoverUrls(step2, state));
|
|
36540
36585
|
console.log(`Discovered ${discoveredUrls.length} URLs from sitemap`);
|
|
@@ -36580,6 +36625,7 @@ var init_source_website = __esm({
|
|
|
36580
36625
|
console.log(
|
|
36581
36626
|
`To fetch: ${toFetch.length}, To remove: ${toRemove.length}, Skipped (unchanged): ${skippedUnchanged}${input.force ? " [FORCE MODE]" : ""}`
|
|
36582
36627
|
);
|
|
36628
|
+
console.log(`[SYNC DEBUG] Starting 'deleting removed urls' step...`);
|
|
36583
36629
|
const deleted = await step2.map(
|
|
36584
36630
|
"deleting removed urls",
|
|
36585
36631
|
toRemove,
|
|
@@ -36593,6 +36639,7 @@ var init_source_website = __esm({
|
|
|
36593
36639
|
),
|
|
36594
36640
|
{ concurrency: 5 }
|
|
36595
36641
|
);
|
|
36642
|
+
console.log(`[SYNC DEBUG] Deleted ${deleted.length} URLs, starting fetch phase...`);
|
|
36596
36643
|
const fetchAndIndex = async (sitemapUrl) => {
|
|
36597
36644
|
try {
|
|
36598
36645
|
const {
|
|
@@ -36639,8 +36686,8 @@ var init_source_website = __esm({
|
|
|
36639
36686
|
...fetchMetadata?.[WellKnownMetadata.knowledge.FAVICON] && {
|
|
36640
36687
|
[WellKnownMetadata.knowledge.FAVICON]: fetchMetadata[WellKnownMetadata.knowledge.FAVICON]
|
|
36641
36688
|
},
|
|
36642
|
-
|
|
36643
|
-
|
|
36689
|
+
dsId: this.id,
|
|
36690
|
+
dsType: this.type
|
|
36644
36691
|
}
|
|
36645
36692
|
});
|
|
36646
36693
|
return {
|
|
@@ -36654,13 +36701,53 @@ var init_source_website = __esm({
|
|
|
36654
36701
|
return null;
|
|
36655
36702
|
}
|
|
36656
36703
|
};
|
|
36704
|
+
console.log(`[SYNC DEBUG] Starting 'fetching and indexing pages' for ${toFetch.slice(0, this.maxPages).length} URLs...`);
|
|
36657
36705
|
const indexed = await step2.map(
|
|
36658
36706
|
"fetching and indexing pages",
|
|
36659
36707
|
toFetch.slice(0, this.maxPages),
|
|
36660
36708
|
(url2) => fetchAndIndex(url2),
|
|
36661
36709
|
{ concurrency: 20, maxAttempts: 2 }
|
|
36662
36710
|
);
|
|
36711
|
+
console.log(`[SYNC DEBUG] Fetch complete. ${indexed.length} attempted, ${indexed.filter((f) => f !== null).length} successful`);
|
|
36663
36712
|
const successful = indexed.filter((f) => f !== null);
|
|
36713
|
+
console.log(`[SYNC DEBUG] Starting dsData registration...`);
|
|
36714
|
+
await step2("register web page source", async () => {
|
|
36715
|
+
try {
|
|
36716
|
+
const botId = context2.get("botId");
|
|
36717
|
+
const getStateResult = await client._inner.getState({ id: botId, type: "bot", name: "dsData" }).catch(() => ({ state: null }));
|
|
36718
|
+
const freshState = getStateResult.state;
|
|
36719
|
+
const existingPayload = freshState?.payload || {};
|
|
36720
|
+
const kbPayload = existingPayload[input.kbId] || {};
|
|
36721
|
+
const websiteUrl = this.baseUrl || this.sitemapUrl || this.urls?.[0] || "";
|
|
36722
|
+
const getTitle = () => {
|
|
36723
|
+
if (!websiteUrl) return "Website";
|
|
36724
|
+
try {
|
|
36725
|
+
const urlWithProtocol = websiteUrl.includes("://") ? websiteUrl : `https://${websiteUrl}`;
|
|
36726
|
+
return new URL(urlWithProtocol).hostname;
|
|
36727
|
+
} catch {
|
|
36728
|
+
return websiteUrl;
|
|
36729
|
+
}
|
|
36730
|
+
};
|
|
36731
|
+
kbPayload[this.id] = {
|
|
36732
|
+
type: "web-page",
|
|
36733
|
+
title: getTitle(),
|
|
36734
|
+
createdOn: kbPayload[this.id]?.createdOn || Date.now(),
|
|
36735
|
+
// Preserve original creation time
|
|
36736
|
+
data: { websiteUrl, pages: [], indexingJobs: [] }
|
|
36737
|
+
};
|
|
36738
|
+
existingPayload[input.kbId] = kbPayload;
|
|
36739
|
+
await client._inner.setState({
|
|
36740
|
+
id: botId,
|
|
36741
|
+
type: "bot",
|
|
36742
|
+
name: "dsData",
|
|
36743
|
+
payload: existingPayload
|
|
36744
|
+
});
|
|
36745
|
+
console.log(`Registered web page source "${this.id}" in dsData for KB ${input.kbId}`);
|
|
36746
|
+
} catch (err) {
|
|
36747
|
+
console.warn("Failed to register web page source in dsData state:", err);
|
|
36748
|
+
}
|
|
36749
|
+
});
|
|
36750
|
+
console.log(`[SYNC DEBUG] \u2705 WebsiteSource sync complete for "${this.id}". Processed: ${discoveredUrls.length}, Added: ${successful.length}, Deleted: ${deleted.length}`);
|
|
36664
36751
|
return {
|
|
36665
36752
|
processed: discoveredUrls.length,
|
|
36666
36753
|
deleted,
|
|
@@ -36679,6 +36766,10 @@ var init_source_website = __esm({
|
|
|
36679
36766
|
const id = options.id || `sitemap_${sitemapUrl.replace(/https?:\/\//, "").replace(/\//g, "_")}`;
|
|
36680
36767
|
return new _WebsiteSource(id, "sitemap", { ...options, sitemapUrl });
|
|
36681
36768
|
}
|
|
36769
|
+
static fromLlmsTxt(llmsTxtUrl, options = {}) {
|
|
36770
|
+
const id = options.id || `llmstxt_${llmsTxtUrl.replace(/https?:\/\//, "").replace(/\//g, "_")}`;
|
|
36771
|
+
return new _WebsiteSource(id, "llms-txt", { ...options, llmsTxtUrl });
|
|
36772
|
+
}
|
|
36682
36773
|
static fromUrls(urls, options = {}) {
|
|
36683
36774
|
let defaultId = `urls_${urls.length}_pages`;
|
|
36684
36775
|
if (urls.length > 0) {
|
|
@@ -43498,7 +43589,7 @@ var init_source_directory = __esm({
|
|
|
43498
43589
|
_directoryPath;
|
|
43499
43590
|
_filterFn;
|
|
43500
43591
|
constructor(id, directoryPath, options = {}) {
|
|
43501
|
-
super(id, "
|
|
43592
|
+
super(id, "document");
|
|
43502
43593
|
this._directoryPath = directoryPath;
|
|
43503
43594
|
this._filterFn = options.filter ?? void 0;
|
|
43504
43595
|
}
|
|
@@ -43543,10 +43634,11 @@ var init_source_directory = __esm({
|
|
|
43543
43634
|
const crypto3 = await import("crypto");
|
|
43544
43635
|
const directory = path4.resolve(adk.environment.agent.directory, this.directoryPath);
|
|
43545
43636
|
const tags = {
|
|
43546
|
-
[WellKnownTags.knowledge.KNOWLEDGE]: "
|
|
43637
|
+
[WellKnownTags.knowledge.KNOWLEDGE]: "knowledge-base",
|
|
43638
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_ID]: input.kbId,
|
|
43639
|
+
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName,
|
|
43547
43640
|
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_ID]: this.id,
|
|
43548
|
-
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
43549
|
-
[WellKnownTags.knowledge.KNOWLEDGE_BASE_NAME]: input.kbName
|
|
43641
|
+
[WellKnownTags.knowledge.KNOWLEDGE_SOURCE_TYPE]: this.type
|
|
43550
43642
|
};
|
|
43551
43643
|
if (!directory.startsWith(adk.environment.agent.directory)) {
|
|
43552
43644
|
throw new Error("Directory path must be within the agent's directory");
|
|
@@ -43630,8 +43722,8 @@ var init_source_directory = __esm({
|
|
|
43630
43722
|
},
|
|
43631
43723
|
metadata: {
|
|
43632
43724
|
hash,
|
|
43633
|
-
|
|
43634
|
-
|
|
43725
|
+
dsId: this.id,
|
|
43726
|
+
dsType: this.type,
|
|
43635
43727
|
relPath: local.rel,
|
|
43636
43728
|
[WellKnownMetadata.knowledge.TITLE]: title
|
|
43637
43729
|
}
|
|
@@ -43668,10 +43760,10 @@ var init_source_directory = __esm({
|
|
|
43668
43760
|
|
|
43669
43761
|
// src/primitives/data-sources/index.ts
|
|
43670
43762
|
function isDirectorySource(source) {
|
|
43671
|
-
return source.type === "
|
|
43763
|
+
return source.type === "document";
|
|
43672
43764
|
}
|
|
43673
43765
|
function isWebsiteSource(source) {
|
|
43674
|
-
return source.type === "
|
|
43766
|
+
return source.type === "web-page";
|
|
43675
43767
|
}
|
|
43676
43768
|
function isTableSource(source) {
|
|
43677
43769
|
return source.type === "table";
|
|
@@ -44404,12 +44496,13 @@ var init_knowledge_indexing = __esm({
|
|
|
44404
44496
|
description: "Built-in workflow to re-index all data sources in a knowledge base",
|
|
44405
44497
|
input: z13.object({
|
|
44406
44498
|
kbName: z13.string(),
|
|
44499
|
+
kbId: z13.string(),
|
|
44407
44500
|
force: z13.boolean().optional().describe("Force re-indexing even if files haven't changed").default(false)
|
|
44408
44501
|
}),
|
|
44409
44502
|
timeout: "180m",
|
|
44410
44503
|
output: SyncOutput,
|
|
44411
44504
|
handler: async ({ input, step: step2 }) => {
|
|
44412
|
-
const { kbName } = input;
|
|
44505
|
+
const { kbName, kbId } = input;
|
|
44413
44506
|
const kb = adk.project.knowledge.find((x) => x.name === kbName);
|
|
44414
44507
|
if (!kb) {
|
|
44415
44508
|
throw new Error(`Knowledge base '${kbName}' not found`);
|
|
@@ -44425,7 +44518,8 @@ var init_knowledge_indexing = __esm({
|
|
|
44425
44518
|
statuses: ["in_progress", "listening", "pending", "paused"],
|
|
44426
44519
|
input: {
|
|
44427
44520
|
kbName,
|
|
44428
|
-
|
|
44521
|
+
kbId,
|
|
44522
|
+
dsId: source.id,
|
|
44429
44523
|
force: input.force || false
|
|
44430
44524
|
}
|
|
44431
44525
|
}).then((x) => x.id)
|
|
@@ -44454,6 +44548,7 @@ var init_knowledge = __esm({
|
|
|
44454
44548
|
init_define_BUILD();
|
|
44455
44549
|
init_define_PACKAGE_VERSIONS();
|
|
44456
44550
|
init_knowledge_indexing();
|
|
44551
|
+
init_runtime2();
|
|
44457
44552
|
((Typings8) => {
|
|
44458
44553
|
Typings8.Primitive = "knowledge";
|
|
44459
44554
|
})(Typings5 || (Typings5 = {}));
|
|
@@ -45901,7 +45996,9 @@ var init_tracked_state = __esm({
|
|
|
45901
45996
|
/** Workflow-specific state (persists across workflow executions) */
|
|
45902
45997
|
workflowState: "workflowState",
|
|
45903
45998
|
/** Workflow cached steps executions */
|
|
45904
|
-
workflowSteps: "workflowSteps"
|
|
45999
|
+
workflowSteps: "workflowSteps",
|
|
46000
|
+
/** Data source metadata for dashboard visibility */
|
|
46001
|
+
dsData: "dsData"
|
|
45905
46002
|
};
|
|
45906
46003
|
TrackedState2 = class _TrackedState {
|
|
45907
46004
|
type;
|