@botpress/runtime 1.6.5 → 1.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/definition.js +166 -29
- package/dist/definition.js.map +4 -4
- package/dist/internal.js +166 -29
- package/dist/internal.js.map +4 -4
- package/dist/library.js +166 -29
- package/dist/library.js.map +4 -4
- package/dist/primitives/data-sources/html-fetch.d.ts +57 -0
- package/dist/primitives/data-sources/html-fetch.d.ts.map +1 -0
- package/dist/primitives/data-sources/source-website.d.ts +69 -4
- package/dist/primitives/data-sources/source-website.d.ts.map +1 -1
- package/dist/runtime.js +166 -29
- package/dist/runtime.js.map +4 -4
- package/package.json +1 -1
package/dist/definition.js
CHANGED
|
@@ -48,7 +48,7 @@ var init_define_BUILD = __esm({
|
|
|
48
48
|
var define_PACKAGE_VERSIONS_default;
|
|
49
49
|
var init_define_PACKAGE_VERSIONS = __esm({
|
|
50
50
|
"<define:__PACKAGE_VERSIONS__>"() {
|
|
51
|
-
define_PACKAGE_VERSIONS_default = { runtime: "1.6.
|
|
51
|
+
define_PACKAGE_VERSIONS_default = { runtime: "1.6.6", adk: "not-installed", sdk: "4.17.3", llmz: "0.0.27", zai: "2.4.0", cognitive: "0.2.0" };
|
|
52
52
|
}
|
|
53
53
|
});
|
|
54
54
|
|
|
@@ -11623,7 +11623,7 @@ var require_follow_redirects = __commonJS({
|
|
|
11623
11623
|
var currentUrlParts = parseUrl(this._currentUrl);
|
|
11624
11624
|
var currentHost = currentHostHeader || currentUrlParts.host;
|
|
11625
11625
|
var currentUrl = /^\w+:/.test(location) ? this._currentUrl : url2.format(Object.assign(currentUrlParts, { host: currentHost }));
|
|
11626
|
-
var redirectUrl =
|
|
11626
|
+
var redirectUrl = resolveUrl2(location, currentUrl);
|
|
11627
11627
|
debug("redirecting to", redirectUrl.href);
|
|
11628
11628
|
this._isRedirect = true;
|
|
11629
11629
|
spreadUrlObject(redirectUrl, this._options);
|
|
@@ -11707,7 +11707,7 @@ var require_follow_redirects = __commonJS({
|
|
|
11707
11707
|
}
|
|
11708
11708
|
return parsed;
|
|
11709
11709
|
}
|
|
11710
|
-
function
|
|
11710
|
+
// Resolve `relative` against `base`, using the native URL class when the
// surrounding module flagged it as usable and the legacy url API otherwise.
function resolveUrl2(relative, base) {
  if (useNativeURL) {
    return new URL2(relative, base);
  }
  const resolved = url2.resolve(base, relative);
  return parseUrl(resolved);
}
|
|
11713
11713
|
function validateUrl(input) {
|
|
@@ -42143,6 +42143,95 @@ var XMLParser = class {
|
|
|
42143
42143
|
}
|
|
42144
42144
|
};
|
|
42145
42145
|
|
|
42146
|
+
// src/primitives/data-sources/html-fetch.ts
|
|
42147
|
+
init_define_BUILD();
|
|
42148
|
+
init_define_PACKAGE_VERSIONS();
|
|
42149
|
+
/**
 * Extract basic page metadata (title, description, favicon) from raw HTML.
 *
 * Tags are located with regular expressions and their attributes are parsed
 * individually, so attribute order and quote style (double, single, or
 * unquoted) do not matter.
 *
 * @param {string} html - Raw HTML document text.
 * @returns {{ title?: string, description?: string, favicon: string }}
 *   `title` and `description` are present only when found; `favicon` is the
 *   href exactly as written in the document (possibly relative) and falls
 *   back to "/favicon.ico" when no icon link is declared.
 */
function extractHtmlMetadata(html) {
  const metadata = {};

  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
  if (titleMatch && titleMatch[1]) {
    metadata.title = titleMatch[1].trim();
  }

  // Parse the attributes of a single tag into a Map keyed by lower-cased
  // attribute name. The first occurrence of a duplicated attribute wins.
  const readAttributes = (tag) => {
    const attributes = new Map();
    const attributePattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>`=]+))/g;
    for (const [, rawName, doubleQuoted, singleQuoted, unquoted] of tag.matchAll(attributePattern)) {
      const name = rawName.toLowerCase();
      if (!attributes.has(name)) {
        attributes.set(name, doubleQuoted ?? singleQuoted ?? unquoted ?? "");
      }
    }
    return attributes;
  };

  // The tag patterns skip over quoted attribute values so that a ">" inside
  // a value (e.g. content="a > b") does not end the tag early.
  const metaTags = html.match(/<meta\b(?:"[^"]*"|'[^']*'|[^'">])*>/gi) ?? [];
  const descriptionKeys = new Set(["description", "og:description"]);
  for (const tag of metaTags) {
    const attributes = readAttributes(tag);
    const keys = [attributes.get("name"), attributes.get("property")];
    const isDescription = keys.some((key) => key !== undefined && descriptionKeys.has(key.trim().toLowerCase()));
    const content = attributes.get("content");
    if (isDescription && content) {
      metadata.description = content.trim();
      break;
    }
  }

  // `rel` is a space-separated token list: "shortcut icon" and "icon" both
  // carry the "icon" token, while "mask-icon" does not.
  const linkTags = html.match(/<link\b(?:"[^"]*"|'[^']*'|[^'">])*>/gi) ?? [];
  const iconRelTokens = new Set(["icon", "apple-touch-icon"]);
  for (const tag of linkTags) {
    const attributes = readAttributes(tag);
    const relTokens = (attributes.get("rel") ?? "").toLowerCase().split(/\s+/);
    const href = attributes.get("href");
    if (href && relTokens.some((token) => iconRelTokens.has(token))) {
      metadata.favicon = href.trim();
      break;
    }
  }

  if (!metadata.favicon) {
    metadata.favicon = "/favicon.ico";
  }
  return metadata;
}
|
|
42188
|
+
/**
 * Resolve a possibly-relative URL found in a page against that page's URL.
 *
 * @param {string} url2 - URL as written in the document (absolute, root-relative,
 *   page-relative, or protocol-relative).
 * @param {string} baseUrl - URL of the document the reference was found in.
 * @returns {string} The absolute URL, or `url2` unchanged when it is already an
 *   http(s) URL or when resolution fails (e.g. `baseUrl` is not a valid URL).
 */
function resolveUrl(url2, baseUrl) {
  if (url2.startsWith("http://") || url2.startsWith("https://")) {
    return url2;
  }
  try {
    // Resolve against the full document URL, not just its origin, so that
    // page-relative references ("icon.png", "../img/icon.png") keep the
    // document's directory. Root-relative paths still resolve to the origin.
    return new URL(url2, baseUrl).href;
  } catch {
    return url2;
  }
}
|
|
42199
|
+
/**
 * Fetch a URL with the global `fetch` and, for HTML responses, extract page
 * metadata from the body.
 *
 * @param {string} url2 - URL to request.
 * @param {{ userAgent?: string, timeout?: number }} [options] - Optional
 *   User-Agent override and a timeout in milliseconds applied through
 *   `AbortSignal.timeout`.
 * @returns {Promise<{ url: string, contentType: string, content: string, metadata?: object }>}
 *   `url` is the requested URL; `metadata` is present only for HTML/XHTML
 *   responses.
 * @throws {Error} When the response status is not OK. Errors raised by
 *   `fetch` itself (network failure, timeout abort) propagate unchanged.
 */
async function fetchHtml(url2, options) {
  const userAgent = options?.userAgent || "Mozilla/5.0 (compatible; BotpressBot/1.0)";
  const fetchOptions = {
    headers: {
      "User-Agent": userAgent
    }
  };
  if (options?.timeout) {
    fetchOptions.signal = AbortSignal.timeout(options.timeout);
  }
  const response = await fetch(url2, fetchOptions);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url2}: ${response.status} ${response.statusText}`);
  }
  const contentType = response.headers.get("content-type") || "text/html";
  const content = await response.text();
  // Media types are case-insensitive, so compare on a lower-cased copy while
  // returning the header value exactly as received.
  const normalizedType = contentType.toLowerCase();
  const isHtml = normalizedType.includes("text/html") || normalizedType.includes("application/xhtml");
  if (!isHtml) {
    return {
      url: url2,
      contentType,
      content
    };
  }
  const extracted = extractHtmlMetadata(content);
  if (extracted.favicon) {
    // Relative references belong to the document that was actually served,
    // which after redirects is `response.url` rather than the requested URL.
    extracted.favicon = resolveUrl(extracted.favicon, response.url || url2);
  }
  return {
    url: url2,
    contentType,
    content,
    metadata: extracted
  };
}
|
|
42234
|
+
|
|
42146
42235
|
// src/primitives/data-sources/source-website.ts
|
|
42147
42236
|
var State = z10.object({
|
|
42148
42237
|
urls: z10.array(
|
|
@@ -42162,6 +42251,7 @@ var WebsiteSource = class _WebsiteSource extends DataSource {
|
|
|
42162
42251
|
urls;
|
|
42163
42252
|
filterFn;
|
|
42164
42253
|
customFetch;
|
|
42254
|
+
fetchStrategy;
|
|
42165
42255
|
maxPages;
|
|
42166
42256
|
maxDepth;
|
|
42167
42257
|
transformFn;
|
|
@@ -42172,7 +42262,16 @@ var WebsiteSource = class _WebsiteSource extends DataSource {
|
|
|
42172
42262
|
this.sitemapUrl = options.sitemapUrl ?? void 0;
|
|
42173
42263
|
this.urls = options.urls ?? void 0;
|
|
42174
42264
|
this.filterFn = "filter" in options ? options.filter : void 0;
|
|
42175
|
-
|
|
42265
|
+
if (typeof options.fetch === "string") {
|
|
42266
|
+
this.fetchStrategy = options.fetch;
|
|
42267
|
+
this.customFetch = void 0;
|
|
42268
|
+
} else if (typeof options.fetch === "function") {
|
|
42269
|
+
this.customFetch = options.fetch;
|
|
42270
|
+
this.fetchStrategy = "node:fetch";
|
|
42271
|
+
} else {
|
|
42272
|
+
this.fetchStrategy = "node:fetch";
|
|
42273
|
+
this.customFetch = void 0;
|
|
42274
|
+
}
|
|
42176
42275
|
this.maxPages = Math.max(1, Math.min(("maxPages" in options ? options.maxPages : void 0) ?? 5e4, 5e4));
|
|
42177
42276
|
this.maxDepth = Math.max(1, Math.min(("maxDepth" in options ? options.maxDepth : void 0) ?? 20, 20));
|
|
42178
42277
|
}
|
|
@@ -42180,51 +42279,82 @@ var WebsiteSource = class _WebsiteSource extends DataSource {
|
|
|
42180
42279
|
return !!adk.project.integrations.get("browser");
|
|
42181
42280
|
}
|
|
42182
42281
|
/**
|
|
42183
|
-
*
|
|
42282
|
+
* Convert HtmlMetadata to FetchResult metadata format
|
|
42283
|
+
*/
|
|
42284
|
+
convertMetadata(metadata) {
|
|
42285
|
+
const result = {};
|
|
42286
|
+
if (metadata.title) {
|
|
42287
|
+
result[WellKnownMetadata.knowledge.TITLE] = metadata.title;
|
|
42288
|
+
}
|
|
42289
|
+
if (metadata.description) {
|
|
42290
|
+
result[WellKnownMetadata.knowledge.DESCRIPTION] = metadata.description;
|
|
42291
|
+
}
|
|
42292
|
+
if (metadata.favicon) {
|
|
42293
|
+
result[WellKnownMetadata.knowledge.FAVICON] = metadata.favicon;
|
|
42294
|
+
}
|
|
42295
|
+
return result;
|
|
42296
|
+
}
|
|
42297
|
+
/**
|
|
42298
|
+
* Default fetch implementation using Node's built-in fetch
|
|
42299
|
+
*/
|
|
42300
|
+
async defaultFetch(url2) {
|
|
42301
|
+
const result = await fetchHtml(url2, {
|
|
42302
|
+
timeout: 3e4
|
|
42303
|
+
});
|
|
42304
|
+
if (!result.metadata) {
|
|
42305
|
+
return {
|
|
42306
|
+
url: result.url,
|
|
42307
|
+
contentType: result.contentType,
|
|
42308
|
+
content: result.content
|
|
42309
|
+
};
|
|
42310
|
+
}
|
|
42311
|
+
return {
|
|
42312
|
+
url: result.url,
|
|
42313
|
+
contentType: result.contentType,
|
|
42314
|
+
content: result.content,
|
|
42315
|
+
metadata: this.convertMetadata(result.metadata)
|
|
42316
|
+
};
|
|
42317
|
+
}
|
|
42318
|
+
/**
|
|
42319
|
+
* Fetch content from a URL for sitemap parsing (raw content needed)
|
|
42184
42320
|
*/
|
|
42185
42321
|
async fetchSitemap(url2) {
|
|
42186
42322
|
if (this.customFetch) {
|
|
42187
42323
|
try {
|
|
42188
42324
|
return await this.customFetch(url2);
|
|
42189
42325
|
} catch (err) {
|
|
42190
|
-
console.warn(`Custom fetch failed for ${url2}, falling back...`);
|
|
42326
|
+
console.warn(`Custom fetch failed for ${url2}, falling back to ${this.fetchStrategy}...`);
|
|
42191
42327
|
}
|
|
42192
42328
|
}
|
|
42193
|
-
if (
|
|
42194
|
-
|
|
42195
|
-
|
|
42196
|
-
);
|
|
42197
|
-
}
|
|
42198
|
-
const output2 = await adk.project.integrations.get("browser")?.actions.browsePages({
|
|
42199
|
-
urls: [url2],
|
|
42200
|
-
timeout: 3e4,
|
|
42201
|
-
waitFor: 500
|
|
42202
|
-
});
|
|
42203
|
-
const result = output2?.results[0];
|
|
42204
|
-
if (!result || !result.content) {
|
|
42205
|
-
throw new Error(`Failed to fetch content from ${url2}`);
|
|
42329
|
+
if (this.fetchStrategy === "integration:browser") {
|
|
42330
|
+
return this.fetchWithBrowserIntegration(url2, { raw: true });
|
|
42331
|
+
} else {
|
|
42332
|
+
return this.defaultFetch(url2);
|
|
42206
42333
|
}
|
|
42207
|
-
return {
|
|
42208
|
-
url: result.url,
|
|
42209
|
-
contentType: "application/html",
|
|
42210
|
-
content: result.raw
|
|
42211
|
-
};
|
|
42212
42334
|
}
|
|
42213
42335
|
/**
|
|
42214
|
-
* Fetch content from a URL with
|
|
42336
|
+
* Fetch content from a URL for indexing (with metadata extraction)
|
|
42215
42337
|
*/
|
|
42216
42338
|
async fetchUrl(url2) {
|
|
42217
42339
|
if (this.customFetch) {
|
|
42218
42340
|
try {
|
|
42219
42341
|
return await this.customFetch(url2);
|
|
42220
42342
|
} catch (err) {
|
|
42221
|
-
console.warn(`Custom fetch failed for ${url2}, falling back...`);
|
|
42343
|
+
console.warn(`Custom fetch failed for ${url2}, falling back to ${this.fetchStrategy}...`);
|
|
42222
42344
|
}
|
|
42223
42345
|
}
|
|
42346
|
+
if (this.fetchStrategy === "integration:browser") {
|
|
42347
|
+
return this.fetchWithBrowserIntegration(url2, { raw: false });
|
|
42348
|
+
} else {
|
|
42349
|
+
return this.defaultFetch(url2);
|
|
42350
|
+
}
|
|
42351
|
+
}
|
|
42352
|
+
/**
|
|
42353
|
+
* Fetch content using the browser integration
|
|
42354
|
+
*/
|
|
42355
|
+
async fetchWithBrowserIntegration(url2, options) {
|
|
42224
42356
|
if (!this.isBrowserIntegrationAvailable()) {
|
|
42225
|
-
throw new Error(
|
|
42226
|
-
`The 'browser' integration is not installed and is required for crawling website. Please provide a custom fetch function or install the 'browser' integration.`
|
|
42227
|
-
);
|
|
42357
|
+
throw new Error(`The 'browser' integration is not installed. Please install it or use fetch: 'node:fetch'.`);
|
|
42228
42358
|
}
|
|
42229
42359
|
const output2 = await adk.project.integrations.get("browser")?.actions.browsePages({
|
|
42230
42360
|
urls: [url2],
|
|
@@ -42235,6 +42365,13 @@ var WebsiteSource = class _WebsiteSource extends DataSource {
|
|
|
42235
42365
|
if (!result || !result.content) {
|
|
42236
42366
|
throw new Error(`Failed to fetch content from ${url2}`);
|
|
42237
42367
|
}
|
|
42368
|
+
if (options.raw && result.raw) {
|
|
42369
|
+
return {
|
|
42370
|
+
url: result.url,
|
|
42371
|
+
contentType: "application/html",
|
|
42372
|
+
content: result.raw
|
|
42373
|
+
};
|
|
42374
|
+
}
|
|
42238
42375
|
return {
|
|
42239
42376
|
url: result.url,
|
|
42240
42377
|
contentType: "text/markdown",
|