@botpress/runtime 1.6.5 → 1.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,7 +48,7 @@ var init_define_BUILD = __esm({
48
48
  var define_PACKAGE_VERSIONS_default;
49
49
  // Initializer for the build-time "<define:__PACKAGE_VERSIONS__>" entry: when its
  // body runs it assigns the table of bundled package versions to
  // define_PACKAGE_VERSIONS_default.
  // NOTE(review): __esm is presumably the bundler's lazy module-init wrapper — confirm.
  // This span is a diff hunk: the "-" line carries the 1.6.5 runtime version and
  // the "+" line carries the 1.6.7 runtime version; all other entries are unchanged.
  var init_define_PACKAGE_VERSIONS = __esm({
50
50
  "<define:__PACKAGE_VERSIONS__>"() {
51
- define_PACKAGE_VERSIONS_default = { runtime: "1.6.5", adk: "not-installed", sdk: "4.17.3", llmz: "0.0.27", zai: "2.4.0", cognitive: "0.2.0" };
51
+ define_PACKAGE_VERSIONS_default = { runtime: "1.6.7", adk: "not-installed", sdk: "4.17.3", llmz: "0.0.27", zai: "2.4.0", cognitive: "0.2.0" };
52
52
  }
53
53
  });
54
54
 
@@ -11623,7 +11623,7 @@ var require_follow_redirects = __commonJS({
11623
11623
  var currentUrlParts = parseUrl(this._currentUrl);
11624
11624
  var currentHost = currentHostHeader || currentUrlParts.host;
11625
11625
  var currentUrl = /^\w+:/.test(location) ? this._currentUrl : url2.format(Object.assign(currentUrlParts, { host: currentHost }));
11626
- var redirectUrl = resolveUrl(location, currentUrl);
11626
+ var redirectUrl = resolveUrl2(location, currentUrl);
11627
11627
  debug("redirecting to", redirectUrl.href);
11628
11628
  this._isRedirect = true;
11629
11629
  spreadUrlObject(redirectUrl, this._options);
@@ -11707,7 +11707,7 @@ var require_follow_redirects = __commonJS({
11707
11707
  }
11708
11708
  return parsed;
11709
11709
  }
11710
- function resolveUrl(relative, base) {
11710
// Resolve `relative` against `base`.
// When `useNativeURL` is truthy the result is built with the `URL2` constructor;
// otherwise the legacy `url2.resolve` output is passed through `parseUrl`.
function resolveUrl2(relative, base) {
  if (useNativeURL) {
    return new URL2(relative, base);
  }
  const resolved = url2.resolve(base, relative);
  return parseUrl(resolved);
}
11713
11713
  function validateUrl(input) {
@@ -42143,6 +42143,95 @@ var XMLParser = class {
42143
42143
  }
42144
42144
  };
42145
42145
 
42146
+ // src/primitives/data-sources/html-fetch.ts
42147
+ init_define_BUILD();
42148
+ init_define_PACKAGE_VERSIONS();
42149
/**
 * Extract basic page metadata (title, description, favicon reference) from an
 * HTML string.
 *
 * This is a lightweight tag/attribute scan, not a full HTML parser. Attributes
 * are read independently of their order and quoting style, so
 * `<meta content="..." name="description">` and
 * `<link href='...' type="image/png" rel="icon">` are both recognized.
 *
 * @param {string} html - Raw HTML source of the page.
 * @returns {{ title?: string, description?: string, favicon: string }}
 *   `title` and `description` are present only when found; `favicon` is the
 *   href exactly as written in the page (possibly relative), or
 *   "/favicon.ico" when the page declares none.
 */
function extractHtmlMetadata(html) {
  const metadata = {};
  const descriptionKeys = new Set(["description", "og:description"]);
  const iconRelTokens = new Set(["icon", "apple-touch-icon"]);

  // Parse `name="value"`, `name='value'` and `name=value` pairs into a Map
  // keyed by lower-cased attribute name (first occurrence wins).
  const readAttributes = (attributeText) => {
    const attributes = new Map();
    const attributePattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;
    for (const match of attributeText.matchAll(attributePattern)) {
      const name = match[1].toLowerCase();
      if (!attributes.has(name)) {
        attributes.set(name, match[2] ?? match[3] ?? match[4] ?? "");
      }
    }
    return attributes;
  };

  const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
  if (titleMatch && titleMatch[1]) {
    metadata.title = titleMatch[1].trim();
  }

  // The tag patterns skip over quoted attribute values so that a ">" inside a
  // value (e.g. content="a > b") does not end the tag early.
  for (const tag of html.matchAll(/<meta\b((?:[^>"']|"[^"]*"|'[^']*')*)>/gi)) {
    const attributes = readAttributes(tag[1]);
    const isDescription = [attributes.get("name"), attributes.get("property")].some(
      (value) => value !== undefined && descriptionKeys.has(value.trim().toLowerCase())
    );
    if (!isDescription) {
      continue;
    }
    const content = (attributes.get("content") ?? "").trim();
    if (content) {
      metadata.description = content;
      break;
    }
  }

  for (const tag of html.matchAll(/<link\b((?:[^>"']|"[^"]*"|'[^']*')*)>/gi)) {
    const attributes = readAttributes(tag[1]);
    // `rel` is a space-separated token list; "shortcut icon" contains "icon".
    const relTokens = (attributes.get("rel") ?? "").trim().toLowerCase().split(/\s+/);
    if (!relTokens.some((token) => iconRelTokens.has(token))) {
      continue;
    }
    const href = (attributes.get("href") ?? "").trim();
    if (href) {
      metadata.favicon = href;
      break;
    }
  }

  // Conventional default location when the page declares no icon.
  if (!metadata.favicon) {
    metadata.favicon = "/favicon.ico";
  }
  return metadata;
}
42188
/**
 * Resolve a possibly-relative URL reference against the URL of the page it
 * was found on.
 *
 * References are resolved against the full base URL (not just its origin), so
 * a document-relative reference such as "img/icon.png" found on
 * "https://example.com/blog/post.html" becomes
 * "https://example.com/blog/img/icon.png". Root-relative ("/x") and
 * protocol-relative ("//host/x") references resolve as usual.
 *
 * @param {string} url2 - Absolute or relative URL reference.
 * @param {string} baseUrl - URL of the document containing the reference.
 * @returns {string} The absolute URL, or `url2` unchanged when it is already
 *   http(s) or when resolution fails (e.g. `baseUrl` is not a valid URL).
 */
function resolveUrl(url2, baseUrl) {
  if (url2.startsWith("http://") || url2.startsWith("https://")) {
    return url2;
  }
  try {
    return new URL(url2, baseUrl).href;
  } catch {
    // Best effort: hand back the reference as written rather than failing the fetch.
    return url2;
  }
}
42199
/**
 * Fetch a URL with the global `fetch` and, for HTML responses, extract page
 * metadata.
 *
 * @param {string} url2 - URL to request.
 * @param {{ userAgent?: string, timeout?: number }} [options]
 *   `userAgent` overrides the default bot User-Agent header; `timeout`
 *   (milliseconds) aborts the request via `AbortSignal.timeout` when set.
 * @returns {Promise<{ url: string, contentType: string, content: string, metadata?: object }>}
 *   `url` is the requested URL. `metadata` is present only for HTML/XHTML
 *   responses and holds the result of `extractHtmlMetadata` with the favicon
 *   made absolute.
 * @throws {Error} When the response status is not OK.
 */
async function fetchHtml(url2, options) {
  const userAgent = options?.userAgent || "Mozilla/5.0 (compatible; BotpressBot/1.0)";
  const fetchOptions = {
    headers: {
      "User-Agent": userAgent
    }
  };
  if (options?.timeout) {
    fetchOptions.signal = AbortSignal.timeout(options.timeout);
  }
  const response = await fetch(url2, fetchOptions);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url2}: ${response.status} ${response.statusText}`);
  }
  // A missing Content-Type header is treated as HTML.
  const contentType = response.headers.get("content-type") || "text/html";
  const content = await response.text();
  // Media types are case-insensitive, so compare on a lower-cased copy while
  // returning the header value exactly as received.
  const normalizedType = contentType.toLowerCase();
  const isHtml = normalizedType.includes("text/html") || normalizedType.includes("application/xhtml");
  if (!isHtml) {
    return {
      url: url2,
      contentType,
      content
    };
  }
  const extracted = extractHtmlMetadata(content);
  if (extracted.favicon) {
    // Relative references in the document are relative to the URL it was
    // finally served from, which differs from `url2` after a redirect.
    extracted.favicon = resolveUrl(extracted.favicon, response.url || url2);
  }
  return {
    url: url2,
    contentType,
    content,
    metadata: extracted
  };
}
42234
+
42146
42235
  // src/primitives/data-sources/source-website.ts
42147
42236
  var State = z10.object({
42148
42237
  urls: z10.array(
@@ -42162,6 +42251,7 @@ var WebsiteSource = class _WebsiteSource extends DataSource {
42162
42251
  urls;
42163
42252
  filterFn;
42164
42253
  customFetch;
42254
+ fetchStrategy;
42165
42255
  maxPages;
42166
42256
  maxDepth;
42167
42257
  transformFn;
@@ -42172,7 +42262,16 @@ var WebsiteSource = class _WebsiteSource extends DataSource {
42172
42262
  this.sitemapUrl = options.sitemapUrl ?? void 0;
42173
42263
  this.urls = options.urls ?? void 0;
42174
42264
  this.filterFn = "filter" in options ? options.filter : void 0;
42175
- this.customFetch = options.fetch ?? void 0;
42265
+ if (typeof options.fetch === "string") {
42266
+ this.fetchStrategy = options.fetch;
42267
+ this.customFetch = void 0;
42268
+ } else if (typeof options.fetch === "function") {
42269
+ this.customFetch = options.fetch;
42270
+ this.fetchStrategy = "node:fetch";
42271
+ } else {
42272
+ this.fetchStrategy = "node:fetch";
42273
+ this.customFetch = void 0;
42274
+ }
42176
42275
  this.maxPages = Math.max(1, Math.min(("maxPages" in options ? options.maxPages : void 0) ?? 5e4, 5e4));
42177
42276
  this.maxDepth = Math.max(1, Math.min(("maxDepth" in options ? options.maxDepth : void 0) ?? 20, 20));
42178
42277
  }
@@ -42180,51 +42279,82 @@ var WebsiteSource = class _WebsiteSource extends DataSource {
42180
42279
  return !!adk.project.integrations.get("browser");
42181
42280
  }
42182
42281
  /**
42183
- * Fetch content from a URL with fallback strategy
42282
+ * Convert HtmlMetadata to FetchResult metadata format
42283
+ */
42284
+ convertMetadata(metadata) {
42285
+ const result = {};
42286
+ if (metadata.title) {
42287
+ result[WellKnownMetadata.knowledge.TITLE] = metadata.title;
42288
+ }
42289
+ if (metadata.description) {
42290
+ result[WellKnownMetadata.knowledge.DESCRIPTION] = metadata.description;
42291
+ }
42292
+ if (metadata.favicon) {
42293
+ result[WellKnownMetadata.knowledge.FAVICON] = metadata.favicon;
42294
+ }
42295
+ return result;
42296
+ }
42297
+ /**
42298
+ * Default fetch implementation using Node's built-in fetch
42299
+ */
42300
+ async defaultFetch(url2) {
42301
+ const result = await fetchHtml(url2, {
42302
+ timeout: 3e4
42303
+ });
42304
+ if (!result.metadata) {
42305
+ return {
42306
+ url: result.url,
42307
+ contentType: result.contentType,
42308
+ content: result.content
42309
+ };
42310
+ }
42311
+ return {
42312
+ url: result.url,
42313
+ contentType: result.contentType,
42314
+ content: result.content,
42315
+ metadata: this.convertMetadata(result.metadata)
42316
+ };
42317
+ }
42318
+ /**
42319
+ * Fetch content from a URL for sitemap parsing (raw content needed)
42184
42320
  */
42185
42321
  async fetchSitemap(url2) {
42186
42322
  if (this.customFetch) {
42187
42323
  try {
42188
42324
  return await this.customFetch(url2);
42189
42325
  } catch (err) {
42190
- console.warn(`Custom fetch failed for ${url2}, falling back...`);
42326
+ console.warn(`Custom fetch failed for ${url2}, falling back to ${this.fetchStrategy}...`);
42191
42327
  }
42192
42328
  }
42193
- if (!this.isBrowserIntegrationAvailable()) {
42194
- throw new Error(
42195
- `The 'browser' integration is not installed and is required for crawling website. Please provide a custom fetch function or install the 'browser' integration.`
42196
- );
42197
- }
42198
- const output2 = await adk.project.integrations.get("browser")?.actions.browsePages({
42199
- urls: [url2],
42200
- timeout: 3e4,
42201
- waitFor: 500
42202
- });
42203
- const result = output2?.results[0];
42204
- if (!result || !result.content) {
42205
- throw new Error(`Failed to fetch content from ${url2}`);
42329
+ if (this.fetchStrategy === "integration:browser") {
42330
+ return this.fetchWithBrowserIntegration(url2, { raw: true });
42331
+ } else {
42332
+ return this.defaultFetch(url2);
42206
42333
  }
42207
- return {
42208
- url: result.url,
42209
- contentType: "application/html",
42210
- content: result.raw
42211
- };
42212
42334
  }
42213
42335
  /**
42214
- * Fetch content from a URL with fallback strategy
42336
+ * Fetch content from a URL for indexing (with metadata extraction)
42215
42337
  */
42216
42338
  async fetchUrl(url2) {
42217
42339
  if (this.customFetch) {
42218
42340
  try {
42219
42341
  return await this.customFetch(url2);
42220
42342
  } catch (err) {
42221
- console.warn(`Custom fetch failed for ${url2}, falling back...`);
42343
+ console.warn(`Custom fetch failed for ${url2}, falling back to ${this.fetchStrategy}...`);
42222
42344
  }
42223
42345
  }
42346
+ if (this.fetchStrategy === "integration:browser") {
42347
+ return this.fetchWithBrowserIntegration(url2, { raw: false });
42348
+ } else {
42349
+ return this.defaultFetch(url2);
42350
+ }
42351
+ }
42352
+ /**
42353
+ * Fetch content using the browser integration
42354
+ */
42355
+ async fetchWithBrowserIntegration(url2, options) {
42224
42356
  if (!this.isBrowserIntegrationAvailable()) {
42225
- throw new Error(
42226
- `The 'browser' integration is not installed and is required for crawling website. Please provide a custom fetch function or install the 'browser' integration.`
42227
- );
42357
+ throw new Error(`The 'browser' integration is not installed. Please install it or use fetch: 'node:fetch'.`);
42228
42358
  }
42229
42359
  const output2 = await adk.project.integrations.get("browser")?.actions.browsePages({
42230
42360
  urls: [url2],
@@ -42235,6 +42365,13 @@ var WebsiteSource = class _WebsiteSource extends DataSource {
42235
42365
  if (!result || !result.content) {
42236
42366
  throw new Error(`Failed to fetch content from ${url2}`);
42237
42367
  }
42368
+ if (options.raw && result.raw) {
42369
+ return {
42370
+ url: result.url,
42371
+ contentType: "application/html",
42372
+ content: result.raw
42373
+ };
42374
+ }
42238
42375
  return {
42239
42376
  url: result.url,
42240
42377
  contentType: "text/markdown",
@@ -44390,7 +44527,7 @@ var adk = {
44390
44527
  get zai() {
44391
44528
  return new Zai({
44392
44529
  client: context2.get("cognitive"),
44393
- modelId: Array.isArray(adk.project.config.defaultModels.autonomous) ? adk.project.config.defaultModels.autonomous[0] ?? "auto" : adk.project.config.defaultModels.autonomous
44530
+ modelId: Array.isArray(adk.project.config.defaultModels.zai) ? adk.project.config.defaultModels.zai[0] ?? "auto" : adk.project.config.defaultModels.zai
44394
44531
  });
44395
44532
  },
44396
44533
  get project() {