openclaw-smart-fetch 0.2.33 → 0.2.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,6 +8,7 @@
8
8
  - 🧹 **Defuddle extraction** — clean readable content instead of noisy HTML
9
9
  - 🧠 **Useful metadata** — title, author, site, language, published date when available
10
10
  - 📦 **Downloads + large file support** — stream attachments and binaries to temp files
11
+ - 🔁 **Client-side `<meta>` redirects** — follows sane meta refresh redirects with loop limits
11
12
  - ⚡ **Batch fetch** — fetch many URLs with bounded concurrency
12
13
  - 📝 **Multiple output formats** — `markdown`, `html`, `text`, `json`
13
14
  - 🔄 **Built-in `web_fetch` fallback** — automatically improves the core web_fetch tool
package/dist/index.js CHANGED
@@ -9404,6 +9404,7 @@ var HTML_CONTENT_TYPES = [
9404
9404
  "text/plain",
9405
9405
  "text/markdown"
9406
9406
  ];
9407
+ var MAX_CLIENT_SIDE_REDIRECTS = 5;
9407
9408
  function normalizeContentType(contentType) {
9408
9409
  return contentType.split(";")[0]?.trim().toLowerCase() ?? "";
9409
9410
  }
@@ -9902,6 +9903,32 @@ function isLikelyJsonBody(body) {
9902
9903
  function isJsonResponse(contentType, body) {
9903
9904
  return isJsonContentType(contentType) || isLikelyJsonBody(body);
9904
9905
  }
9906
/**
 * Decodes the small set of HTML character references handled here:
 * `&amp;`, `&quot;`, `&#39;`, `&apos;`, `&lt;` and `&gt;` (matched
 * case-insensitively). Any other reference is left untouched.
 *
 * All references are resolved in a single pass, so text produced by one
 * replacement is never decoded a second time. A chained-replace approach that
 * handles `&amp;` first would turn the escaped literal `&amp;lt;` into `<`
 * instead of the correct `&lt;`.
 *
 * @param {string} value - Raw attribute value as it appears in the markup.
 * @returns {string} The value with the supported references decoded.
 */
function decodeHtmlAttribute(value) {
  const replacements = {
    amp: "&",
    quot: '"',
    "#39": "'",
    apos: "'",
    lt: "<",
    gt: ">"
  };
  return value.replace(
    /&(amp|quot|#39|apos|lt|gt);/gi,
    (_reference, name) => replacements[name.toLowerCase()]
  );
}
9909
/**
 * Looks for a `<meta http-equiv="refresh">` redirect near the top of an HTML
 * document and returns the absolute URL it points to.
 *
 * Only the first 4096 characters of `body` are scanned. The first `<meta>`
 * tag that has `http-equiv` equal to "refresh" (case-insensitive) and a
 * `content` attribute is used; the two attributes may appear in either order
 * and the values may be double-quoted, single-quoted or unquoted. Quoted
 * values are read up to their own closing quote, so a content value such as
 * `0; url='https://example.org/'` is kept intact.
 *
 * The content value is HTML-decoded with `decodeHtmlAttribute` and parsed as
 * `<delay>; url=<target>`. The redirect is ignored (null is returned) when:
 *   - no matching tag or an empty content value is found,
 *   - the delay is not a finite number, is negative, or is 30 seconds or more,
 *   - there is no `url=` part,
 *   - the target cannot be resolved against `baseUrl`,
 *   - the resolved target equals the (normalized) `baseUrl`.
 *
 * @param {string} body - Response body text.
 * @param {string} baseUrl - URL of the document, used to resolve relative
 *   targets and to detect a redirect to the same page.
 * @returns {string|null} Absolute redirect target, or null when there is no
 *   usable redirect.
 */
function extractClientSideRedirect(body, baseUrl) {
  const MAX_SCAN_CHARS = 4096;
  const MAX_REFRESH_DELAY_SECONDS = 30;
  // Each pattern captures a double-quoted, single-quoted or unquoted value in
  // groups 1, 2 and 3 respectively.
  const HTTP_EQUIV_ATTRIBUTE = /\shttp-equiv\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/i;
  const CONTENT_ATTRIBUTE = /\scontent\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/i;
  const readAttribute = (tag, pattern) => {
    const found = tag.match(pattern);
    return found ? found[1] ?? found[2] ?? found[3] : undefined;
  };

  const snippet = body.slice(0, MAX_SCAN_CHARS);
  let refreshContent;
  for (const [tag] of snippet.matchAll(/<meta\b[^>]*>/gi)) {
    const httpEquiv = readAttribute(tag, HTTP_EQUIV_ATTRIBUTE);
    if (httpEquiv === undefined || httpEquiv.trim().toLowerCase() !== "refresh") {
      continue;
    }
    const content = readAttribute(tag, CONTENT_ATTRIBUTE);
    if (content !== undefined) {
      refreshContent = content;
      break;
    }
  }
  if (!refreshContent) {
    return null;
  }

  const [delayPart = "", ...rest] = decodeHtmlAttribute(refreshContent).split(";");
  const delaySeconds = Number.parseFloat(delayPart.trim());
  // Re-join with ";" because the target URL itself may contain semicolons.
  const urlMatch = rest.join(";").match(/\burl\s*=\s*(.+)$/i);
  const rawTarget = urlMatch?.[1]?.trim().replace(/^['"]|['"]$/g, "");
  if (
    !rawTarget ||
    !Number.isFinite(delaySeconds) ||
    delaySeconds < 0 ||
    delaySeconds >= MAX_REFRESH_DELAY_SECONDS
  ) {
    return null;
  }

  try {
    const targetUrl = new URL(rawTarget, baseUrl).toString();
    // Serialize the base the same way as the target so that equivalent
    // spellings (e.g. with and without the trailing "/") compare equal.
    const normalizedBase = new URL(baseUrl).toString();
    return targetUrl === normalizedBase || targetUrl === baseUrl ? null : targetUrl;
  } catch {
    // Unparseable target or base: treat as "no redirect".
    return null;
  }
}
9905
9932
  function buildJsonResult(opts, finalUrl, rawBody, format, maxChars, browser, os) {
9906
9933
  const parsedJson = parseAndFormatJson(rawBody);
9907
9934
  if ("error" in parsedJson) {
@@ -9981,7 +10008,7 @@ function shouldStripReplies(site) {
9981
10008
  return site === "Hacker News" || site.startsWith("r/") || site.startsWith("GitHub - ");
9982
10009
  }
9983
10010
  function createDefuddleFetch(dependencies = runtimeDependencies) {
9984
- return async function defuddleFetch2(opts, hooks = {}) {
10011
+ async function fetchWithClientRedirects(opts, hooks, clientSideRedirectCount) {
9985
10012
  const browser = opts.browser ?? DEFAULT_BROWSER;
9986
10013
  const os = opts.os ?? DEFAULT_OS;
9987
10014
  const format = opts.format ?? "markdown";
@@ -10109,6 +10136,27 @@ function createDefuddleFetch(dependencies = runtimeDependencies) {
10109
10136
  }
10110
10137
  errorContext.phase = "loading";
10111
10138
  const rawBody = await response.text();
10139
+ const clientSideRedirect = extractClientSideRedirect(rawBody, finalUrl);
10140
+ if (clientSideRedirect) {
10141
+ if (clientSideRedirectCount >= MAX_CLIENT_SIDE_REDIRECTS) {
10142
+ return {
10143
+ error: `Client-side redirect limit (${MAX_CLIENT_SIDE_REDIRECTS}) exceeded while fetching ${opts.url}.`,
10144
+ code: "too_many_redirects",
10145
+ phase: "loading",
10146
+ retryable: false,
10147
+ timeoutMs,
10148
+ url: opts.url,
10149
+ finalUrl,
10150
+ mimeType: normalizeContentType(contentType) || void 0,
10151
+ contentLength: errorContext.contentLength
10152
+ };
10153
+ }
10154
+ return fetchWithClientRedirects(
10155
+ { ...opts, url: clientSideRedirect },
10156
+ hooks,
10157
+ clientSideRedirectCount + 1
10158
+ );
10159
+ }
10112
10160
  const jsonResponse = isJsonResponse(contentType, rawBody);
10113
10161
  if (format === "json") {
10114
10162
  if (!jsonResponse) {
@@ -10314,6 +10362,9 @@ function createDefuddleFetch(dependencies = runtimeDependencies) {
10314
10362
  emitProgress(hooks, { status: "error", progress: 1, phase: "error" });
10315
10363
  return fetchError;
10316
10364
  }
10365
+ }
10366
+ return function defuddleFetch2(opts, hooks = {}) {
10367
+ return fetchWithClientRedirects(opts, hooks, 0);
10317
10368
  };
10318
10369
  }
10319
10370
  var defuddleFetch = createDefuddleFetch();