npm - openclaw-smart-fetch - Versions diffs - 0.2.29 → 0.2.30 - Mend

openclaw-smart-fetch 0.2.29 → 0.2.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.js CHANGED Viewed

@@ -9607,6 +9607,12 @@ function buildPlainTextResult(opts, finalUrl, rawBody, format, maxChars, browser
     os
   };
 }
+function isTwitterJsDisabledPage(document, url) {
+  if (!/^(https?:\/\/)?(www\.)?(x\.com|twitter\.com)\//i.test(url))
+    return false;
+  const text = document.body?.textContent ?? document.documentElement?.textContent ?? "";
+  return text.includes("JavaScript is disabled") && text.includes("supported browser");
+}
 function extractDomTextFallback(document) {
   const bodyText = document.body?.textContent ?? document.documentElement?.textContent ?? "";
   return bodyText.replace(/\r\n/g, "\n").replace(/\n{3,}/g, "\n\n").split("\n").map((line) => line.trim()).join("\n").replace(/[ \t]{2,}/g, " ").trim();
@@ -10197,15 +10203,47 @@ function createDefuddleFetch(dependencies = runtimeDependencies) {
       });
       const fallbackDocument = parseLinkedomHTML(rawBody, finalUrl);
       const extractionDocument = parseLinkedomHTML(rawBody, finalUrl);
-      const extracted = await dependencies.defuddle(
-        extractionDocument,
-        finalUrl,
-        {
-          markdown: format !== "html",
-          removeImages,
-          includeReplies
+      if (isTwitterJsDisabledPage(fallbackDocument, opts.url)) {
+        return {
+          error: `Server returned HTTP 404 Not Found for ${opts.url}.`,
+          code: "http_error",
+          phase: "loading",
+          retryable: false,
+          timeoutMs,
+          url: opts.url,
+          finalUrl,
+          statusCode: 404,
+          statusText: "Not Found",
+          mimeType: normalizeContentType(contentType) || void 0,
+          contentLength: errorContext.contentLength
+        };
+      }
+      let extracted;
+      try {
+        const origConsoleError = console.error;
+        const suppressedErrors = [];
+        console.error = (...args) => {
+          suppressedErrors.push(args);
+        };
+        try {
+          extracted = await dependencies.defuddle(
+            extractionDocument,
+            finalUrl,
+            {
+              markdown: format !== "html",
+              removeImages,
+              includeReplies
+            }
+          );
+        } finally {
+          console.error = origConsoleError;
         }
-      );
+      } catch (_error) {
+        extracted = {
+          content: void 0,
+          wordCount: 0
+        };
+      }
       let extractedContent = extracted.content;
       let wordCount = extracted.wordCount;
       if (!extractedContent || wordCount === 0) {