npm - @minded-ai/mindedjs - Versions diffs - 1.0.0-ec2-beta-5 → 1.0.0-ec2-beta-7 - Mend

@minded-ai/mindedjs 1.0.0-ec2-beta-5 → 1.0.0-ec2-beta-7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/browserTask/executeBrowserTask.py +22 -15
package/package.json +1 -1
package/src/browserTask/executeBrowserTask.py +22 -15

package/dist/browserTask/executeBrowserTask.py CHANGED

@@ -1,15 +1,8 @@
 """
 Browser Task Executor with Screenshot Capture Support
-This script runs browser automation tasks using browser-use and automatically captures
-screenshots at the end of each step, uploading them to S3.
-Environment Variables for Screenshots:
-- SCREENSHOT_S3_BUCKET: S3 bucket name (default: 'global-development-agentsforce')
-- SCREENSHOT_S3_PREFIX: S3 key prefix (default: 'browser-use-runs-screenshots/')
-- AWS_REGION: AWS region for S3 (default: 'us-east-1')
-AWS credentials should be configured via standard AWS SDK methods.
+This script runs browser automation tasks using browser-use and can capture
+screenshots at the end of each step, uploading them to S3 when configured.
 """
 import asyncio
@@ -22,6 +15,7 @@ from browser_use.llm import ChatOpenAI
 import os
 import sys
 import logging
+import base64
 from datetime import datetime
 from pathlib import Path
 from dotenv import load_dotenv
@@ -89,22 +83,30 @@ class ScreenshotCapture:
             # Get current page
             page = await agent.browser_session.get_current_page()
-            # Get current URL for context - use page.url() method or fallback
+            # Get current URL for logging (browser-use might use method instead of property)
             try:
-                # Try as property first
                 current_url = page.url if hasattr(page, 'url') else page.url()
             except:
-                # Fallback if URL is not accessible
                 current_url = "unknown"
-            logger.info(f"📸 Taking step_end screenshot at: {current_url}")
+            logger.info(f"📸 Taking screenshot at: {current_url}")
             # Update step counter
             step_number = self.step_counter
             self.step_counter += 1
             # Take screenshot - browser-use takes viewport by default
-            screenshot_bytes = await page.screenshot()
+            screenshot_data = await page.screenshot()
+            # Ensure we have bytes - browser-use might return base64 string or bytes
+            if isinstance(screenshot_data, bytes):
+                screenshot_bytes = screenshot_data
+            elif isinstance(screenshot_data, str):
+                # If it's base64 encoded string, decode it
+                screenshot_bytes = base64.b64decode(screenshot_data)
+            else:
+                # Try to get bytes from whatever format it is
+                screenshot_bytes = bytes(screenshot_data)
             # Generate filename with metadata
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
@@ -114,7 +116,12 @@ class ScreenshotCapture:
             s3_key = f"{self.s3_prefix}{filename}"
             try:
-                # Upload with KMS encryption if required by bucket
+                # Verify we have valid data before uploading
+                if not screenshot_bytes or len(screenshot_bytes) == 0:
+                    logger.error(f"❌ Screenshot bytes are empty!")
+                    return
+                # Upload to S3 with KMS encryption
                 self.s3_client.put_object(
                     Bucket=self.s3_bucket,
                     Key=s3_key,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@minded-ai/mindedjs",
-  "version": "1.0.0-ec2-beta-5",
+  "version": "1.0.0-ec2-beta-7",
   "description": "MindedJS is a TypeScript library for building agents.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

package/src/browserTask/executeBrowserTask.py CHANGED

@@ -1,15 +1,8 @@
 """
 Browser Task Executor with Screenshot Capture Support
-This script runs browser automation tasks using browser-use and automatically captures
-screenshots at the end of each step, uploading them to S3.
-Environment Variables for Screenshots:
-- SCREENSHOT_S3_BUCKET: S3 bucket name (default: 'global-development-agentsforce')
-- SCREENSHOT_S3_PREFIX: S3 key prefix (default: 'browser-use-runs-screenshots/')
-- AWS_REGION: AWS region for S3 (default: 'us-east-1')
-AWS credentials should be configured via standard AWS SDK methods.
+This script runs browser automation tasks using browser-use and can capture
+screenshots at the end of each step, uploading them to S3 when configured.
 """
 import asyncio
@@ -22,6 +15,7 @@ from browser_use.llm import ChatOpenAI
 import os
 import sys
 import logging
+import base64
 from datetime import datetime
 from pathlib import Path
 from dotenv import load_dotenv
@@ -89,22 +83,30 @@ class ScreenshotCapture:
             # Get current page
             page = await agent.browser_session.get_current_page()
-            # Get current URL for context - use page.url() method or fallback
+            # Get current URL for logging (browser-use might use method instead of property)
             try:
-                # Try as property first
                 current_url = page.url if hasattr(page, 'url') else page.url()
             except:
-                # Fallback if URL is not accessible
                 current_url = "unknown"
-            logger.info(f"📸 Taking step_end screenshot at: {current_url}")
+            logger.info(f"📸 Taking screenshot at: {current_url}")
             # Update step counter
             step_number = self.step_counter
             self.step_counter += 1
             # Take screenshot - browser-use takes viewport by default
-            screenshot_bytes = await page.screenshot()
+            screenshot_data = await page.screenshot()
+            # Ensure we have bytes - browser-use might return base64 string or bytes
+            if isinstance(screenshot_data, bytes):
+                screenshot_bytes = screenshot_data
+            elif isinstance(screenshot_data, str):
+                # If it's base64 encoded string, decode it
+                screenshot_bytes = base64.b64decode(screenshot_data)
+            else:
+                # Try to get bytes from whatever format it is
+                screenshot_bytes = bytes(screenshot_data)
             # Generate filename with metadata
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
@@ -114,7 +116,12 @@ class ScreenshotCapture:
             s3_key = f"{self.s3_prefix}{filename}"
             try:
-                # Upload with KMS encryption if required by bucket
+                # Verify we have valid data before uploading
+                if not screenshot_bytes or len(screenshot_bytes) == 0:
+                    logger.error(f"❌ Screenshot bytes are empty!")
+                    return
+                # Upload to S3 with KMS encryption
                 self.s3_client.put_object(
                     Bucket=self.s3_bucket,
                     Key=s3_key,