@minded-ai/mindedjs 1.0.0-ec2-beta-5 → 1.0.0-ec2-beta-7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,8 @@
1
1
  """
2
2
  Browser Task Executor with Screenshot Capture Support
3
3
 
4
- This script runs browser automation tasks using browser-use and automatically captures
5
- screenshots at the end of each step, uploading them to S3.
6
-
7
- Environment Variables for Screenshots:
8
- - SCREENSHOT_S3_BUCKET: S3 bucket name (default: 'global-development-agentsforce')
9
- - SCREENSHOT_S3_PREFIX: S3 key prefix (default: 'browser-use-runs-screenshots/')
10
- - AWS_REGION: AWS region for S3 (default: 'us-east-1')
11
-
12
- AWS credentials should be configured via standard AWS SDK methods.
4
+ This script runs browser automation tasks using browser-use and can capture
5
+ screenshots at the end of each step, uploading them to S3 when configured.
13
6
  """
14
7
 
15
8
  import asyncio
@@ -22,6 +15,7 @@ from browser_use.llm import ChatOpenAI
22
15
  import os
23
16
  import sys
24
17
  import logging
18
+ import base64
25
19
  from datetime import datetime
26
20
  from pathlib import Path
27
21
  from dotenv import load_dotenv
@@ -89,22 +83,30 @@ class ScreenshotCapture:
89
83
  # Get current page
90
84
  page = await agent.browser_session.get_current_page()
91
85
 
92
- # Get current URL for context - use page.url() method or fallback
86
+ # Get current URL for logging (browser-use might use method instead of property)
93
87
  try:
94
- # Try as property first
95
88
  current_url = page.url if hasattr(page, 'url') else page.url()
96
89
  except:
97
- # Fallback if URL is not accessible
98
90
  current_url = "unknown"
99
91
 
100
- logger.info(f"📸 Taking step_end screenshot at: {current_url}")
92
+ logger.info(f"📸 Taking screenshot at: {current_url}")
101
93
 
102
94
  # Update step counter
103
95
  step_number = self.step_counter
104
96
  self.step_counter += 1
105
97
 
106
98
  # Take screenshot - browser-use takes viewport by default
107
- screenshot_bytes = await page.screenshot()
99
+ screenshot_data = await page.screenshot()
100
+
101
+ # Ensure we have bytes - browser-use might return base64 string or bytes
102
+ if isinstance(screenshot_data, bytes):
103
+ screenshot_bytes = screenshot_data
104
+ elif isinstance(screenshot_data, str):
105
+ # If it's base64 encoded string, decode it
106
+ screenshot_bytes = base64.b64decode(screenshot_data)
107
+ else:
108
+ # Try to get bytes from whatever format it is
109
+ screenshot_bytes = bytes(screenshot_data)
108
110
 
109
111
  # Generate filename with metadata
110
112
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
@@ -114,7 +116,12 @@ class ScreenshotCapture:
114
116
  s3_key = f"{self.s3_prefix}{filename}"
115
117
 
116
118
  try:
117
- # Upload with KMS encryption if required by bucket
119
+ # Verify we have valid data before uploading
120
+ if not screenshot_bytes or len(screenshot_bytes) == 0:
121
+ logger.error(f"❌ Screenshot bytes are empty!")
122
+ return
123
+
124
+ # Upload to S3 with KMS encryption
118
125
  self.s3_client.put_object(
119
126
  Bucket=self.s3_bucket,
120
127
  Key=s3_key,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@minded-ai/mindedjs",
3
- "version": "1.0.0-ec2-beta-5",
3
+ "version": "1.0.0-ec2-beta-7",
4
4
  "description": "MindedJS is a TypeScript library for building agents.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -1,15 +1,8 @@
1
1
  """
2
2
  Browser Task Executor with Screenshot Capture Support
3
3
 
4
- This script runs browser automation tasks using browser-use and automatically captures
5
- screenshots at the end of each step, uploading them to S3.
6
-
7
- Environment Variables for Screenshots:
8
- - SCREENSHOT_S3_BUCKET: S3 bucket name (default: 'global-development-agentsforce')
9
- - SCREENSHOT_S3_PREFIX: S3 key prefix (default: 'browser-use-runs-screenshots/')
10
- - AWS_REGION: AWS region for S3 (default: 'us-east-1')
11
-
12
- AWS credentials should be configured via standard AWS SDK methods.
4
+ This script runs browser automation tasks using browser-use and can capture
5
+ screenshots at the end of each step, uploading them to S3 when configured.
13
6
  """
14
7
 
15
8
  import asyncio
@@ -22,6 +15,7 @@ from browser_use.llm import ChatOpenAI
22
15
  import os
23
16
  import sys
24
17
  import logging
18
+ import base64
25
19
  from datetime import datetime
26
20
  from pathlib import Path
27
21
  from dotenv import load_dotenv
@@ -89,22 +83,30 @@ class ScreenshotCapture:
89
83
  # Get current page
90
84
  page = await agent.browser_session.get_current_page()
91
85
 
92
- # Get current URL for context - use page.url() method or fallback
86
+ # Get current URL for logging (browser-use might use method instead of property)
93
87
  try:
94
- # Try as property first
95
88
  current_url = page.url if hasattr(page, 'url') else page.url()
96
89
  except:
97
- # Fallback if URL is not accessible
98
90
  current_url = "unknown"
99
91
 
100
- logger.info(f"📸 Taking step_end screenshot at: {current_url}")
92
+ logger.info(f"📸 Taking screenshot at: {current_url}")
101
93
 
102
94
  # Update step counter
103
95
  step_number = self.step_counter
104
96
  self.step_counter += 1
105
97
 
106
98
  # Take screenshot - browser-use takes viewport by default
107
- screenshot_bytes = await page.screenshot()
99
+ screenshot_data = await page.screenshot()
100
+
101
+ # Ensure we have bytes - browser-use might return base64 string or bytes
102
+ if isinstance(screenshot_data, bytes):
103
+ screenshot_bytes = screenshot_data
104
+ elif isinstance(screenshot_data, str):
105
+ # If it's base64 encoded string, decode it
106
+ screenshot_bytes = base64.b64decode(screenshot_data)
107
+ else:
108
+ # Try to get bytes from whatever format it is
109
+ screenshot_bytes = bytes(screenshot_data)
108
110
 
109
111
  # Generate filename with metadata
110
112
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
@@ -114,7 +116,12 @@ class ScreenshotCapture:
114
116
  s3_key = f"{self.s3_prefix}{filename}"
115
117
 
116
118
  try:
117
- # Upload with KMS encryption if required by bucket
119
+ # Verify we have valid data before uploading
120
+ if not screenshot_bytes or len(screenshot_bytes) == 0:
121
+ logger.error(f"❌ Screenshot bytes are empty!")
122
+ return
123
+
124
+ # Upload to S3 with KMS encryption
118
125
  self.s3_client.put_object(
119
126
  Bucket=self.s3_bucket,
120
127
  Key=s3_key,