@minded-ai/mindedjs 1.0.0-ec2-beta-6 → 1.0.0-ec2-beta-8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Browser Task Executor with Screenshot Capture Support
|
|
3
3
|
|
|
4
|
-
This script runs browser automation tasks using browser-use and
|
|
5
|
-
screenshots at the end of each step, uploading them to S3.
|
|
6
|
-
|
|
7
|
-
Environment Variables for Screenshots:
|
|
8
|
-
- SCREENSHOT_S3_BUCKET: S3 bucket name (default: 'global-development-agentsforce')
|
|
9
|
-
- SCREENSHOT_S3_PREFIX: S3 key prefix (default: 'browser-use-runs-screenshots/')
|
|
10
|
-
- AWS_REGION: AWS region for S3 (default: 'us-east-1')
|
|
11
|
-
|
|
12
|
-
AWS credentials should be configured via standard AWS SDK methods.
|
|
4
|
+
This script runs browser automation tasks using browser-use and can capture
|
|
5
|
+
screenshots at the end of each step, uploading them to S3 when configured.
|
|
13
6
|
"""
|
|
14
7
|
|
|
15
8
|
import asyncio
|
|
@@ -81,8 +74,10 @@ class ScreenshotCapture:
|
|
|
81
74
|
# Track step counter
|
|
82
75
|
self.step_counter = 0
|
|
83
76
|
|
|
84
|
-
logger.info(f"📸 Screenshot capture
|
|
85
|
-
logger.info(f"
|
|
77
|
+
logger.info(f"📸 Screenshot capture ENABLED - will capture at end of each step")
|
|
78
|
+
logger.info(f" S3 destination: s3://{self.s3_bucket}/{self.s3_prefix}")
|
|
79
|
+
logger.info(f" Session ID: {self.session_id}")
|
|
80
|
+
logger.info(f" AWS Region: {self.aws_region}")
|
|
86
81
|
|
|
87
82
|
async def capture_screenshot(self, agent: Any) -> None:
|
|
88
83
|
"""Capture screenshot at step end and upload to S3"""
|
|
@@ -90,62 +85,45 @@ class ScreenshotCapture:
|
|
|
90
85
|
# Get current page
|
|
91
86
|
page = await agent.browser_session.get_current_page()
|
|
92
87
|
|
|
93
|
-
# Get current URL for
|
|
88
|
+
# Get current URL for logging (browser-use might use method instead of property)
|
|
94
89
|
try:
|
|
95
|
-
# Try as property first
|
|
96
90
|
current_url = page.url if hasattr(page, 'url') else page.url()
|
|
97
91
|
except:
|
|
98
|
-
# Fallback if URL is not accessible
|
|
99
92
|
current_url = "unknown"
|
|
100
93
|
|
|
101
|
-
logger.info(f"📸 Taking step_end screenshot at: {current_url}")
|
|
102
|
-
|
|
103
94
|
# Update step counter
|
|
104
95
|
step_number = self.step_counter
|
|
105
96
|
self.step_counter += 1
|
|
106
97
|
|
|
98
|
+
logger.info(f"📸 Capturing screenshot #{step_number} at: {current_url}")
|
|
99
|
+
|
|
107
100
|
# Take screenshot - browser-use takes viewport by default
|
|
108
101
|
screenshot_data = await page.screenshot()
|
|
109
102
|
|
|
110
|
-
# Ensure we have bytes - browser-use might return
|
|
103
|
+
# Ensure we have bytes - browser-use might return base64 string or bytes
|
|
111
104
|
if isinstance(screenshot_data, bytes):
|
|
112
105
|
screenshot_bytes = screenshot_data
|
|
113
106
|
elif isinstance(screenshot_data, str):
|
|
114
|
-
# If it's base64 encoded string
|
|
107
|
+
# If it's base64 encoded string, decode it
|
|
115
108
|
screenshot_bytes = base64.b64decode(screenshot_data)
|
|
116
109
|
else:
|
|
117
110
|
# Try to get bytes from whatever format it is
|
|
118
111
|
screenshot_bytes = bytes(screenshot_data)
|
|
119
112
|
|
|
120
|
-
logger.debug(f"Screenshot data type: {type(screenshot_data)}, size: {len(screenshot_bytes)} bytes")
|
|
121
|
-
|
|
122
113
|
# Generate filename with metadata
|
|
123
114
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
|
|
124
115
|
filename = f"{self.session_id}/step_{step_number:04d}_{timestamp}.png"
|
|
125
116
|
|
|
126
|
-
# Optional: Save to temp file for debugging (like Lambda does)
|
|
127
|
-
if os.getenv('DEBUG_SCREENSHOTS') == 'true':
|
|
128
|
-
temp_dir = Path("/tmp/screenshots")
|
|
129
|
-
temp_dir.mkdir(exist_ok=True, parents=True)
|
|
130
|
-
temp_path = temp_dir / f"step_{step_number:04d}_{timestamp}.png"
|
|
131
|
-
with open(temp_path, "wb") as f:
|
|
132
|
-
f.write(screenshot_bytes)
|
|
133
|
-
logger.debug(f"💾 Debug: Saved screenshot to: {temp_path}")
|
|
134
|
-
|
|
135
117
|
# Upload to S3
|
|
136
118
|
s3_key = f"{self.s3_prefix}{filename}"
|
|
137
119
|
|
|
138
120
|
try:
|
|
139
|
-
# Verify we have valid
|
|
121
|
+
# Verify we have valid data before uploading
|
|
140
122
|
if not screenshot_bytes or len(screenshot_bytes) == 0:
|
|
141
123
|
logger.error(f"❌ Screenshot bytes are empty!")
|
|
142
124
|
return
|
|
143
125
|
|
|
144
|
-
#
|
|
145
|
-
if len(screenshot_bytes) > 4 and screenshot_bytes[:4] != b'\x89PNG':
|
|
146
|
-
logger.warning(f"⚠️ Screenshot may not be valid PNG format. First 4 bytes: {screenshot_bytes[:4].hex()}")
|
|
147
|
-
|
|
148
|
-
# Upload with KMS encryption if required by bucket (matching Lambda implementation)
|
|
126
|
+
# Upload to S3 with KMS encryption
|
|
149
127
|
self.s3_client.put_object(
|
|
150
128
|
Bucket=self.s3_bucket,
|
|
151
129
|
Key=s3_key,
|
|
@@ -153,18 +131,20 @@ class ScreenshotCapture:
|
|
|
153
131
|
ContentType='image/png',
|
|
154
132
|
ServerSideEncryption='aws:kms',
|
|
155
133
|
Tagging='retention=30d'
|
|
156
|
-
# Using bucket's default KMS key
|
|
157
134
|
)
|
|
158
135
|
|
|
159
136
|
s3_url = f"s3://{self.s3_bucket}/{s3_key}"
|
|
160
|
-
logger.info(f"✅ Screenshot uploaded: {s3_url} (size: {len(screenshot_bytes)} bytes)")
|
|
137
|
+
logger.info(f"✅ Screenshot uploaded: {s3_url} (step {step_number}, size: {len(screenshot_bytes)} bytes)")
|
|
161
138
|
|
|
162
139
|
except self.ClientError as e:
|
|
163
|
-
logger.error(f"❌ Failed to upload screenshot to S3
|
|
140
|
+
logger.error(f"❌ Failed to upload screenshot #{step_number} to S3")
|
|
141
|
+
logger.error(f" Error: {str(e)}")
|
|
142
|
+
logger.error(f" Bucket: {self.s3_bucket}, Key: {s3_key}")
|
|
164
143
|
# Continue execution even if screenshot upload fails
|
|
165
144
|
|
|
166
145
|
except Exception as e:
|
|
167
|
-
|
|
146
|
+
step_num = getattr(self, 'step_counter', 'unknown')
|
|
147
|
+
logger.error(f"❌ Error capturing screenshot #{step_num}: {str(e)}")
|
|
168
148
|
# Don't raise - continue execution
|
|
169
149
|
|
|
170
150
|
|
|
@@ -215,7 +195,11 @@ async def main(session_id: str, cdp_url: str, task: str, output_schema_json: Opt
|
|
|
215
195
|
on_step_end_hook = None
|
|
216
196
|
|
|
217
197
|
if screenshot_config and screenshot_config.get('enabled', False):
|
|
218
|
-
logger.info("
|
|
198
|
+
logger.info("-" * 50)
|
|
199
|
+
logger.info("🎯 Initializing screenshot capture for browser task")
|
|
200
|
+
logger.info(f" Session: {session_id}")
|
|
201
|
+
logger.info("-" * 50)
|
|
202
|
+
|
|
219
203
|
# Add session_id to config
|
|
220
204
|
screenshot_config['session_id'] = session_id
|
|
221
205
|
screenshot_capture = ScreenshotCapture(screenshot_config)
|
|
@@ -225,6 +209,8 @@ async def main(session_id: str, cdp_url: str, task: str, output_schema_json: Opt
|
|
|
225
209
|
await screenshot_capture.capture_screenshot(agent)
|
|
226
210
|
|
|
227
211
|
on_step_end_hook = on_step_end
|
|
212
|
+
else:
|
|
213
|
+
logger.info("📷 Screenshot capture is DISABLED for this browser task")
|
|
228
214
|
|
|
229
215
|
output_schema = None
|
|
230
216
|
if output_schema_json:
|
package/package.json
CHANGED
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Browser Task Executor with Screenshot Capture Support
|
|
3
3
|
|
|
4
|
-
This script runs browser automation tasks using browser-use and
|
|
5
|
-
screenshots at the end of each step, uploading them to S3.
|
|
6
|
-
|
|
7
|
-
Environment Variables for Screenshots:
|
|
8
|
-
- SCREENSHOT_S3_BUCKET: S3 bucket name (default: 'global-development-agentsforce')
|
|
9
|
-
- SCREENSHOT_S3_PREFIX: S3 key prefix (default: 'browser-use-runs-screenshots/')
|
|
10
|
-
- AWS_REGION: AWS region for S3 (default: 'us-east-1')
|
|
11
|
-
|
|
12
|
-
AWS credentials should be configured via standard AWS SDK methods.
|
|
4
|
+
This script runs browser automation tasks using browser-use and can capture
|
|
5
|
+
screenshots at the end of each step, uploading them to S3 when configured.
|
|
13
6
|
"""
|
|
14
7
|
|
|
15
8
|
import asyncio
|
|
@@ -81,8 +74,10 @@ class ScreenshotCapture:
|
|
|
81
74
|
# Track step counter
|
|
82
75
|
self.step_counter = 0
|
|
83
76
|
|
|
84
|
-
logger.info(f"📸 Screenshot capture
|
|
85
|
-
logger.info(f"
|
|
77
|
+
logger.info(f"📸 Screenshot capture ENABLED - will capture at end of each step")
|
|
78
|
+
logger.info(f" S3 destination: s3://{self.s3_bucket}/{self.s3_prefix}")
|
|
79
|
+
logger.info(f" Session ID: {self.session_id}")
|
|
80
|
+
logger.info(f" AWS Region: {self.aws_region}")
|
|
86
81
|
|
|
87
82
|
async def capture_screenshot(self, agent: Any) -> None:
|
|
88
83
|
"""Capture screenshot at step end and upload to S3"""
|
|
@@ -90,62 +85,45 @@ class ScreenshotCapture:
|
|
|
90
85
|
# Get current page
|
|
91
86
|
page = await agent.browser_session.get_current_page()
|
|
92
87
|
|
|
93
|
-
# Get current URL for
|
|
88
|
+
# Get current URL for logging (browser-use might use method instead of property)
|
|
94
89
|
try:
|
|
95
|
-
# Try as property first
|
|
96
90
|
current_url = page.url if hasattr(page, 'url') else page.url()
|
|
97
91
|
except:
|
|
98
|
-
# Fallback if URL is not accessible
|
|
99
92
|
current_url = "unknown"
|
|
100
93
|
|
|
101
|
-
logger.info(f"📸 Taking step_end screenshot at: {current_url}")
|
|
102
|
-
|
|
103
94
|
# Update step counter
|
|
104
95
|
step_number = self.step_counter
|
|
105
96
|
self.step_counter += 1
|
|
106
97
|
|
|
98
|
+
logger.info(f"📸 Capturing screenshot #{step_number} at: {current_url}")
|
|
99
|
+
|
|
107
100
|
# Take screenshot - browser-use takes viewport by default
|
|
108
101
|
screenshot_data = await page.screenshot()
|
|
109
102
|
|
|
110
|
-
# Ensure we have bytes - browser-use might return
|
|
103
|
+
# Ensure we have bytes - browser-use might return base64 string or bytes
|
|
111
104
|
if isinstance(screenshot_data, bytes):
|
|
112
105
|
screenshot_bytes = screenshot_data
|
|
113
106
|
elif isinstance(screenshot_data, str):
|
|
114
|
-
# If it's base64 encoded string
|
|
107
|
+
# If it's base64 encoded string, decode it
|
|
115
108
|
screenshot_bytes = base64.b64decode(screenshot_data)
|
|
116
109
|
else:
|
|
117
110
|
# Try to get bytes from whatever format it is
|
|
118
111
|
screenshot_bytes = bytes(screenshot_data)
|
|
119
112
|
|
|
120
|
-
logger.debug(f"Screenshot data type: {type(screenshot_data)}, size: {len(screenshot_bytes)} bytes")
|
|
121
|
-
|
|
122
113
|
# Generate filename with metadata
|
|
123
114
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
|
|
124
115
|
filename = f"{self.session_id}/step_{step_number:04d}_{timestamp}.png"
|
|
125
116
|
|
|
126
|
-
# Optional: Save to temp file for debugging (like Lambda does)
|
|
127
|
-
if os.getenv('DEBUG_SCREENSHOTS') == 'true':
|
|
128
|
-
temp_dir = Path("/tmp/screenshots")
|
|
129
|
-
temp_dir.mkdir(exist_ok=True, parents=True)
|
|
130
|
-
temp_path = temp_dir / f"step_{step_number:04d}_{timestamp}.png"
|
|
131
|
-
with open(temp_path, "wb") as f:
|
|
132
|
-
f.write(screenshot_bytes)
|
|
133
|
-
logger.debug(f"💾 Debug: Saved screenshot to: {temp_path}")
|
|
134
|
-
|
|
135
117
|
# Upload to S3
|
|
136
118
|
s3_key = f"{self.s3_prefix}{filename}"
|
|
137
119
|
|
|
138
120
|
try:
|
|
139
|
-
# Verify we have valid
|
|
121
|
+
# Verify we have valid data before uploading
|
|
140
122
|
if not screenshot_bytes or len(screenshot_bytes) == 0:
|
|
141
123
|
logger.error(f"❌ Screenshot bytes are empty!")
|
|
142
124
|
return
|
|
143
125
|
|
|
144
|
-
#
|
|
145
|
-
if len(screenshot_bytes) > 4 and screenshot_bytes[:4] != b'\x89PNG':
|
|
146
|
-
logger.warning(f"⚠️ Screenshot may not be valid PNG format. First 4 bytes: {screenshot_bytes[:4].hex()}")
|
|
147
|
-
|
|
148
|
-
# Upload with KMS encryption if required by bucket (matching Lambda implementation)
|
|
126
|
+
# Upload to S3 with KMS encryption
|
|
149
127
|
self.s3_client.put_object(
|
|
150
128
|
Bucket=self.s3_bucket,
|
|
151
129
|
Key=s3_key,
|
|
@@ -153,18 +131,20 @@ class ScreenshotCapture:
|
|
|
153
131
|
ContentType='image/png',
|
|
154
132
|
ServerSideEncryption='aws:kms',
|
|
155
133
|
Tagging='retention=30d'
|
|
156
|
-
# Using bucket's default KMS key
|
|
157
134
|
)
|
|
158
135
|
|
|
159
136
|
s3_url = f"s3://{self.s3_bucket}/{s3_key}"
|
|
160
|
-
logger.info(f"✅ Screenshot uploaded: {s3_url} (size: {len(screenshot_bytes)} bytes)")
|
|
137
|
+
logger.info(f"✅ Screenshot uploaded: {s3_url} (step {step_number}, size: {len(screenshot_bytes)} bytes)")
|
|
161
138
|
|
|
162
139
|
except self.ClientError as e:
|
|
163
|
-
logger.error(f"❌ Failed to upload screenshot to S3
|
|
140
|
+
logger.error(f"❌ Failed to upload screenshot #{step_number} to S3")
|
|
141
|
+
logger.error(f" Error: {str(e)}")
|
|
142
|
+
logger.error(f" Bucket: {self.s3_bucket}, Key: {s3_key}")
|
|
164
143
|
# Continue execution even if screenshot upload fails
|
|
165
144
|
|
|
166
145
|
except Exception as e:
|
|
167
|
-
|
|
146
|
+
step_num = getattr(self, 'step_counter', 'unknown')
|
|
147
|
+
logger.error(f"❌ Error capturing screenshot #{step_num}: {str(e)}")
|
|
168
148
|
# Don't raise - continue execution
|
|
169
149
|
|
|
170
150
|
|
|
@@ -215,7 +195,11 @@ async def main(session_id: str, cdp_url: str, task: str, output_schema_json: Opt
|
|
|
215
195
|
on_step_end_hook = None
|
|
216
196
|
|
|
217
197
|
if screenshot_config and screenshot_config.get('enabled', False):
|
|
218
|
-
logger.info("
|
|
198
|
+
logger.info("-" * 50)
|
|
199
|
+
logger.info("🎯 Initializing screenshot capture for browser task")
|
|
200
|
+
logger.info(f" Session: {session_id}")
|
|
201
|
+
logger.info("-" * 50)
|
|
202
|
+
|
|
219
203
|
# Add session_id to config
|
|
220
204
|
screenshot_config['session_id'] = session_id
|
|
221
205
|
screenshot_capture = ScreenshotCapture(screenshot_config)
|
|
@@ -225,6 +209,8 @@ async def main(session_id: str, cdp_url: str, task: str, output_schema_json: Opt
|
|
|
225
209
|
await screenshot_capture.capture_screenshot(agent)
|
|
226
210
|
|
|
227
211
|
on_step_end_hook = on_step_end
|
|
212
|
+
else:
|
|
213
|
+
logger.info("📷 Screenshot capture is DISABLED for this browser task")
|
|
228
214
|
|
|
229
215
|
output_schema = None
|
|
230
216
|
if output_schema_json:
|