@minded-ai/mindedjs 1.0.0-ec2-beta-6 → 1.0.0-ec2-beta-8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,8 @@
1
1
  """
2
2
  Browser Task Executor with Screenshot Capture Support
3
3
 
4
- This script runs browser automation tasks using browser-use and automatically captures
5
- screenshots at the end of each step, uploading them to S3.
6
-
7
- Environment Variables for Screenshots:
8
- - SCREENSHOT_S3_BUCKET: S3 bucket name (default: 'global-development-agentsforce')
9
- - SCREENSHOT_S3_PREFIX: S3 key prefix (default: 'browser-use-runs-screenshots/')
10
- - AWS_REGION: AWS region for S3 (default: 'us-east-1')
11
-
12
- AWS credentials should be configured via standard AWS SDK methods.
4
+ This script runs browser automation tasks using browser-use and can capture
5
+ screenshots at the end of each step, uploading them to S3 when configured.
13
6
  """
14
7
 
15
8
  import asyncio
@@ -81,8 +74,10 @@ class ScreenshotCapture:
81
74
  # Track step counter
82
75
  self.step_counter = 0
83
76
 
84
- logger.info(f"📸 Screenshot capture initialized - S3: s3://{self.s3_bucket}/{self.s3_prefix}")
85
- logger.info(f" Session ID: {self.session_id}, Region: {self.aws_region}")
77
+ logger.info(f"📸 Screenshot capture ENABLED - will capture at end of each step")
78
+ logger.info(f" S3 destination: s3://{self.s3_bucket}/{self.s3_prefix}")
79
+ logger.info(f" Session ID: {self.session_id}")
80
+ logger.info(f" AWS Region: {self.aws_region}")
86
81
 
87
82
  async def capture_screenshot(self, agent: Any) -> None:
88
83
  """Capture screenshot at step end and upload to S3"""
@@ -90,62 +85,45 @@ class ScreenshotCapture:
90
85
  # Get current page
91
86
  page = await agent.browser_session.get_current_page()
92
87
 
93
- # Get current URL for context - use page.url() method or fallback
88
+ # Get current URL for logging (browser-use might use method instead of property)
94
89
  try:
95
- # Try as property first
96
90
  current_url = page.url if hasattr(page, 'url') else page.url()
97
91
  except:
98
- # Fallback if URL is not accessible
99
92
  current_url = "unknown"
100
93
 
101
- logger.info(f"📸 Taking step_end screenshot at: {current_url}")
102
-
103
94
  # Update step counter
104
95
  step_number = self.step_counter
105
96
  self.step_counter += 1
106
97
 
98
+ logger.info(f"📸 Capturing screenshot #{step_number} at: {current_url}")
99
+
107
100
  # Take screenshot - browser-use takes viewport by default
108
101
  screenshot_data = await page.screenshot()
109
102
 
110
- # Ensure we have bytes - browser-use might return different format
103
+ # Ensure we have bytes - browser-use might return base64 string or bytes
111
104
  if isinstance(screenshot_data, bytes):
112
105
  screenshot_bytes = screenshot_data
113
106
  elif isinstance(screenshot_data, str):
114
- # If it's base64 encoded string
107
+ # If it's base64 encoded string, decode it
115
108
  screenshot_bytes = base64.b64decode(screenshot_data)
116
109
  else:
117
110
  # Try to get bytes from whatever format it is
118
111
  screenshot_bytes = bytes(screenshot_data)
119
112
 
120
- logger.debug(f"Screenshot data type: {type(screenshot_data)}, size: {len(screenshot_bytes)} bytes")
121
-
122
113
  # Generate filename with metadata
123
114
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
124
115
  filename = f"{self.session_id}/step_{step_number:04d}_{timestamp}.png"
125
116
 
126
- # Optional: Save to temp file for debugging (like Lambda does)
127
- if os.getenv('DEBUG_SCREENSHOTS') == 'true':
128
- temp_dir = Path("/tmp/screenshots")
129
- temp_dir.mkdir(exist_ok=True, parents=True)
130
- temp_path = temp_dir / f"step_{step_number:04d}_{timestamp}.png"
131
- with open(temp_path, "wb") as f:
132
- f.write(screenshot_bytes)
133
- logger.debug(f"💾 Debug: Saved screenshot to: {temp_path}")
134
-
135
117
  # Upload to S3
136
118
  s3_key = f"{self.s3_prefix}{filename}"
137
119
 
138
120
  try:
139
- # Verify we have valid PNG data before uploading
121
+ # Verify we have valid data before uploading
140
122
  if not screenshot_bytes or len(screenshot_bytes) == 0:
141
123
  logger.error(f"❌ Screenshot bytes are empty!")
142
124
  return
143
125
 
144
- # Check for PNG header (89 50 4E 47 = PNG signature)
145
- if len(screenshot_bytes) > 4 and screenshot_bytes[:4] != b'\x89PNG':
146
- logger.warning(f"⚠️ Screenshot may not be valid PNG format. First 4 bytes: {screenshot_bytes[:4].hex()}")
147
-
148
- # Upload with KMS encryption if required by bucket (matching Lambda implementation)
126
+ # Upload to S3 with KMS encryption
149
127
  self.s3_client.put_object(
150
128
  Bucket=self.s3_bucket,
151
129
  Key=s3_key,
@@ -153,18 +131,20 @@ class ScreenshotCapture:
153
131
  ContentType='image/png',
154
132
  ServerSideEncryption='aws:kms',
155
133
  Tagging='retention=30d'
156
- # Using bucket's default KMS key
157
134
  )
158
135
 
159
136
  s3_url = f"s3://{self.s3_bucket}/{s3_key}"
160
- logger.info(f"✅ Screenshot uploaded: {s3_url} (size: {len(screenshot_bytes)} bytes)")
137
+ logger.info(f"✅ Screenshot uploaded: {s3_url} (step {step_number}, size: {len(screenshot_bytes)} bytes)")
161
138
 
162
139
  except self.ClientError as e:
163
- logger.error(f"❌ Failed to upload screenshot to S3: {str(e)}")
140
+ logger.error(f"❌ Failed to upload screenshot #{step_number} to S3")
141
+ logger.error(f" Error: {str(e)}")
142
+ logger.error(f" Bucket: {self.s3_bucket}, Key: {s3_key}")
164
143
  # Continue execution even if screenshot upload fails
165
144
 
166
145
  except Exception as e:
167
- logger.error(f"❌ Error capturing screenshot: {str(e)}")
146
+ step_num = getattr(self, 'step_counter', 'unknown')
147
+ logger.error(f"❌ Error capturing screenshot #{step_num}: {str(e)}")
168
148
  # Don't raise - continue execution
169
149
 
170
150
 
@@ -215,7 +195,11 @@ async def main(session_id: str, cdp_url: str, task: str, output_schema_json: Opt
215
195
  on_step_end_hook = None
216
196
 
217
197
  if screenshot_config and screenshot_config.get('enabled', False):
218
- logger.info("📸 Screenshot capture enabled (captures at step end)")
198
+ logger.info("-" * 50)
199
+ logger.info("🎯 Initializing screenshot capture for browser task")
200
+ logger.info(f" Session: {session_id}")
201
+ logger.info("-" * 50)
202
+
219
203
  # Add session_id to config
220
204
  screenshot_config['session_id'] = session_id
221
205
  screenshot_capture = ScreenshotCapture(screenshot_config)
@@ -225,6 +209,8 @@ async def main(session_id: str, cdp_url: str, task: str, output_schema_json: Opt
225
209
  await screenshot_capture.capture_screenshot(agent)
226
210
 
227
211
  on_step_end_hook = on_step_end
212
+ else:
213
+ logger.info("📷 Screenshot capture is DISABLED for this browser task")
228
214
 
229
215
  output_schema = None
230
216
  if output_schema_json:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@minded-ai/mindedjs",
3
- "version": "1.0.0-ec2-beta-6",
3
+ "version": "1.0.0-ec2-beta-8",
4
4
  "description": "MindedJS is a TypeScript library for building agents.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -1,15 +1,8 @@
1
1
  """
2
2
  Browser Task Executor with Screenshot Capture Support
3
3
 
4
- This script runs browser automation tasks using browser-use and automatically captures
5
- screenshots at the end of each step, uploading them to S3.
6
-
7
- Environment Variables for Screenshots:
8
- - SCREENSHOT_S3_BUCKET: S3 bucket name (default: 'global-development-agentsforce')
9
- - SCREENSHOT_S3_PREFIX: S3 key prefix (default: 'browser-use-runs-screenshots/')
10
- - AWS_REGION: AWS region for S3 (default: 'us-east-1')
11
-
12
- AWS credentials should be configured via standard AWS SDK methods.
4
+ This script runs browser automation tasks using browser-use and can capture
5
+ screenshots at the end of each step, uploading them to S3 when configured.
13
6
  """
14
7
 
15
8
  import asyncio
@@ -81,8 +74,10 @@ class ScreenshotCapture:
81
74
  # Track step counter
82
75
  self.step_counter = 0
83
76
 
84
- logger.info(f"📸 Screenshot capture initialized - S3: s3://{self.s3_bucket}/{self.s3_prefix}")
85
- logger.info(f" Session ID: {self.session_id}, Region: {self.aws_region}")
77
+ logger.info(f"📸 Screenshot capture ENABLED - will capture at end of each step")
78
+ logger.info(f" S3 destination: s3://{self.s3_bucket}/{self.s3_prefix}")
79
+ logger.info(f" Session ID: {self.session_id}")
80
+ logger.info(f" AWS Region: {self.aws_region}")
86
81
 
87
82
  async def capture_screenshot(self, agent: Any) -> None:
88
83
  """Capture screenshot at step end and upload to S3"""
@@ -90,62 +85,45 @@ class ScreenshotCapture:
90
85
  # Get current page
91
86
  page = await agent.browser_session.get_current_page()
92
87
 
93
- # Get current URL for context - use page.url() method or fallback
88
+ # Get current URL for logging (browser-use might use method instead of property)
94
89
  try:
95
- # Try as property first
96
90
  current_url = page.url if hasattr(page, 'url') else page.url()
97
91
  except:
98
- # Fallback if URL is not accessible
99
92
  current_url = "unknown"
100
93
 
101
- logger.info(f"📸 Taking step_end screenshot at: {current_url}")
102
-
103
94
  # Update step counter
104
95
  step_number = self.step_counter
105
96
  self.step_counter += 1
106
97
 
98
+ logger.info(f"📸 Capturing screenshot #{step_number} at: {current_url}")
99
+
107
100
  # Take screenshot - browser-use takes viewport by default
108
101
  screenshot_data = await page.screenshot()
109
102
 
110
- # Ensure we have bytes - browser-use might return different format
103
+ # Ensure we have bytes - browser-use might return base64 string or bytes
111
104
  if isinstance(screenshot_data, bytes):
112
105
  screenshot_bytes = screenshot_data
113
106
  elif isinstance(screenshot_data, str):
114
- # If it's base64 encoded string
107
+ # If it's base64 encoded string, decode it
115
108
  screenshot_bytes = base64.b64decode(screenshot_data)
116
109
  else:
117
110
  # Try to get bytes from whatever format it is
118
111
  screenshot_bytes = bytes(screenshot_data)
119
112
 
120
- logger.debug(f"Screenshot data type: {type(screenshot_data)}, size: {len(screenshot_bytes)} bytes")
121
-
122
113
  # Generate filename with metadata
123
114
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
124
115
  filename = f"{self.session_id}/step_{step_number:04d}_{timestamp}.png"
125
116
 
126
- # Optional: Save to temp file for debugging (like Lambda does)
127
- if os.getenv('DEBUG_SCREENSHOTS') == 'true':
128
- temp_dir = Path("/tmp/screenshots")
129
- temp_dir.mkdir(exist_ok=True, parents=True)
130
- temp_path = temp_dir / f"step_{step_number:04d}_{timestamp}.png"
131
- with open(temp_path, "wb") as f:
132
- f.write(screenshot_bytes)
133
- logger.debug(f"💾 Debug: Saved screenshot to: {temp_path}")
134
-
135
117
  # Upload to S3
136
118
  s3_key = f"{self.s3_prefix}{filename}"
137
119
 
138
120
  try:
139
- # Verify we have valid PNG data before uploading
121
+ # Verify we have valid data before uploading
140
122
  if not screenshot_bytes or len(screenshot_bytes) == 0:
141
123
  logger.error(f"❌ Screenshot bytes are empty!")
142
124
  return
143
125
 
144
- # Check for PNG header (89 50 4E 47 = PNG signature)
145
- if len(screenshot_bytes) > 4 and screenshot_bytes[:4] != b'\x89PNG':
146
- logger.warning(f"⚠️ Screenshot may not be valid PNG format. First 4 bytes: {screenshot_bytes[:4].hex()}")
147
-
148
- # Upload with KMS encryption if required by bucket (matching Lambda implementation)
126
+ # Upload to S3 with KMS encryption
149
127
  self.s3_client.put_object(
150
128
  Bucket=self.s3_bucket,
151
129
  Key=s3_key,
@@ -153,18 +131,20 @@ class ScreenshotCapture:
153
131
  ContentType='image/png',
154
132
  ServerSideEncryption='aws:kms',
155
133
  Tagging='retention=30d'
156
- # Using bucket's default KMS key
157
134
  )
158
135
 
159
136
  s3_url = f"s3://{self.s3_bucket}/{s3_key}"
160
- logger.info(f"✅ Screenshot uploaded: {s3_url} (size: {len(screenshot_bytes)} bytes)")
137
+ logger.info(f"✅ Screenshot uploaded: {s3_url} (step {step_number}, size: {len(screenshot_bytes)} bytes)")
161
138
 
162
139
  except self.ClientError as e:
163
- logger.error(f"❌ Failed to upload screenshot to S3: {str(e)}")
140
+ logger.error(f"❌ Failed to upload screenshot #{step_number} to S3")
141
+ logger.error(f" Error: {str(e)}")
142
+ logger.error(f" Bucket: {self.s3_bucket}, Key: {s3_key}")
164
143
  # Continue execution even if screenshot upload fails
165
144
 
166
145
  except Exception as e:
167
- logger.error(f"❌ Error capturing screenshot: {str(e)}")
146
+ step_num = getattr(self, 'step_counter', 'unknown')
147
+ logger.error(f"❌ Error capturing screenshot #{step_num}: {str(e)}")
168
148
  # Don't raise - continue execution
169
149
 
170
150
 
@@ -215,7 +195,11 @@ async def main(session_id: str, cdp_url: str, task: str, output_schema_json: Opt
215
195
  on_step_end_hook = None
216
196
 
217
197
  if screenshot_config and screenshot_config.get('enabled', False):
218
- logger.info("📸 Screenshot capture enabled (captures at step end)")
198
+ logger.info("-" * 50)
199
+ logger.info("🎯 Initializing screenshot capture for browser task")
200
+ logger.info(f" Session: {session_id}")
201
+ logger.info("-" * 50)
202
+
219
203
  # Add session_id to config
220
204
  screenshot_config['session_id'] = session_id
221
205
  screenshot_capture = ScreenshotCapture(screenshot_config)
@@ -225,6 +209,8 @@ async def main(session_id: str, cdp_url: str, task: str, output_schema_json: Opt
225
209
  await screenshot_capture.capture_screenshot(agent)
226
210
 
227
211
  on_step_end_hook = on_step_end
212
+ else:
213
+ logger.info("📷 Screenshot capture is DISABLED for this browser task")
228
214
 
229
215
  output_schema = None
230
216
  if output_schema_json: