PyPI - optexity-browser-use - Versions diffs - 0.9.5__py3-none-any.whl - Mend

optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147)

browser_use/__init__.py +157 -0
browser_use/actor/__init__.py +11 -0
browser_use/actor/element.py +1175 -0
browser_use/actor/mouse.py +134 -0
browser_use/actor/page.py +561 -0
browser_use/actor/playground/flights.py +41 -0
browser_use/actor/playground/mixed_automation.py +54 -0
browser_use/actor/playground/playground.py +236 -0
browser_use/actor/utils.py +176 -0
browser_use/agent/cloud_events.py +282 -0
browser_use/agent/gif.py +424 -0
browser_use/agent/judge.py +170 -0
browser_use/agent/message_manager/service.py +473 -0
browser_use/agent/message_manager/utils.py +52 -0
browser_use/agent/message_manager/views.py +98 -0
browser_use/agent/prompts.py +413 -0
browser_use/agent/service.py +2316 -0
browser_use/agent/system_prompt.md +185 -0
browser_use/agent/system_prompt_flash.md +10 -0
browser_use/agent/system_prompt_no_thinking.md +183 -0
browser_use/agent/views.py +743 -0
browser_use/browser/__init__.py +41 -0
browser_use/browser/cloud/cloud.py +203 -0
browser_use/browser/cloud/views.py +89 -0
browser_use/browser/events.py +578 -0
browser_use/browser/profile.py +1158 -0
browser_use/browser/python_highlights.py +548 -0
browser_use/browser/session.py +3225 -0
browser_use/browser/session_manager.py +399 -0
browser_use/browser/video_recorder.py +162 -0
browser_use/browser/views.py +200 -0
browser_use/browser/watchdog_base.py +260 -0
browser_use/browser/watchdogs/__init__.py +0 -0
browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
browser_use/browser/watchdogs/crash_watchdog.py +335 -0
browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
browser_use/browser/watchdogs/dom_watchdog.py +817 -0
browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
browser_use/browser/watchdogs/popups_watchdog.py +143 -0
browser_use/browser/watchdogs/recording_watchdog.py +126 -0
browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
browser_use/browser/watchdogs/security_watchdog.py +280 -0
browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
browser_use/cli.py +2359 -0
browser_use/code_use/__init__.py +16 -0
browser_use/code_use/formatting.py +192 -0
browser_use/code_use/namespace.py +665 -0
browser_use/code_use/notebook_export.py +276 -0
browser_use/code_use/service.py +1340 -0
browser_use/code_use/system_prompt.md +574 -0
browser_use/code_use/utils.py +150 -0
browser_use/code_use/views.py +171 -0
browser_use/config.py +505 -0
browser_use/controller/__init__.py +3 -0
browser_use/dom/enhanced_snapshot.py +161 -0
browser_use/dom/markdown_extractor.py +169 -0
browser_use/dom/playground/extraction.py +312 -0
browser_use/dom/playground/multi_act.py +32 -0
browser_use/dom/serializer/clickable_elements.py +200 -0
browser_use/dom/serializer/code_use_serializer.py +287 -0
browser_use/dom/serializer/eval_serializer.py +478 -0
browser_use/dom/serializer/html_serializer.py +212 -0
browser_use/dom/serializer/paint_order.py +197 -0
browser_use/dom/serializer/serializer.py +1170 -0
browser_use/dom/service.py +825 -0
browser_use/dom/utils.py +129 -0
browser_use/dom/views.py +906 -0
browser_use/exceptions.py +5 -0
browser_use/filesystem/__init__.py +0 -0
browser_use/filesystem/file_system.py +619 -0
browser_use/init_cmd.py +376 -0
browser_use/integrations/gmail/__init__.py +24 -0
browser_use/integrations/gmail/actions.py +115 -0
browser_use/integrations/gmail/service.py +225 -0
browser_use/llm/__init__.py +155 -0
browser_use/llm/anthropic/chat.py +242 -0
browser_use/llm/anthropic/serializer.py +312 -0
browser_use/llm/aws/__init__.py +36 -0
browser_use/llm/aws/chat_anthropic.py +242 -0
browser_use/llm/aws/chat_bedrock.py +289 -0
browser_use/llm/aws/serializer.py +257 -0
browser_use/llm/azure/chat.py +91 -0
browser_use/llm/base.py +57 -0
browser_use/llm/browser_use/__init__.py +3 -0
browser_use/llm/browser_use/chat.py +201 -0
browser_use/llm/cerebras/chat.py +193 -0
browser_use/llm/cerebras/serializer.py +109 -0
browser_use/llm/deepseek/chat.py +212 -0
browser_use/llm/deepseek/serializer.py +109 -0
browser_use/llm/exceptions.py +29 -0
browser_use/llm/google/__init__.py +3 -0
browser_use/llm/google/chat.py +542 -0
browser_use/llm/google/serializer.py +120 -0
browser_use/llm/groq/chat.py +229 -0
browser_use/llm/groq/parser.py +158 -0
browser_use/llm/groq/serializer.py +159 -0
browser_use/llm/messages.py +238 -0
browser_use/llm/models.py +271 -0
browser_use/llm/oci_raw/__init__.py +10 -0
browser_use/llm/oci_raw/chat.py +443 -0
browser_use/llm/oci_raw/serializer.py +229 -0
browser_use/llm/ollama/chat.py +97 -0
browser_use/llm/ollama/serializer.py +143 -0
browser_use/llm/openai/chat.py +264 -0
browser_use/llm/openai/like.py +15 -0
browser_use/llm/openai/serializer.py +165 -0
browser_use/llm/openrouter/chat.py +211 -0
browser_use/llm/openrouter/serializer.py +26 -0
browser_use/llm/schema.py +176 -0
browser_use/llm/views.py +48 -0
browser_use/logging_config.py +330 -0
browser_use/mcp/__init__.py +18 -0
browser_use/mcp/__main__.py +12 -0
browser_use/mcp/client.py +544 -0
browser_use/mcp/controller.py +264 -0
browser_use/mcp/server.py +1114 -0
browser_use/observability.py +204 -0
browser_use/py.typed +0 -0
browser_use/sandbox/__init__.py +41 -0
browser_use/sandbox/sandbox.py +637 -0
browser_use/sandbox/views.py +132 -0
browser_use/screenshots/__init__.py +1 -0
browser_use/screenshots/service.py +52 -0
browser_use/sync/__init__.py +6 -0
browser_use/sync/auth.py +357 -0
browser_use/sync/service.py +161 -0
browser_use/telemetry/__init__.py +51 -0
browser_use/telemetry/service.py +112 -0
browser_use/telemetry/views.py +101 -0
browser_use/tokens/__init__.py +0 -0
browser_use/tokens/custom_pricing.py +24 -0
browser_use/tokens/mappings.py +4 -0
browser_use/tokens/service.py +580 -0
browser_use/tokens/views.py +108 -0
browser_use/tools/registry/service.py +572 -0
browser_use/tools/registry/views.py +174 -0
browser_use/tools/service.py +1675 -0
browser_use/tools/utils.py +82 -0
browser_use/tools/views.py +100 -0
browser_use/utils.py +670 -0
optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0

browser_use/agent/cloud_events.py ADDED Viewed

@@ -0,0 +1,282 @@
+import base64
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+import anyio
+from bubus import BaseEvent
+from pydantic import Field, field_validator
+from uuid_extensions import uuid7str
+MAX_STRING_LENGTH = 100000  # 100K chars ~ 25k tokens should be enough; caps free-text fields (done_output, memory, goals)
+MAX_URL_LENGTH = 100000  # caps URL-valued fields (gif_url, url, browser session URLs)
+MAX_TASK_LENGTH = 100000  # caps the task description on CreateAgentTaskEvent
+MAX_COMMENT_LENGTH = 2000  # caps user_comment
+MAX_FILE_CONTENT_SIZE = 50 * 1024 * 1024  # 50MB; decoded-size limit checked by the base64 validators
+class UpdateAgentTaskEvent(BaseEvent):
+	# Required fields for identification
+	id: str  # The task ID to update
+	user_id: str = Field(max_length=255)  # For authorization
+	device_id: str | None = Field(None, max_length=255)  # Device ID for auth lookup
+	# Optional fields that can be updated
+	stopped: bool | None = None
+	paused: bool | None = None
+	done_output: str | None = Field(None, max_length=MAX_STRING_LENGTH)
+	finished_at: datetime | None = None
+	agent_state: dict | None = None
+	user_feedback_type: str | None = Field(None, max_length=10)  # UserFeedbackType enum value as string
+	user_comment: str | None = Field(None, max_length=MAX_COMMENT_LENGTH)
+	gif_url: str | None = Field(None, max_length=MAX_URL_LENGTH)
+	@classmethod
+	def from_agent(cls, agent) -> 'UpdateAgentTaskEvent':
+		"""Create an UpdateAgentTaskEvent from an Agent instance"""
+		if not hasattr(agent, '_task_start_time'):
+			raise ValueError('Agent must have _task_start_time attribute')
+		done_output = agent.history.final_result() if agent.history else None
+		return cls(
+			id=str(agent.task_id),
+			user_id='',  # To be filled by cloud handler
+			device_id=agent.cloud_sync.auth_client.device_id
+			if hasattr(agent, 'cloud_sync') and agent.cloud_sync and agent.cloud_sync.auth_client
+			else None,
+			stopped=agent.state.stopped if hasattr(agent.state, 'stopped') else False,
+			paused=agent.state.paused if hasattr(agent.state, 'paused') else False,
+			done_output=done_output,
+			finished_at=datetime.now(timezone.utc) if agent.history and agent.history.is_done() else None,
+			agent_state=agent.state.model_dump() if hasattr(agent.state, 'model_dump') else {},
+			user_feedback_type=None,
+			user_comment=None,
+			gif_url=None,
+			# user_feedback_type and user_comment would be set by the API/frontend
+			# gif_url would be set after GIF generation if needed
+		)
+class CreateAgentOutputFileEvent(BaseEvent):
+	# Model fields
+	id: str = Field(default_factory=uuid7str)
+	user_id: str = Field(max_length=255)
+	device_id: str | None = Field(None, max_length=255)  # Device ID for auth lookup
+	task_id: str
+	file_name: str = Field(max_length=255)
+	file_content: str | None = None  # Base64 encoded file content
+	content_type: str | None = Field(None, max_length=100)  # MIME type for file uploads
+	created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+	@field_validator('file_content')
+	@classmethod
+	def validate_file_size(cls, v: str | None) -> str | None:
+		"""Validate base64 file content size."""
+		if v is None:
+			return v
+		# Remove data URL prefix if present
+		if ',' in v:
+			v = v.split(',')[1]
+		# Estimate decoded size (base64 is ~33% larger)
+		estimated_size = len(v) * 3 / 4
+		if estimated_size > MAX_FILE_CONTENT_SIZE:
+			raise ValueError(f'File content exceeds maximum size of {MAX_FILE_CONTENT_SIZE / 1024 / 1024}MB')
+		return v
+	@classmethod
+	async def from_agent_and_file(cls, agent, output_path: str) -> 'CreateAgentOutputFileEvent':
+		"""Create a CreateAgentOutputFileEvent from a file path"""
+		gif_path = Path(output_path)
+		if not gif_path.exists():
+			raise FileNotFoundError(f'File not found: {output_path}')
+		gif_size = os.path.getsize(gif_path)
+		# Read GIF content for base64 encoding if needed
+		gif_content = None
+		if gif_size < 50 * 1024 * 1024:  # Only read if < 50MB
+			async with await anyio.open_file(gif_path, 'rb') as f:
+				gif_bytes = await f.read()
+				gif_content = base64.b64encode(gif_bytes).decode('utf-8')
+		return cls(
+			user_id='',  # To be filled by cloud handler
+			device_id=agent.cloud_sync.auth_client.device_id
+			if hasattr(agent, 'cloud_sync') and agent.cloud_sync and agent.cloud_sync.auth_client
+			else None,
+			task_id=str(agent.task_id),
+			file_name=gif_path.name,
+			file_content=gif_content,  # Base64 encoded
+			content_type='image/gif',
+		)
+class CreateAgentStepEvent(BaseEvent):
+	# Model fields
+	id: str = Field(default_factory=uuid7str)
+	user_id: str = Field(max_length=255)  # Added for authorization checks
+	device_id: str | None = Field(None, max_length=255)  # Device ID for auth lookup
+	created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+	agent_task_id: str
+	step: int
+	evaluation_previous_goal: str = Field(max_length=MAX_STRING_LENGTH)
+	memory: str = Field(max_length=MAX_STRING_LENGTH)
+	next_goal: str = Field(max_length=MAX_STRING_LENGTH)
+	actions: list[dict]
+	screenshot_url: str | None = Field(None, max_length=MAX_FILE_CONTENT_SIZE)  # ~50MB for base64 images
+	url: str = Field(default='', max_length=MAX_URL_LENGTH)
+	@field_validator('screenshot_url')
+	@classmethod
+	def validate_screenshot_size(cls, v: str | None) -> str | None:
+		"""Validate screenshot URL or base64 content size."""
+		if v is None or not v.startswith('data:'):
+			return v
+		# It's base64 data, check size
+		if ',' in v:
+			base64_part = v.split(',')[1]
+			estimated_size = len(base64_part) * 3 / 4
+			if estimated_size > MAX_FILE_CONTENT_SIZE:
+				raise ValueError(f'Screenshot content exceeds maximum size of {MAX_FILE_CONTENT_SIZE / 1024 / 1024}MB')
+		return v
+	@classmethod
+	def from_agent_step(
+		cls, agent, model_output, result: list, actions_data: list[dict], browser_state_summary
+	) -> 'CreateAgentStepEvent':
+		"""Create a CreateAgentStepEvent from agent step data"""
+		# Get first action details if available
+		first_action = model_output.action[0] if model_output.action else None
+		# Extract current state from model output
+		current_state = model_output.current_state if hasattr(model_output, 'current_state') else None
+		# Capture screenshot as base64 data URL if available
+		screenshot_url = None
+		if browser_state_summary.screenshot:
+			screenshot_url = f'data:image/jpeg;base64,{browser_state_summary.screenshot}'
+			import logging
+			logger = logging.getLogger(__name__)
+			logger.debug(f'📸 Including screenshot in CreateAgentStepEvent, length: {len(browser_state_summary.screenshot)}')
+		else:
+			import logging
+			logger = logging.getLogger(__name__)
+			logger.debug('📸 No screenshot in browser_state_summary for CreateAgentStepEvent')
+		return cls(
+			user_id='',  # To be filled by cloud handler
+			device_id=agent.cloud_sync.auth_client.device_id
+			if hasattr(agent, 'cloud_sync') and agent.cloud_sync and agent.cloud_sync.auth_client
+			else None,
+			agent_task_id=str(agent.task_id),
+			step=agent.state.n_steps,
+			evaluation_previous_goal=current_state.evaluation_previous_goal if current_state else '',
+			memory=current_state.memory if current_state else '',
+			next_goal=current_state.next_goal if current_state else '',
+			actions=actions_data,  # List of action dicts
+			url=browser_state_summary.url,
+			screenshot_url=screenshot_url,
+		)
+class CreateAgentTaskEvent(BaseEvent):
+	# Model fields
+	id: str = Field(default_factory=uuid7str)
+	user_id: str = Field(max_length=255)  # Added for authorization checks
+	device_id: str | None = Field(None, max_length=255)  # Device ID for auth lookup
+	agent_session_id: str
+	llm_model: str = Field(max_length=200)  # LLMModel enum value as string
+	stopped: bool = False
+	paused: bool = False
+	task: str = Field(max_length=MAX_TASK_LENGTH)
+	done_output: str | None = Field(None, max_length=MAX_STRING_LENGTH)
+	scheduled_task_id: str | None = None
+	started_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+	finished_at: datetime | None = None
+	agent_state: dict = Field(default_factory=dict)
+	user_feedback_type: str | None = Field(None, max_length=10)  # UserFeedbackType enum value as string
+	user_comment: str | None = Field(None, max_length=MAX_COMMENT_LENGTH)
+	gif_url: str | None = Field(None, max_length=MAX_URL_LENGTH)
+	@classmethod
+	def from_agent(cls, agent) -> 'CreateAgentTaskEvent':
+		"""Create a CreateAgentTaskEvent from an Agent instance"""
+		return cls(
+			id=str(agent.task_id),
+			user_id='',  # To be filled by cloud handler
+			device_id=agent.cloud_sync.auth_client.device_id
+			if hasattr(agent, 'cloud_sync') and agent.cloud_sync and agent.cloud_sync.auth_client
+			else None,
+			agent_session_id=str(agent.session_id),
+			task=agent.task,
+			llm_model=agent.llm.model_name,
+			agent_state=agent.state.model_dump() if hasattr(agent.state, 'model_dump') else {},
+			stopped=False,
+			paused=False,
+			done_output=None,
+			started_at=datetime.fromtimestamp(agent._task_start_time, tz=timezone.utc),
+			finished_at=None,
+			user_feedback_type=None,
+			user_comment=None,
+			gif_url=None,
+		)
+class CreateAgentSessionEvent(BaseEvent):
+	# Model fields
+	id: str = Field(default_factory=uuid7str)
+	user_id: str = Field(max_length=255)
+	device_id: str | None = Field(None, max_length=255)  # Device ID for auth lookup
+	browser_session_id: str = Field(max_length=255)
+	browser_session_live_url: str = Field(max_length=MAX_URL_LENGTH)
+	browser_session_cdp_url: str = Field(max_length=MAX_URL_LENGTH)
+	browser_session_stopped: bool = False
+	browser_session_stopped_at: datetime | None = None
+	is_source_api: bool | None = None
+	browser_state: dict = Field(default_factory=dict)
+	browser_session_data: dict | None = None
+	@classmethod
+	def from_agent(cls, agent) -> 'CreateAgentSessionEvent':
+		"""Create a CreateAgentSessionEvent from an Agent instance"""
+		return cls(
+			id=str(agent.session_id),
+			user_id='',  # To be filled by cloud handler
+			device_id=agent.cloud_sync.auth_client.device_id
+			if hasattr(agent, 'cloud_sync') and agent.cloud_sync and agent.cloud_sync.auth_client
+			else None,
+			browser_session_id=agent.browser_session.id,
+			browser_session_live_url='',  # To be filled by cloud handler
+			browser_session_cdp_url='',  # To be filled by cloud handler
+			browser_state={
+				'viewport': agent.browser_profile.viewport if agent.browser_profile else {'width': 1280, 'height': 720},
+				'user_agent': agent.browser_profile.user_agent if agent.browser_profile else None,
+				'headless': agent.browser_profile.headless if agent.browser_profile else True,
+				'initial_url': None,  # Will be updated during execution
+				'final_url': None,  # Will be updated during execution
+				'total_pages_visited': 0,  # Will be updated during execution
+				'session_duration_seconds': 0,  # Will be updated during execution
+			},
+			browser_session_data={
+				'cookies': [],
+				'secrets': {},
+				# TODO: send secrets safely so tasks can be replayed on cloud seamlessly
+				# 'secrets': dict(agent.sensitive_data) if agent.sensitive_data else {},
+				'allowed_domains': agent.browser_profile.allowed_domains if agent.browser_profile else [],
+			},
+		)
+class UpdateAgentSessionEvent(BaseEvent):
+	"""Event to update an existing agent session"""
+	# Model fields: only id and user_id are required; the rest default to None
+	id: str  # Session ID to update
+	user_id: str = Field(max_length=255)  # at most 255 characters
+	device_id: str | None = Field(None, max_length=255)  # optional, at most 255 characters
+	browser_session_stopped: bool | None = None  # optional; defaults to None
+	browser_session_stopped_at: datetime | None = None  # optional timestamp; defaults to None
+	end_reason: str | None = Field(None, max_length=100)  # Why the session ended

browser_use/agent/gif.py ADDED Viewed

@@ -0,0 +1,424 @@
+from __future__ import annotations
+import base64
+import io
+import logging
+import os
+import platform
+from typing import TYPE_CHECKING
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.views import PLACEHOLDER_4PX_SCREENSHOT
+from browser_use.config import CONFIG
+if TYPE_CHECKING:  # PIL names are needed here only for annotations; the functions below import PIL at call time
+	from PIL import Image, ImageFont
+logger = logging.getLogger(__name__)  # module-level logger used by create_history_gif
+def decode_unicode_escapes_to_utf8(text: str) -> str:
+	"""Handle decoding any unicode escape sequences embedded in a string (needed to render non-ASCII languages like chinese or arabic in the GIF overlay text)"""
+	if r'\u' not in text:
+		# doesn't have any escape sequences that need to be decoded
+		return text
+	try:
+		# Try to decode Unicode escape sequences
+		return text.encode('latin1').decode('unicode_escape')
+	except (UnicodeEncodeError, UnicodeDecodeError):
+		# logger.debug(f"Failed to decode unicode escape sequences while generating gif text: {text}")
+		return text
+def create_history_gif(
+	task: str,
+	history: AgentHistoryList,
+	#
+	output_path: str = 'agent_history.gif',
+	duration: int = 3000,
+	show_goals: bool = True,
+	show_task: bool = True,
+	show_logo: bool = False,
+	font_size: int = 40,
+	title_font_size: int = 56,
+	goal_font_size: int = 44,
+	margin: int = 40,
+	line_spacing: float = 1.5,
+) -> None:
+	"""Create a GIF from the agent's history with overlaid task and goal text."""
+	if not history.history:
+		logger.warning('No history to create GIF from')
+		return
+	from PIL import Image, ImageFont
+	images = []
+	# if history is empty, we can't create a gif
+	if not history.history:
+		logger.warning('No history to create GIF from')
+		return
+	# Get all screenshots from history (including None placeholders)
+	screenshots = history.screenshots(return_none_if_not_screenshot=True)
+	if not screenshots:
+		logger.warning('No screenshots found in history')
+		return
+	# Find the first non-placeholder screenshot
+	# A screenshot is considered a placeholder if:
+	# 1. It's the exact 4px placeholder for about:blank pages, OR
+	# 2. It comes from a new tab page (chrome://newtab/, about:blank, etc.)
+	first_real_screenshot = None
+	for screenshot in screenshots:
+		if screenshot and screenshot != PLACEHOLDER_4PX_SCREENSHOT:
+			first_real_screenshot = screenshot
+			break
+	if not first_real_screenshot:
+		logger.warning('No valid screenshots found (all are placeholders or from new tab pages)')
+		return
+	# Try to load nicer fonts
+	try:
+		# Try different font options in order of preference
+		# ArialUni is a font that comes with Office and can render most non-alphabet characters
+		font_options = [
+			'PingFang',
+			'STHeiti Medium',
+			'Microsoft YaHei',  # 微软雅黑
+			'SimHei',  # 黑体
+			'SimSun',  # 宋体
+			'Noto Sans CJK SC',  # 思源黑体
+			'WenQuanYi Micro Hei',  # 文泉驿微米黑
+			'Helvetica',
+			'Arial',
+			'DejaVuSans',
+			'Verdana',
+		]
+		font_loaded = False
+		for font_name in font_options:
+			try:
+				if platform.system() == 'Windows':
+					# Need to specify the abs font path on Windows
+					font_name = os.path.join(CONFIG.WIN_FONT_DIR, font_name + '.ttf')
+				regular_font = ImageFont.truetype(font_name, font_size)
+				title_font = ImageFont.truetype(font_name, title_font_size)
+				goal_font = ImageFont.truetype(font_name, goal_font_size)
+				font_loaded = True
+				break
+			except OSError:
+				continue
+		if not font_loaded:
+			raise OSError('No preferred fonts found')
+	except OSError:
+		regular_font = ImageFont.load_default()
+		title_font = ImageFont.load_default()
+		goal_font = regular_font
+	# Load logo if requested
+	logo = None
+	if show_logo:
+		try:
+			logo = Image.open('./static/browser-use.png')
+			# Resize logo to be small (e.g., 40px height)
+			logo_height = 150
+			aspect_ratio = logo.width / logo.height
+			logo_width = int(logo_height * aspect_ratio)
+			logo = logo.resize((logo_width, logo_height), Image.Resampling.LANCZOS)
+		except Exception as e:
+			logger.warning(f'Could not load logo: {e}')
+	# Create task frame if requested
+	if show_task and task:
+		# Find the first non-placeholder screenshot for the task frame
+		first_real_screenshot = None
+		for item in history.history:
+			screenshot_b64 = item.state.get_screenshot()
+			if screenshot_b64 and screenshot_b64 != PLACEHOLDER_4PX_SCREENSHOT:
+				first_real_screenshot = screenshot_b64
+				break
+		if first_real_screenshot:
+			task_frame = _create_task_frame(
+				task,
+				first_real_screenshot,
+				title_font,  # type: ignore
+				regular_font,  # type: ignore
+				logo,
+				line_spacing,
+			)
+			images.append(task_frame)
+		else:
+			logger.warning('No real screenshots found for task frame, skipping task frame')
+	# Process each history item with its corresponding screenshot
+	for i, (item, screenshot) in enumerate(zip(history.history, screenshots), 1):
+		if not screenshot:
+			continue
+		# Skip placeholder screenshots from about:blank pages
+		# These are 4x4 white PNGs encoded as a specific base64 string
+		if screenshot == PLACEHOLDER_4PX_SCREENSHOT:
+			logger.debug(f'Skipping placeholder screenshot from about:blank page at step {i}')
+			continue
+		# Skip screenshots from new tab pages
+		from browser_use.utils import is_new_tab_page
+		if is_new_tab_page(item.state.url):
+			logger.debug(f'Skipping screenshot from new tab page ({item.state.url}) at step {i}')
+			continue
+		# Convert base64 screenshot to PIL Image
+		img_data = base64.b64decode(screenshot)
+		image = Image.open(io.BytesIO(img_data))
+		if show_goals and item.model_output:
+			image = _add_overlay_to_image(
+				image=image,
+				step_number=i,
+				goal_text=item.model_output.current_state.next_goal,
+				regular_font=regular_font,  # type: ignore
+				title_font=title_font,  # type: ignore
+				margin=margin,
+				logo=logo,
+			)
+		images.append(image)
+	if images:
+		# Save the GIF
+		images[0].save(
+			output_path,
+			save_all=True,
+			append_images=images[1:],
+			duration=duration,
+			loop=0,
+			optimize=False,
+		)
+		logger.info(f'Created GIF at {output_path}')
+	else:
+		logger.warning('No images found in history to create GIF')
+def _create_task_frame(
+	task: str,
+	first_screenshot: str,
+	title_font: ImageFont.FreeTypeFont,
+	regular_font: ImageFont.FreeTypeFont,
+	logo: Image.Image | None = None,
+	line_spacing: float = 1.5,
+) -> Image.Image:
+	"""Create initial frame showing the task."""
+	from PIL import Image, ImageDraw, ImageFont
+	img_data = base64.b64decode(first_screenshot)
+	template = Image.open(io.BytesIO(img_data))
+	image = Image.new('RGB', template.size, (0, 0, 0))
+	draw = ImageDraw.Draw(image)
+	# Calculate vertical center of image
+	center_y = image.height // 2
+	# Draw task text with dynamic font size based on task length
+	margin = 140  # Increased margin
+	max_width = image.width - (2 * margin)
+	# Dynamic font size calculation based on task length
+	# Start with base font size (regular + 16)
+	base_font_size = regular_font.size + 16
+	min_font_size = max(regular_font.size - 10, 16)  # Don't go below 16pt
+	max_font_size = base_font_size  # Cap at the base font size
+	# Calculate dynamic font size based on text length and complexity
+	# Longer texts get progressively smaller fonts
+	text_length = len(task)
+	if text_length > 200:
+		# For very long text, reduce font size logarithmically
+		font_size = max(base_font_size - int(10 * (text_length / 200)), min_font_size)
+	else:
+		font_size = base_font_size
+	# Try to create a larger font, but fall back to regular font if it fails
+	try:
+		larger_font = ImageFont.truetype(regular_font.path, font_size)  # type: ignore
+	except (OSError, AttributeError):
+		# Fall back to regular font if .path is not available or font loading fails
+		larger_font = regular_font
+	# Generate wrapped text with the calculated font size
+	wrapped_text = _wrap_text(task, larger_font, max_width)
+	# Calculate line height with spacing
+	line_height = larger_font.size * line_spacing
+	# Split text into lines and draw with custom spacing
+	lines = wrapped_text.split('\n')
+	total_height = line_height * len(lines)
+	# Start position for first line
+	text_y = center_y - (total_height / 2) + 50  # Shifted down slightly
+	for line in lines:
+		# Get line width for centering
+		line_bbox = draw.textbbox((0, 0), line, font=larger_font)
+		text_x = (image.width - (line_bbox[2] - line_bbox[0])) // 2
+		draw.text(
+			(text_x, text_y),
+			line,
+			font=larger_font,
+			fill=(255, 255, 255),
+		)
+		text_y += line_height
+	# Add logo if provided (top right corner)
+	if logo:
+		logo_margin = 20
+		logo_x = image.width - logo.width - logo_margin
+		image.paste(logo, (logo_x, logo_margin), logo if logo.mode == 'RGBA' else None)
+	return image
+def _add_overlay_to_image(
+	image: Image.Image,
+	step_number: int,
+	goal_text: str,
+	regular_font: ImageFont.FreeTypeFont,
+	title_font: ImageFont.FreeTypeFont,
+	margin: int,
+	logo: Image.Image | None = None,
+	display_step: bool = True,
+	text_color: tuple[int, int, int, int] = (255, 255, 255, 255),
+	text_box_color: tuple[int, int, int, int] = (0, 0, 0, 255),
+) -> Image.Image:
+	"""Add step number and goal overlay to an image."""
+	from PIL import Image, ImageDraw
+	goal_text = decode_unicode_escapes_to_utf8(goal_text)
+	image = image.convert('RGBA')
+	txt_layer = Image.new('RGBA', image.size, (0, 0, 0, 0))
+	draw = ImageDraw.Draw(txt_layer)
+	if display_step:
+		# Add step number (bottom left)
+		step_text = str(step_number)
+		step_bbox = draw.textbbox((0, 0), step_text, font=title_font)
+		step_width = step_bbox[2] - step_bbox[0]
+		step_height = step_bbox[3] - step_bbox[1]
+		# Position step number in bottom left
+		x_step = margin + 10  # Slight additional offset from edge
+		y_step = image.height - margin - step_height - 10  # Slight offset from bottom
+		# Draw rounded rectangle background for step number
+		padding = 20  # Increased padding
+		step_bg_bbox = (
+			x_step - padding,
+			y_step - padding,
+			x_step + step_width + padding,
+			y_step + step_height + padding,
+		)
+		draw.rounded_rectangle(
+			step_bg_bbox,
+			radius=15,  # Add rounded corners
+			fill=text_box_color,
+		)
+		# Draw step number
+		draw.text(
+			(x_step, y_step),
+			step_text,
+			font=title_font,
+			fill=text_color,
+		)
+	# Draw goal text (centered, bottom)
+	max_width = image.width - (4 * margin)
+	wrapped_goal = _wrap_text(goal_text, title_font, max_width)
+	goal_bbox = draw.multiline_textbbox((0, 0), wrapped_goal, font=title_font)
+	goal_width = goal_bbox[2] - goal_bbox[0]
+	goal_height = goal_bbox[3] - goal_bbox[1]
+	# Center goal text horizontally, place above step number
+	x_goal = (image.width - goal_width) // 2
+	y_goal = y_step - goal_height - padding * 4  # More space between step and goal
+	# Draw rounded rectangle background for goal
+	padding_goal = 25  # Increased padding for goal
+	goal_bg_bbox = (
+		x_goal - padding_goal,  # Remove extra space for logo
+		y_goal - padding_goal,
+		x_goal + goal_width + padding_goal,
+		y_goal + goal_height + padding_goal,
+	)
+	draw.rounded_rectangle(
+		goal_bg_bbox,
+		radius=15,  # Add rounded corners
+		fill=text_box_color,
+	)
+	# Draw goal text
+	draw.multiline_text(
+		(x_goal, y_goal),
+		wrapped_goal,
+		font=title_font,
+		fill=text_color,
+		align='center',
+	)
+	# Add logo if provided (top right corner)
+	if logo:
+		logo_layer = Image.new('RGBA', image.size, (0, 0, 0, 0))
+		logo_margin = 20
+		logo_x = image.width - logo.width - logo_margin
+		logo_layer.paste(logo, (logo_x, logo_margin), logo if logo.mode == 'RGBA' else None)
+		txt_layer = Image.alpha_composite(logo_layer, txt_layer)
+	# Composite and convert
+	result = Image.alpha_composite(image, txt_layer)
+	return result.convert('RGB')
+def _wrap_text(text: str, font: ImageFont.FreeTypeFont, max_width: int) -> str:
+	"""
+	Wrap text to fit within a given width.
+	Args:
+	    text: Text to wrap
+	    font: Font to use for text
+	    max_width: Maximum width in pixels
+	Returns:
+	    Wrapped text with newlines
+	"""
+	text = decode_unicode_escapes_to_utf8(text)
+	words = text.split()
+	lines = []
+	current_line = []
+	for word in words:
+		current_line.append(word)
+		line = ' '.join(current_line)
+		bbox = font.getbbox(line)
+		if bbox[2] > max_width:
+			if len(current_line) == 1:
+				lines.append(current_line.pop())
+			else:
+				current_line.pop()
+				lines.append(' '.join(current_line))
+				current_line = [word]
+	if current_line:
+		lines.append(' '.join(current_line))
+	return '\n'.join(lines)