firecrawl-4.12.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
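
Each of the four e2e test modules reproduced below follows the same setup convention: credentials are loaded from a .env file and a fresh client is built per test. A minimal sketch of that pattern, assuming the same API_KEY and API_URL environment variables the tests read (the variable names are the tests' convention, not an SDK requirement):

    import os
    from dotenv import load_dotenv
    from firecrawl import Firecrawl

    load_dotenv()  # pulls API_KEY / API_URL from a local .env file
    client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
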
firecrawl/__tests__/e2e/v2/test_crawl.py
@@ -0,0 +1,278 @@
+ import pytest
+ import time
+ import os
+ from dotenv import load_dotenv
+ from firecrawl import Firecrawl
+ from firecrawl.v2.types import ScrapeOptions
+
+ load_dotenv()
+
+ if not os.getenv("API_KEY"):
+     raise ValueError("API_KEY is not set")
+
+ if not os.getenv("API_URL"):
+     raise ValueError("API_URL is not set")
+
+ class TestCrawlE2E:
+     """End-to-end tests for crawl functionality."""
+
+     def setup_method(self):
+         """Set up the test client."""
+         self.client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+
+     def test_start_crawl_minimal_request(self):
+         """Test starting a crawl with minimal parameters."""
+         crawl_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=3)
+
+         assert crawl_job.id is not None
+         assert crawl_job.url is not None
+
+     def test_start_crawl_with_options(self):
+         """Test starting a crawl with options."""
+         crawl_job = self.client.start_crawl(
+             "https://docs.firecrawl.dev",
+             limit=5,
+             max_discovery_depth=2
+         )
+
+         assert crawl_job.id is not None
+         assert crawl_job.url is not None
+
+     def test_start_crawl_with_prompt(self):
+         """Test starting a crawl with a prompt."""
+         crawl_job = self.client.start_crawl(
+             "https://firecrawl.dev",
+             prompt="Extract all blog posts",
+             limit=3
+         )
+
+         assert crawl_job.id is not None
+         assert crawl_job.url is not None
+
+     def test_get_crawl_status(self):
+         """Test getting crawl status."""
+         # First start a crawl
+         start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=3)
+         assert start_job.id is not None
+
+         job_id = start_job.id
+
+         # Get status
+         status_job = self.client.get_crawl_status(job_id)
+
+         assert status_job.status in ["scraping", "completed", "failed"]
+         assert status_job.completed >= 0
+         assert status_job.expires_at is not None
+         assert status_job.next is None
+         assert isinstance(status_job.data, list)
+
+     def test_cancel_crawl(self):
+         """Test canceling a crawl."""
+         start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=3)
+         assert start_job.id is not None
+
+         job_id = start_job.id
+         cancel_job = self.client.cancel_crawl(job_id)
+
+         time.sleep(5)
+         assert cancel_job is True
+
+     def test_get_crawl_errors(self):
+         """Test getting crawl errors."""
+         # First start a crawl
+         start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=3)
+         assert start_job.id is not None
+
+         job_id = start_job.id
+
+         # Get errors (should work even if no errors exist)
+         errors_response = self.client.get_crawl_errors(job_id)
+
+         # Verify the response structure
+         assert hasattr(errors_response, 'errors')
+         assert hasattr(errors_response, 'robots_blocked')
+         assert isinstance(errors_response.errors, list)
+         assert isinstance(errors_response.robots_blocked, list)
+
+         # Errors list should contain dictionaries with the expected fields
+         for error in errors_response.errors:
+             assert isinstance(error, dict)
+             assert 'id' in error
+             assert 'timestamp' in error
+             assert 'url' in error
+             assert 'error' in error
+             assert isinstance(error['id'], str)
+             assert isinstance(error['timestamp'], str)
+             assert isinstance(error['url'], str)
+             assert isinstance(error['error'], str)
+
+         # Robots blocked should be a list of strings
+         for blocked_url in errors_response.robots_blocked:
+             assert isinstance(blocked_url, str)
+
+     def test_get_crawl_errors_with_invalid_job_id(self):
+         """Test getting crawl errors with an invalid job ID."""
+         with pytest.raises(Exception):
+             self.client.get_crawl_errors("invalid-job-id-12345")
+
+     def test_get_active_crawls(self):
+         """Test getting active crawls."""
+         # Get active crawls
+         active_crawls_response = self.client.active_crawls()
+
+         # Verify the response structure
+         assert hasattr(active_crawls_response, 'success')
+         assert hasattr(active_crawls_response, 'crawls')
+         assert isinstance(active_crawls_response.success, bool)
+         assert isinstance(active_crawls_response.crawls, list)
+
+         # Each crawl should have the required fields
+         for crawl in active_crawls_response.crawls:
+             assert hasattr(crawl, 'id')
+             assert hasattr(crawl, 'team_id')
+             assert hasattr(crawl, 'url')
+             assert isinstance(crawl.id, str)
+             assert isinstance(crawl.team_id, str)
+             assert isinstance(crawl.url, str)
+
+             # Options field is optional, but if present it should be a dict
+             if hasattr(crawl, 'options') and crawl.options is not None:
+                 assert isinstance(crawl.options, dict)
+
+     def test_get_active_crawls_with_running_crawl(self):
+         """Test getting active crawls when there's a running crawl."""
+         # Start a crawl
+         start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=5)
+         assert start_job.id is not None
+
+         # Get active crawls
+         active_crawls_response = self.client.active_crawls()
+
+         # Verify the response structure
+         assert hasattr(active_crawls_response, 'success')
+         assert hasattr(active_crawls_response, 'crawls')
+         assert isinstance(active_crawls_response.success, bool)
+         assert isinstance(active_crawls_response.crawls, list)
+
+         # The started crawl should be in the active crawls list
+         active_crawl_ids = [crawl.id for crawl in active_crawls_response.crawls]
+         assert start_job.id in active_crawl_ids
+
+         # Cancel the crawl to clean up
+         self.client.cancel_crawl(start_job.id)
+
+     def test_crawl_with_wait(self):
+         """Test crawl with wait for completion."""
+         crawl_job = self.client.crawl(
+             "docs.firecrawl.dev",
+             limit=3,
+             max_discovery_depth=2,
+             poll_interval=1,
+             timeout=120,
+             integration="_e2e-test",
+         )
+
+         assert crawl_job.status in ["completed", "failed"]
+         assert crawl_job.completed >= 0
+         assert crawl_job.total >= 0
+         assert isinstance(crawl_job.data, list)
+
+     def test_crawl_with_prompt_and_wait(self):
+         """Test crawl with a prompt and wait for completion."""
+         crawl_job = self.client.crawl(
+             "https://docs.firecrawl.dev",
+             prompt="Extract all blog posts",
+             limit=3,
+             poll_interval=1,
+             timeout=120
+         )
+
+         assert crawl_job.status in ["completed", "failed"]
+         assert crawl_job.completed >= 0
+         assert crawl_job.total >= 0
+         assert isinstance(crawl_job.data, list)
+
+     def test_crawl_with_scrape_options(self):
+         """Test crawl with scrape options."""
+         scrape_opts = ScrapeOptions(
+             formats=["markdown", "links"],
+             only_main_content=False,
+             mobile=True,
+         )
+
+         crawl_job = self.client.start_crawl(
+             "https://docs.firecrawl.dev",
+             limit=2,
+             scrape_options=scrape_opts
+         )
+
+         assert crawl_job.id is not None
+
+     def test_crawl_with_json_format_object(self):
+         """Crawl with scrape_options including a JSON format object (prompt + schema)."""
+         json_schema = {
+             "type": "object",
+             "properties": {
+                 "title": {"type": "string"}
+             },
+             "required": ["title"],
+         }
+         scrape_opts = ScrapeOptions(
+             formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}]
+         )
+         crawl_job = self.client.start_crawl(
+             "https://docs.firecrawl.dev",
+             limit=2,
+             scrape_options=scrape_opts
+         )
+         assert crawl_job.id is not None
+
+     def test_crawl_all_parameters(self):
+         """Test crawl with all possible parameters."""
+         scrape_opts = ScrapeOptions(
+             formats=["markdown", "html"],
+             headers={"User-Agent": "Test Bot"},
+             include_tags=["h1", "h2"],
+             exclude_tags=["nav"],
+             only_main_content=False,
+             timeout=15000,
+             wait_for=2000,
+             mobile=True,
+             skip_tls_verification=True,
+             remove_base64_images=False
+         )
+
+         crawl_job = self.client.start_crawl(
+             "https://docs.firecrawl.dev",
+             prompt="Extract all blog posts and documentation",
+             include_paths=["/blog/*", "/docs/*"],
+             exclude_paths=["/admin/*"],
+             max_discovery_depth=3,
+             ignore_sitemap=False,
+             ignore_query_parameters=True,
+             limit=5,
+             crawl_entire_domain=True,
+             allow_external_links=False,
+             allow_subdomains=True,
+             delay=1,
+             max_concurrency=2,
+             webhook="https://example.com/hook",
+             scrape_options=scrape_opts,
+             zero_data_retention=False,
+             integration="_e2e-test",
+         )
+
+         assert crawl_job.id is not None
+
+     def test_crawl_params_preview(self):
+         """Test the crawl_params_preview function."""
+         params_data = self.client.crawl_params_preview(
+             "https://docs.firecrawl.dev",
+             "Extract all blog posts and documentation"
+         )
+
+         assert params_data is not None
+         assert params_data.limit is not None or params_data.include_paths is not None or params_data.max_discovery_depth is not None
+         assert 'blog/.*' in params_data.include_paths
+         assert 'docs/.*' in params_data.include_paths
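
The crawl tests above exercise two entry points: start_crawl, which returns immediately with a job id to poll via get_crawl_status, and crawl, which blocks until completion. A sketch of the waited variant, using only parameters that appear in the tests (poll_interval and timeout appear to be seconds, judging by the values the tests pass):

    import os
    from firecrawl import Firecrawl

    client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))

    # Blocks, polling every second, up to 120 seconds total
    crawl_job = client.crawl("https://docs.firecrawl.dev", limit=3, poll_interval=1, timeout=120)
    if crawl_job.status == "completed":
        for doc in crawl_job.data:  # list of scraped documents
            print(doc.metadata)
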
firecrawl/__tests__/e2e/v2/test_extract.py
@@ -0,0 +1,55 @@
+ import os
+ from dotenv import load_dotenv
+ from firecrawl import Firecrawl
+
+ load_dotenv()
+
+ if not os.getenv("API_KEY"):
+     raise ValueError("API_KEY is not set")
+
+ if not os.getenv("API_URL"):
+     raise ValueError("API_URL is not set")
+
+
+ class TestExtractE2E:
+     """E2E tests for v2 client extract (proxied to v1)."""
+
+     def setup_method(self):
+         self.client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+
+     def test_extract_minimal_with_prompt(self):
+         resp = self.client.extract(
+             urls=["https://docs.firecrawl.dev"],
+             prompt="Extract the main page title",
+         )
+
+         assert hasattr(resp, "success")
+         assert resp.success is True or resp.success is False
+         # data may be None if the backend omits it; presence depends on the implementation
+
+     def test_extract_with_schema(self):
+         schema = {
+             "type": "object",
+             "properties": {
+                 "title": {"type": "string"}
+             },
+             "required": ["title"],
+         }
+
+         resp = self.client.extract(
+             urls=["https://docs.firecrawl.dev"],
+             schema=schema,
+             prompt="Extract the main page title",
+             show_sources=True,
+             enable_web_search=False,
+             integration="_e2e-test",
+         )
+
+         assert hasattr(resp, "success")
+         # If the backend includes sources, ensure the structure is a dict (do not fail if omitted)
+         if hasattr(resp, "sources") and resp.sources is not None:
+             assert isinstance(resp.sources, dict)
+
+         # Check that resp.data conforms to the requested schema
+         assert isinstance(resp.data, dict)
+         assert resp.data["title"] is not None
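
For orientation, the extract surface these tests rely on takes a list of URLs, a prompt, and an optional JSON schema, and returns the structured result in resp.data. A sketch under the same client setup as above:

    import os
    from firecrawl import Firecrawl

    client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))

    schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
    resp = client.extract(
        urls=["https://docs.firecrawl.dev"],
        prompt="Extract the main page title",
        schema=schema,
    )
    if resp.success and isinstance(resp.data, dict):
        print(resp.data["title"])  # field required by the schema above
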
firecrawl/__tests__/e2e/v2/test_map.py
@@ -0,0 +1,61 @@
+ import os
+ import pytest
+ from dotenv import load_dotenv
+ from firecrawl import Firecrawl
+
+
+ load_dotenv()
+
+ if not os.getenv("API_KEY"):
+     raise ValueError("API_KEY is not set")
+
+ if not os.getenv("API_URL"):
+     raise ValueError("API_URL is not set")
+
+
+ class TestMapE2E:
+     """End-to-end tests for map functionality (v2)."""
+
+     def setup_method(self):
+         self.client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+
+     def test_map_minimal_request(self):
+         resp = self.client.map("https://docs.firecrawl.dev")
+
+         assert hasattr(resp, "links") and resp.links is not None
+         assert isinstance(resp.links, list)
+
+         # Basic sanity checks on at least one link
+         if len(resp.links) > 0:
+             first = resp.links[0]
+             assert hasattr(first, "url")
+             assert isinstance(first.url, str) and first.url.startswith("http")
+
+     @pytest.mark.parametrize(
+         "sitemap",
+         [
+             "only",
+             "skip",
+             "include",
+         ],
+     )
+     def test_map_with_options(self, sitemap):
+         resp = self.client.map(
+             "https://docs.firecrawl.dev",
+             search="docs",
+             include_subdomains=True,
+             limit=10,
+             sitemap=sitemap,
+             timeout=15000,
+             integration="_e2e-test",
+         )
+
+         assert hasattr(resp, "links") and isinstance(resp.links, list)
+
+         # The limit should be respected (server-side)
+         assert len(resp.links) <= 10
+
+         for link in resp.links:
+             assert hasattr(link, "url")
+             assert isinstance(link.url, str) and link.url.startswith("http")
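
The map tests show that map returns a links list of objects with a url attribute, and that the sitemap parameter accepts "only", "skip", or "include". A sketch using the options the tests pass:

    import os
    from firecrawl import Firecrawl

    client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))

    resp = client.map(
        "https://docs.firecrawl.dev",
        search="docs",
        limit=10,           # enforced server-side, per the test's assertion
        sitemap="include",  # the tests also exercise "only" and "skip"
    )
    for link in resp.links:
        print(link.url)
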
firecrawl/__tests__/e2e/v2/test_scrape.py
@@ -0,0 +1,191 @@
+ import os
+ import json as _json
+ import pytest
+ from dotenv import load_dotenv
+ from firecrawl import Firecrawl
+ from firecrawl.v2.types import Viewport, ScreenshotAction, Document
+
+ load_dotenv()
+
+ if not os.getenv("API_KEY"):
+     raise ValueError("API_KEY is not set")
+
+ if not os.getenv("API_URL"):
+     raise ValueError("API_URL is not set")
+
+
+ class TestScrapeE2E:
+     """End-to-end tests for scrape functionality (v2)."""
+
+     def setup_method(self):
+         self.client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
+
+     def _assert_valid_document(self, doc: Document):
+         assert isinstance(doc, Document)
+         # At least one main content field should be present
+         assert (
+             (doc.markdown is not None and len(doc.markdown) > 0)
+             or (doc.html is not None and len(doc.html) > 0)
+             or (doc.raw_html is not None and len(doc.raw_html) > 0)
+             or (doc.summary is not None and len(doc.summary) > 0)
+         )
+         # Metadata should exist with a source URL or title when available
+         assert doc.metadata is not None
+
+     def test_scrape_minimal(self):
+         """Scrape a URL with minimal parameters and return a document."""
+         doc = self.client.scrape("https://docs.firecrawl.dev")
+         self._assert_valid_document(doc)
+
+     def test_scrape_with_options_markdown(self):
+         """Scrape with the simple markdown format and options passed as kwargs."""
+         doc = self.client.scrape(
+             "https://docs.firecrawl.dev",
+             formats=["markdown"],
+             only_main_content=False,
+             mobile=False,
+         )
+         self._assert_valid_document(doc)
+
+     def test_scrape_with_screenshot_action_viewport(self):
+         """Scrape with a screenshot action including a viewport passed as kwargs."""
+         viewport = Viewport(width=800, height=600)
+         action = ScreenshotAction(full_page=False, quality=80, viewport=viewport)
+         doc = self.client.scrape(
+             "https://docs.firecrawl.dev",
+             formats=["markdown"],
+             actions=[action],
+         )
+         self._assert_valid_document(doc)
+
+     @pytest.mark.parametrize("fmt,expect_field", [
+         ("markdown", "markdown"),
+         ("html", "html"),
+         ("raw_html", "raw_html"),
+         ("links", "links"),
+         ("screenshot", "screenshot"),
+         ("summary", "summary"),
+     ])
+     def test_scrape_basic_formats(self, fmt, expect_field):
+         """Verify basic format requests succeed and the expected fields are present when applicable."""
+         doc = self.client.scrape(
+             "https://docs.firecrawl.dev",
+             formats=[fmt],
+         )
+         # For formats that are not main content (links/screenshot), skip the main-content assertion
+         if expect_field not in {"links", "screenshot"}:
+             self._assert_valid_document(doc)
+         if expect_field == "markdown":
+             assert doc.markdown is not None
+         elif expect_field == "html":
+             assert doc.html is not None
+         elif expect_field == "raw_html":
+             assert doc.raw_html is not None
+         elif expect_field == "screenshot":
+             assert doc.screenshot is not None
+         elif expect_field == "links":
+             assert isinstance(doc.links, list)
+             assert len(doc.links) > 0
+
+     def test_scrape_with_json_format_object(self):
+         """Scrape with a JSON format object (requires prompt and schema)."""
+         json_schema = {
+             "type": "object",
+             "properties": {
+                 "title": {"type": "string"}
+             },
+             "required": ["title"],
+         }
+         doc = self.client.scrape(
+             "https://docs.firecrawl.dev",
+             formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}],
+             only_main_content=True,
+         )
+         # JSON format may not include main content fields; ensure the request succeeded
+         assert isinstance(doc, Document)
+         # If the backend returns extracted JSON content, it should be present under `json`
+         # (do not fail if the backend omits it; existence depends on the implementation)
+         # if hasattr(doc, 'json'):
+         #     assert doc.json is not None
+
+     def test_scrape_invalid_url(self):
+         """Scrape should fail with empty or invalid URLs."""
+         with pytest.raises(ValueError, match="URL cannot be empty"):
+             self.client.scrape("")
+
+         with pytest.raises(ValueError, match="URL cannot be empty"):
+             self.client.scrape(" ")
+
+     def test_scrape_with_all_params(self):
+         """Comprehensive scrape using multiple formats and options."""
+         json_schema = {
+             "type": "object",
+             "properties": {"title": {"type": "string"}},
+             "required": ["title"],
+         }
+         doc = self.client.scrape(
+             "https://docs.firecrawl.dev",
+             formats=[
+                 "markdown",
+                 "raw_html",
+                 {"type": "screenshot", "full_page": False, "quality": 70},
+                 {"type": "json", "prompt": "Extract title", "schema": json_schema},
+                 {"type": "summary"},
+             ],
+             headers={"User-Agent": "E2E"},
+             include_tags=["main"],
+             exclude_tags=["nav"],
+             only_main_content=True,
+             timeout=20000,
+             wait_for=500,
+             mobile=False,
+             parsers=["pdf"],
+             actions=[],
+             skip_tls_verification=False,
+             remove_base64_images=False,
+             fast_mode=False,
+             use_mock=None,
+             block_ads=False,
+             proxy="basic",
+             max_age=0,
+             store_in_cache=False,
+             integration="_e2e-test",
+         )
+         assert isinstance(doc, Document)
+
+     def test_scrape_images_format(self):
+         """Test images format extraction."""
+         doc = self.client.scrape(
+             "https://firecrawl.dev",
+             formats=["images"]
+         )
+         assert isinstance(doc, Document)
+         assert doc.images is not None
+         assert isinstance(doc.images, list)
+         assert len(doc.images) > 0
+         # Should find Firecrawl logo/branding images
+         assert any("firecrawl" in img.lower() or "logo" in img.lower() for img in doc.images)
+
+     def test_scrape_images_with_multiple_formats(self):
+         """Test that the images format works alongside other formats."""
+         doc = self.client.scrape(
+             "https://github.com",
+             formats=["markdown", "links", "images"]
+         )
+         assert isinstance(doc, Document)
+         assert doc.markdown is not None
+         assert doc.links is not None
+         assert doc.images is not None
+         assert isinstance(doc.images, list)
+         assert len(doc.images) > 0
+
+         # The images format should find content not surfaced by the links format
+         image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico']
+         link_images = [
+             link for link in (doc.links or [])
+             if any(ext in link.lower() for ext in image_extensions)
+         ]
+
+         # Should discover additional images beyond those with obvious extensions
+         assert len(doc.images) >= len(link_images)
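
Finally, the scrape tests demonstrate that formats mixes plain strings ("markdown", "links", "images", ...) with option objects such as the JSON format (prompt plus schema). A closing sketch combining the two styles, as in test_scrape_with_json_format_object above:

    import os
    from firecrawl import Firecrawl

    client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))

    json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
    doc = client.scrape(
        "https://docs.firecrawl.dev",
        formats=["markdown", {"type": "json", "prompt": "Extract page title", "schema": json_schema}],
    )
    print(doc.markdown)  # structured output lands on the document's `json` field if the backend returns it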