firecrawl 3.0.3__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of firecrawl has been flagged as possibly problematic.
- {firecrawl-3.0.3 → firecrawl-3.1.0}/PKG-INFO +1 -1
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__init__.py +1 -1
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -1
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/client.py +1 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/aio/crawl.py +2 -5
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/aio/scrape.py +2 -5
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/aio/search.py +2 -5
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/batch.py +2 -5
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/crawl.py +2 -1
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/scrape.py +2 -6
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/search.py +2 -6
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/types.py +68 -2
- firecrawl-3.1.0/firecrawl/v2/utils/normalize.py +107 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/watcher.py +4 -15
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/watcher_async.py +2 -5
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl.egg-info/PKG-INFO +1 -1
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl.egg-info/SOURCES.txt +1 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/LICENSE +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/README.md +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/conftest.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_async.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_extract.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_map.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_search.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_usage.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/firecrawl.backup.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/types.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v1/__init__.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v1/client.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/__init__.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/client.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/client_async.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/aio/__init__.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/aio/batch.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/aio/extract.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/aio/map.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/aio/usage.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/extract.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/map.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/methods/usage.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/utils/__init__.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/utils/error_handler.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/utils/get_version.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/utils/http_client.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/utils/http_client_async.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl/v2/utils/validation.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/pyproject.toml +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/setup.cfg +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/setup.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/tests/test_change_tracking.py +0 -0
- {firecrawl-3.0.3 → firecrawl-3.1.0}/tests/test_timeout_conversion.py +0 -0
firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py

```diff
@@ -96,7 +96,6 @@ async def test_async_get_crawl_status_shape():
     assert status.status in ("scraping", "completed", "failed")
     assert status.completed >= 0
     assert status.expires_at is not None
-    assert status.next is not None
     assert isinstance(status.data, list)
 
 
```
firecrawl/v2/methods/aio/crawl.py

```diff
@@ -14,6 +14,7 @@ from ...types (
 from ...utils.error_handler import handle_response_error
 from ...utils.validation import prepare_scrape_options
 from ...utils.http_client_async import AsyncHttpClient
+from ...utils.normalize import normalize_document_input
 
 
 def _prepare_crawl_request(request: CrawlRequest) -> dict:
@@ -76,11 +77,7 @@ async def get_crawl_status(client: AsyncHttpClient, job_id: str) -> CrawlJob:
     documents = []
     for doc_data in body.get("data", []):
         if isinstance(doc_data, dict):
-            normalized = dict(doc_data)
-            if 'rawHtml' in normalized and 'raw_html' not in normalized:
-                normalized['raw_html'] = normalized.pop('rawHtml')
-            if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-                normalized['change_tracking'] = normalized.pop('changeTracking')
+            normalized = normalize_document_input(doc_data)
             documents.append(Document(**normalized))
     return CrawlJob(
         status=body.get("status"),
```
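The same substitution repeats in the scrape, search, batch, crawl and watcher hunks that follow: the hand-rolled rawHtml/changeTracking renaming is dropped in favor of the shared normalize_document_input helper. A minimal sketch of the top-level renaming both the old and new code agree on, assuming firecrawl 3.1.0 is installed (the payload values are made up for illustration):

```python
from firecrawl.v2.utils.normalize import normalize_document_input

# Hypothetical API-shaped document; only the key names matter here.
doc_data = {
    "markdown": "# Title",
    "rawHtml": "<html>…</html>",
    "changeTracking": {"changeStatus": "new"},
}

normalized = normalize_document_input(doc_data)
assert "raw_html" in normalized and "rawHtml" not in normalized
assert "change_tracking" in normalized and "changeTracking" not in normalized
```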
firecrawl/v2/methods/aio/scrape.py

```diff
@@ -1,5 +1,6 @@
 from typing import Optional, Dict, Any
 from ...types import ScrapeOptions, Document
+from ...utils.normalize import normalize_document_input
 from ...utils.error_handler import handle_response_error
 from ...utils.validation import prepare_scrape_options, validate_scrape_options
 from ...utils.http_client_async import AsyncHttpClient
@@ -27,10 +28,6 @@ async def scrape(client: AsyncHttpClient, url: str, options: Optional[ScrapeOpti
     if not body.get("success"):
         raise Exception(body.get("error", "Unknown error occurred"))
     document_data = body.get("data", {})
-    normalized = dict(document_data)
-    if 'rawHtml' in normalized and 'raw_html' not in normalized:
-        normalized['raw_html'] = normalized.pop('rawHtml')
-    if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-        normalized['change_tracking'] = normalized.pop('changeTracking')
+    normalized = normalize_document_input(document_data)
     return Document(**normalized)
 
```
firecrawl/v2/methods/aio/search.py

```diff
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 from ...types import SearchRequest, SearchData, SearchResult, Document
+from ...utils.normalize import normalize_document_input
 from ...utils.http_client_async import AsyncHttpClient
 from ...utils.error_handler import handle_response_error
 from ...utils.validation import prepare_scrape_options, validate_scrape_options
@@ -38,11 +39,7 @@ async def search(client: AsyncHttpClient, request: SearchRequest) -> SearchData:
         if request.scrape_options is not None and any(
             key in doc_data for key in ['markdown', 'html', 'rawHtml', 'links', 'summary', 'screenshot', 'changeTracking']
         ):
-            normalized = dict(doc_data)
-            if 'rawHtml' in normalized and 'raw_html' not in normalized:
-                normalized['raw_html'] = normalized.pop('rawHtml')
-            if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-                normalized['change_tracking'] = normalized.pop('changeTracking')
+            normalized = normalize_document_input(doc_data)
             results.append(Document(**normalized))
         else:
             results.append(SearchResult(
```
firecrawl/v2/methods/batch.py

```diff
@@ -13,6 +13,7 @@ from ..types (
     WebhookConfig,
 )
 from ..utils import HttpClient, handle_response_error, validate_scrape_options, prepare_scrape_options
+from ..utils.normalize import normalize_document_input
 from ..types import CrawlErrorsResponse
 
 
@@ -107,11 +108,7 @@ def get_batch_scrape_status(
     documents: List[Document] = []
     for doc in body.get("data", []) or []:
         if isinstance(doc, dict):
-            normalized = dict(doc)
-            if 'rawHtml' in normalized and 'raw_html' not in normalized:
-                normalized['raw_html'] = normalized.pop('rawHtml')
-            if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-                normalized['change_tracking'] = normalized.pop('changeTracking')
+            normalized = normalize_document_input(doc)
             documents.append(Document(**normalized))
 
     return BatchScrapeJob(
```
firecrawl/v2/methods/crawl.py

```diff
@@ -11,6 +11,7 @@ from ..types (
     WebhookConfig, CrawlErrorsResponse, ActiveCrawlsResponse, ActiveCrawl
 )
 from ..utils import HttpClient, handle_response_error, validate_scrape_options, prepare_scrape_options
+from ..utils.normalize import normalize_document_input
 
 
 def _validate_crawl_request(request: CrawlRequest) -> None:
@@ -173,7 +174,7 @@ def get_crawl_status(client: HttpClient, job_id: str) -> CrawlJob:
                 # but we'll handle it gracefully
                 continue
             else:
-                documents.append(Document(**doc_data))
+                documents.append(Document(**normalize_document_input(doc_data)))
 
     # Create CrawlJob with current status and data
     return CrawlJob(
```
firecrawl/v2/methods/scrape.py

```diff
@@ -4,6 +4,7 @@ Scraping functionality for Firecrawl v2 API.
 
 from typing import Optional, Dict, Any
 from ..types import ScrapeOptions, Document
+from ..utils.normalize import normalize_document_input
 from ..utils import HttpClient, handle_response_error, prepare_scrape_options, validate_scrape_options
 
 
@@ -59,10 +60,5 @@ def scrape(client: HttpClient, url: str, options: Optional[ScrapeOptions] = None
         raise Exception(body.get("error", "Unknown error occurred"))
 
     document_data = body.get("data", {})
-
-    normalized = dict(document_data)
-    if 'rawHtml' in normalized and 'raw_html' not in normalized:
-        normalized['raw_html'] = normalized.pop('rawHtml')
-    if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-        normalized['change_tracking'] = normalized.pop('changeTracking')
+    normalized = normalize_document_input(document_data)
     return Document(**normalized)
```
firecrawl/v2/methods/search.py

```diff
@@ -4,6 +4,7 @@ Search functionality for Firecrawl v2 API.
 
 from typing import Optional, Dict, Any, Union
 from ..types import SearchRequest, SearchData, SearchResult, Document
+from ..utils.normalize import normalize_document_input
 from ..utils import HttpClient, handle_response_error, validate_scrape_options, prepare_scrape_options
 
 
@@ -50,12 +51,7 @@ def search(
         if request.scrape_options is not None and any(
             key in doc_data for key in ['markdown', 'html', 'rawHtml', 'links', 'summary', 'screenshot', 'changeTracking']
         ):
-
-            normalized = dict(doc_data)
-            if 'rawHtml' in normalized and 'raw_html' not in normalized:
-                normalized['raw_html'] = normalized.pop('rawHtml')
-            if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-                normalized['change_tracking'] = normalized.pop('changeTracking')
+            normalized = normalize_document_input(doc_data)
             results.append(Document(**normalized))
         else:
             # Minimal search result shape
```
firecrawl/v2/types.py

```diff
@@ -7,7 +7,8 @@ This module contains clean, modern type definitions for the v2 API.
 import warnings
 from datetime import datetime
 from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
-
+import logging
+from pydantic import BaseModel, Field, field_validator, ValidationError
 
 # Suppress pydantic warnings about schema field shadowing
 # Tested using schema_field alias="schema" but it doesn't work.
@@ -19,6 +20,9 @@ warnings.filterwarnings("ignore", message="Field name \"json\" in \"Document\" s
 
 T = TypeVar('T')
 
+# Module logger
+logger = logging.getLogger("firecrawl")
+
 # Base response types
 class BaseResponse(BaseModel, Generic[T]):
     """Base response structure for all API responses."""
@@ -29,18 +33,57 @@ class BaseResponse(BaseModel, Generic[T]):
 
 # Document and content types
 class DocumentMetadata(BaseModel):
-    """Metadata for scraped documents."""
+    """Metadata for scraped documents (snake_case only; API camelCase normalized in code)."""
+    # Common metadata fields
     title: Optional[str] = None
     description: Optional[str] = None
+    url: Optional[str] = None
     language: Optional[str] = None
     keywords: Optional[Union[str, List[str]]] = None
     robots: Optional[str] = None
+
+    # OpenGraph and social metadata
     og_title: Optional[str] = None
     og_description: Optional[str] = None
     og_url: Optional[str] = None
     og_image: Optional[str] = None
+    og_audio: Optional[str] = None
+    og_determiner: Optional[str] = None
+    og_locale: Optional[str] = None
+    og_locale_alternate: Optional[List[str]] = None
+    og_site_name: Optional[str] = None
+    og_video: Optional[str] = None
+
+    # Dublin Core and other site metadata
+    favicon: Optional[str] = None
+    dc_terms_created: Optional[str] = None
+    dc_date_created: Optional[str] = None
+    dc_date: Optional[str] = None
+    dc_terms_type: Optional[str] = None
+    dc_type: Optional[str] = None
+    dc_terms_audience: Optional[str] = None
+    dc_terms_subject: Optional[str] = None
+    dc_subject: Optional[str] = None
+    dc_description: Optional[str] = None
+    dc_terms_keywords: Optional[str] = None
+
+    modified_time: Optional[str] = None
+    published_time: Optional[str] = None
+    article_tag: Optional[str] = None
+    article_section: Optional[str] = None
+
+    # Response-level metadata
     source_url: Optional[str] = None
     status_code: Optional[int] = None
+    scrape_id: Optional[str] = None
+    num_pages: Optional[int] = None
+    content_type: Optional[str] = None
+    proxy_used: Optional[Literal["basic", "stealth"]] = None
+    cache_state: Optional[Literal["hit", "miss"]] = None
+    cached_at: Optional[str] = None
+    credits_used: Optional[int] = None
+
+    # Error information
     error: Optional[str] = None
 
     @staticmethod
@@ -85,6 +128,29 @@ class Document(BaseModel):
     warning: Optional[str] = None
     change_tracking: Optional[Dict[str, Any]] = None
 
+    @property
+    def metadata_typed(self) -> DocumentMetadata:
+        """Always returns a DocumentMetadata instance for LSP-friendly access."""
+        md = self.metadata
+        if isinstance(md, DocumentMetadata):
+            return md
+        if isinstance(md, dict):
+            try:
+                return DocumentMetadata(**md)
+            except (ValidationError, TypeError) as exc:
+                logger.debug("Failed to construct DocumentMetadata from dict: %s", exc)
+        return DocumentMetadata()
+
+    @property
+    def metadata_dict(self) -> Dict[str, Any]:
+        """Returns metadata as a plain dict (exclude None)."""
+        md = self.metadata
+        if isinstance(md, DocumentMetadata):
+            return md.model_dump(exclude_none=True)
+        if isinstance(md, dict):
+            return {k: v for k, v in md.items() if v is not None}
+        return {}
+
 # Webhook types
 class WebhookConfig(BaseModel):
     """Configuration for webhooks."""
```
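The new metadata_typed and metadata_dict properties give typed access regardless of whether metadata arrives as a DocumentMetadata or a plain dict. A minimal sketch, assuming firecrawl 3.1.0 is installed and that Document's content fields remain optional as in 3.0.x (the values below are illustrative):

```python
from firecrawl.v2.types import Document

# Illustrative document; metadata supplied as a snake_case dict.
doc = Document(markdown="# Hello", metadata={"title": "Hello", "status_code": 200})

md = doc.metadata_typed          # always a DocumentMetadata instance
print(md.title, md.status_code)  # -> Hello 200

print(doc.metadata_dict)         # plain dict with None values dropped
# -> {'title': 'Hello', 'status_code': 200}
```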
firecrawl/v2/utils/normalize.py (new file)

```diff
@@ -0,0 +1,107 @@
+"""
+Normalization helpers for v2 API payloads to avoid relying on Pydantic aliases.
+"""
+
+from typing import Any, Dict, List
+from ..types import DocumentMetadata
+
+
+def _map_metadata_keys(md: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Convert API v2 camelCase metadata keys to snake_case expected by DocumentMetadata.
+    Leaves unknown keys as-is.
+    """
+    mapping = {
+        # OpenGraph
+        "ogTitle": "og_title",
+        "ogDescription": "og_description",
+        "ogUrl": "og_url",
+        "ogImage": "og_image",
+        "ogAudio": "og_audio",
+        "ogDeterminer": "og_determiner",
+        "ogLocale": "og_locale",
+        "ogLocaleAlternate": "og_locale_alternate",
+        "ogSiteName": "og_site_name",
+        "ogVideo": "og_video",
+        # Dublin Core and misc
+        "dcTermsCreated": "dc_terms_created",
+        "dcDateCreated": "dc_date_created",
+        "dcDate": "dc_date",
+        "dcTermsType": "dc_terms_type",
+        "dcType": "dc_type",
+        "dcTermsAudience": "dc_terms_audience",
+        "dcTermsSubject": "dc_terms_subject",
+        "dcSubject": "dc_subject",
+        "dcDescription": "dc_description",
+        "dcTermsKeywords": "dc_terms_keywords",
+        "modifiedTime": "modified_time",
+        "publishedTime": "published_time",
+        "articleTag": "article_tag",
+        "articleSection": "article_section",
+        # Response-level
+        "sourceURL": "source_url",
+        "statusCode": "status_code",
+        "scrapeId": "scrape_id",
+        "numPages": "num_pages",
+        "contentType": "content_type",
+        "proxyUsed": "proxy_used",
+        "cacheState": "cache_state",
+        "cachedAt": "cached_at",
+        "creditsUsed": "credits_used",
+    }
+
+    out: Dict[str, Any] = {}
+    for k, v in md.items():
+        snake = mapping.get(k, k)
+        out[snake] = v
+
+    # Light coercions where server may send strings/lists
+    if isinstance(out.get("status_code"), str):
+        try:
+            out["status_code"] = int(out["status_code"])  # type: ignore
+        except ValueError:
+            pass
+
+    # Generic rule: if a value is a list, join with ", " for string-like fields,
+    # except for explicit fields we preserve as lists.
+    preserve_list_fields: List[str] = [
+        "og_locale_alternate",
+    ]
+    for f, val in list(out.items()):
+        if isinstance(val, list) and f not in preserve_list_fields:
+            try:
+                out[f] = ", ".join(str(x) for x in val)
+            except Exception:
+                # Fallback: keep original list if join fails
+                pass
+
+    return out
+
+
+def normalize_document_input(doc: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Normalize a raw Document dict from the API into the Python SDK's expected shape:
+    - Convert top-level keys rawHtml->raw_html, changeTracking->change_tracking
+    - Convert metadata keys from camelCase to snake_case
+    """
+    normalized = dict(doc)
+
+    if "rawHtml" in normalized and "raw_html" not in normalized:
+        normalized["raw_html"] = normalized.pop("rawHtml")
+
+    if "changeTracking" in normalized and "change_tracking" not in normalized:
+        normalized["change_tracking"] = normalized.pop("changeTracking")
+
+    md = normalized.get("metadata")
+    if isinstance(md, dict):
+        mapped = _map_metadata_keys(md)
+        # Construct a concrete DocumentMetadata so downstream has a typed object
+        try:
+            normalized["metadata"] = DocumentMetadata(**mapped)
+        except Exception:
+            # Fallback to mapped dict if model construction fails for any reason
+            normalized["metadata"] = mapped
+
+    return normalized
+
+
```
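A brief usage sketch of the helper above, assuming firecrawl 3.1.0 is installed; the payload below is a made-up example of the camelCase shape the v2 API returns:

```python
from firecrawl.v2.types import DocumentMetadata
from firecrawl.v2.utils.normalize import normalize_document_input

# Illustrative camelCase payload; field values are invented for the example.
raw = {
    "markdown": "# Firecrawl",
    "rawHtml": "<html>…</html>",
    "metadata": {
        "title": "Firecrawl",
        "sourceURL": "https://example.com",
        "statusCode": "200",             # string coerced to int
        "articleTag": ["news", "tech"],  # non-preserved lists are joined
        "ogLocaleAlternate": ["en_GB"],  # explicitly preserved as a list
    },
}

doc = normalize_document_input(raw)
assert "raw_html" in doc and "rawHtml" not in doc
md = doc["metadata"]
assert isinstance(md, DocumentMetadata)
assert md.source_url == "https://example.com"
assert md.status_code == 200
assert md.article_tag == "news, tech"
assert md.og_locale_alternate == ["en_GB"]
```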
firecrawl/v2/watcher.py

```diff
@@ -15,6 +15,7 @@ from typing import Callable, List, Optional, Literal, Union, Dict, Any
 import websockets
 
 from .types import CrawlJob, BatchScrapeJob, Document
+from .utils.normalize import normalize_document_input
 
 
 JobKind = Literal["crawl", "batch"]
@@ -172,11 +173,7 @@ class Watcher:
         docs: List[Document] = []
         for doc in self.data:
             if isinstance(doc, dict):
-                d = dict(doc)
-                if "rawHtml" in d and "raw_html" not in d:
-                    d["raw_html"] = d.pop("rawHtml")
-                if "changeTracking" in d and "change_tracking" not in d:
-                    d["change_tracking"] = d.pop("changeTracking")
+                d = normalize_document_input(doc)
                 docs.append(Document(**d))
         if self._kind == "crawl":
             job = CrawlJob(
@@ -212,11 +209,7 @@ class Watcher:
             docs = []
             for doc in payload.get("data", []):
                 if isinstance(doc, dict):
-                    d = dict(doc)
-                    if "rawHtml" in d and "raw_html" not in d:
-                        d["raw_html"] = d.pop("rawHtml")
-                    if "changeTracking" in d and "change_tracking" not in d:
-                        d["change_tracking"] = d.pop("changeTracking")
+                    d = normalize_document_input(doc)
                     docs.append(Document(**d))
             job = CrawlJob(
                 status=status_str,
@@ -241,11 +234,7 @@ class Watcher:
            docs = []
             for doc in payload.get("data", []):
                 if isinstance(doc, dict):
-                    d = dict(doc)
-                    if "rawHtml" in d and "raw_html" not in d:
-                        d["raw_html"] = d.pop("rawHtml")
-                    if "changeTracking" in d and "change_tracking" not in d:
-                        d["change_tracking"] = d.pop("changeTracking")
+                    d = normalize_document_input(doc)
                     docs.append(Document(**d))
             job = BatchScrapeJob(
                 status=status_str,
```
firecrawl/v2/watcher_async.py

```diff
@@ -16,6 +16,7 @@ import websockets
 from websockets.exceptions import ConnectionClosed, ConnectionClosedOK, ConnectionClosedError
 
 from .types import BatchScrapeJob, CrawlJob, Document
+from .utils.normalize import normalize_document_input
 
 JobKind = Literal["crawl", "batch"]
 
@@ -216,11 +217,7 @@ class AsyncWatcher:
         source_docs = docs_override if docs_override is not None else payload.get("data", []) or []
         for doc in source_docs:
             if isinstance(doc, dict):
-                d = dict(doc)
-                if "rawHtml" in d and "raw_html" not in d:
-                    d["raw_html"] = d.pop("rawHtml")
-                if "changeTracking" in d and "change_tracking" not in d:
-                    d["change_tracking"] = d.pop("changeTracking")
+                d = normalize_document_input(doc)
                 docs.append(Document(**d))
 
         if self._kind == "crawl":
```
firecrawl.egg-info/SOURCES.txt

```diff
@@ -77,6 +77,7 @@ firecrawl/v2/utils/error_handler.py
 firecrawl/v2/utils/get_version.py
 firecrawl/v2/utils/http_client.py
 firecrawl/v2/utils/http_client_async.py
+firecrawl/v2/utils/normalize.py
 firecrawl/v2/utils/validation.py
 tests/test_change_tracking.py
 tests/test_timeout_conversion.py
```
All remaining files are renamed only ({firecrawl-3.0.3 → firecrawl-3.1.0}) with no content changes.