firecrawl 2.5.0__tar.gz → 2.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic.
- {firecrawl-2.5.0 → firecrawl-2.5.1}/PKG-INFO +1 -1
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl/__init__.py +1 -1
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl/firecrawl.py +119 -64
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl.egg-info/PKG-INFO +1 -1
- {firecrawl-2.5.0 → firecrawl-2.5.1}/LICENSE +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/README.md +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl/__tests__/e2e_withAuth/test.py +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/pyproject.toml +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/setup.cfg +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/setup.py +0 -0
- {firecrawl-2.5.0 → firecrawl-2.5.1}/tests/test_change_tracking.py +0 -0
@@ -13,7 +13,7 @@ import os
 
 from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.5.0"
+__version__ = "2.5.1"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -29,7 +29,7 @@ warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDoc
 warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
 warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
 warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
-
+warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ChangeTrackingOptions\" shadows an attribute in parent \"BaseModel\"")
 
 def get_version():
     try:
@@ -529,14 +529,16 @@ class FirecrawlApp:
         scrape_params['blockAds'] = block_ads
         if proxy:
             scrape_params['proxy'] = proxy
-        if extract:
-            …
+        if extract is not None:
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+        if json_options is not None:
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if change_tracking_options:
@@ -544,6 +546,11 @@ class FirecrawlApp:
 
         scrape_params.update(kwargs)
 
+        if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
+            scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
+        if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
+            scrape_params['jsonOptions']['schema'] = self._ensure_schema_dict(scrape_params['jsonOptions']['schema'])
+
         # Make request
         response = requests.post(
             f'{self.api_url}/v1/scrape',
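Taken together, the two hunks above let scrape_url accept a Pydantic model class anywhere a JSON schema is expected, normalizing it through the new _ensure_schema_dict helper both before and after kwargs are merged. A minimal sketch of the new behavior (the formats argument and the JsonConfig fields are assumed from the SDK's public surface; the API key is a placeholder):

    from pydantic import BaseModel
    from firecrawl import FirecrawlApp, JsonConfig

    class Article(BaseModel):
        title: str
        author: str

    app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

    # As of 2.5.1, schema may be a Pydantic model class; _ensure_schema_dict
    # converts it to a plain JSON-schema dict before the POST to /v1/scrape.
    result = app.scrape_url(
        "https://example.com",
        formats=["json"],
        json_options=JsonConfig(schema=Article),
    )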
@@ -1252,13 +1259,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            …
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            …
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1273,6 +1282,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1378,13 +1392,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            …
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            …
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1399,6 +1415,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1499,13 +1520,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            …
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            …
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1520,6 +1543,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1606,7 +1634,7 @@ class FirecrawlApp:
             id (str): The ID of the crawl job.
 
         Returns:
-            CrawlErrorsResponse
+            CrawlErrorsResponse containing:
             * errors (List[Dict[str, str]]): List of errors with fields:
                 * id (str): Error ID
                 * timestamp (str): When the error occurred
@@ -1669,10 +1697,7 @@ class FirecrawlApp:
             raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls or [],
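The same normalization reaches extract(): the old inline model_json_schema() conversion becomes a single _ensure_schema_dict call, so a dict, a Pydantic model class, or a structure nesting either now works. A hedged sketch, assuming extract(urls, prompt=..., schema=...) as the call shape and a placeholder API key:

    from pydantic import BaseModel
    from firecrawl import FirecrawlApp

    class Product(BaseModel):
        name: str
        price: float

    app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

    # schema may be a plain JSON-schema dict or a Pydantic model class.
    data = app.extract(
        urls=["https://example.com/store"],
        prompt="Extract the product name and price",
        schema=Product,
    )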
@@ -1801,10 +1826,7 @@ class FirecrawlApp:
 
         schema = schema
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls,
@@ -2467,6 +2489,24 @@ class FirecrawlApp:
         # Additional type validation can be added here if needed
         # For now, we rely on Pydantic models for detailed type validation
 
+    def _ensure_schema_dict(self, schema):
+        """
+        Utility to ensure a schema is a dict, not a Pydantic model class. Recursively checks dicts and lists.
+        """
+        if schema is None:
+            return schema
+        if isinstance(schema, type):
+            # Pydantic v1/v2 model class
+            if hasattr(schema, 'model_json_schema'):
+                return schema.model_json_schema()
+            elif hasattr(schema, 'schema'):
+                return schema.schema()
+        if isinstance(schema, dict):
+            return {k: self._ensure_schema_dict(v) for k, v in schema.items()}
+        if isinstance(schema, (list, tuple)):
+            return [self._ensure_schema_dict(v) for v in schema]
+        return schema
+
 class CrawlWatcher:
     """
     A class to watch and handle crawl job events via WebSocket connection.
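The helper recurses through dicts, lists, and tuples, converting any Pydantic model class it meets (model_json_schema() on v2 models, schema() on v1) and returning everything else unchanged. A quick illustration of that behavior; _ensure_schema_dict is private, so this is for understanding rather than production use, and the API key is a placeholder (no request is made):

    from pydantic import BaseModel
    from firecrawl import FirecrawlApp

    class Person(BaseModel):
        name: str
        age: int

    app = FirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key

    app._ensure_schema_dict(Person)              # model class -> JSON-schema dict
    app._ensure_schema_dict({"schema": Person})  # nested values are converted too
    app._ensure_schema_dict([Person, Person])    # lists/tuples handled recursively
    app._ensure_schema_dict({"type": "object"})  # plain data comes back unchanged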
@@ -2873,19 +2913,24 @@ class AsyncFirecrawlApp(FirecrawlApp):
         scrape_params['blockAds'] = block_ads
         if proxy:
             scrape_params['proxy'] = proxy
-        if extract:
-            …
-            if …
-                …
-            scrape_params['extract'] = …
-        if json_options:
-            …
-            if …
-                …
-            scrape_params['jsonOptions'] = …
+        if extract is not None:
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+        if json_options is not None:
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
 
+        if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
+            scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
+        if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
+            scrape_params['jsonOptions']['schema'] = self._ensure_schema_dict(scrape_params['jsonOptions']['schema'])
+
         # Make async request
         endpoint = f'/v1/scrape'
         response = await self._async_post_request(
@@ -2996,13 +3041,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            …
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            …
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -3017,6 +3064,11 @@ class AsyncFirecrawlApp(FirecrawlApp):
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = await self._async_post_request(
@@ -3127,13 +3179,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            …
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            …
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -3148,6 +3202,11 @@ class AsyncFirecrawlApp(FirecrawlApp):
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = await self._async_post_request(
@@ -3605,10 +3664,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls or [],
@@ -3862,8 +3918,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
            raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                schema = schema.model_json_schema()
+            schema = self._ensure_schema_dict(schema)
 
         request_data = ExtractResponse(
             urls=urls or [],
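AsyncFirecrawlApp inherits _ensure_schema_dict from FirecrawlApp, so the async variants normalize schemas the same way. A sketch under the assumption that the async extract mirrors the sync call shape; AsyncFirecrawlApp is imported from firecrawl.firecrawl, the module shown in this diff, and the API key is a placeholder:

    import asyncio
    from pydantic import BaseModel
    from firecrawl.firecrawl import AsyncFirecrawlApp

    class Product(BaseModel):
        name: str
        price: float

    async def main():
        app = AsyncFirecrawlApp(api_key="fc-YOUR-KEY")  # placeholder key
        # Schema normalization is identical to the sync client.
        data = await app.extract(
            urls=["https://example.com/store"],
            schema=Product,
        )

    asyncio.run(main())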