firecrawl 2.16.5 → 3.0.3 (py3-none-any.whl)

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.


Files changed (82)
  1. firecrawl/__init__.py +27 -19
  2. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +183 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  10. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  11. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
  13. firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
  14. firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
  15. firecrawl/__tests__/e2e/v2/test_map.py +60 -0
  16. firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
  17. firecrawl/__tests__/e2e/v2/test_search.py +265 -0
  18. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  19. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  20. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  21. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  28. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  29. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  30. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  31. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  32. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
  33. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
  34. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
  35. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +206 -0
  36. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  37. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  38. firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
  39. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  40. firecrawl/client.py +241 -0
  41. firecrawl/{firecrawl.py → firecrawl.backup.py} +17 -15
  42. firecrawl/types.py +157 -0
  43. firecrawl/v1/__init__.py +14 -0
  44. firecrawl/v1/client.py +4653 -0
  45. firecrawl/v2/__init__.py +4 -0
  46. firecrawl/v2/client.py +802 -0
  47. firecrawl/v2/client_async.py +250 -0
  48. firecrawl/v2/methods/aio/__init__.py +1 -0
  49. firecrawl/v2/methods/aio/batch.py +85 -0
  50. firecrawl/v2/methods/aio/crawl.py +174 -0
  51. firecrawl/v2/methods/aio/extract.py +126 -0
  52. firecrawl/v2/methods/aio/map.py +59 -0
  53. firecrawl/v2/methods/aio/scrape.py +36 -0
  54. firecrawl/v2/methods/aio/search.py +58 -0
  55. firecrawl/v2/methods/aio/usage.py +42 -0
  56. firecrawl/v2/methods/batch.py +420 -0
  57. firecrawl/v2/methods/crawl.py +468 -0
  58. firecrawl/v2/methods/extract.py +131 -0
  59. firecrawl/v2/methods/map.py +77 -0
  60. firecrawl/v2/methods/scrape.py +68 -0
  61. firecrawl/v2/methods/search.py +173 -0
  62. firecrawl/v2/methods/usage.py +41 -0
  63. firecrawl/v2/types.py +546 -0
  64. firecrawl/v2/utils/__init__.py +9 -0
  65. firecrawl/v2/utils/error_handler.py +107 -0
  66. firecrawl/v2/utils/get_version.py +15 -0
  67. firecrawl/v2/utils/http_client.py +153 -0
  68. firecrawl/v2/utils/http_client_async.py +64 -0
  69. firecrawl/v2/utils/validation.py +324 -0
  70. firecrawl/v2/watcher.py +312 -0
  71. firecrawl/v2/watcher_async.py +245 -0
  72. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/LICENSE +0 -0
  73. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/METADATA +49 -32
  74. firecrawl-3.0.3.dist-info/RECORD +78 -0
  75. tests/test_timeout_conversion.py +117 -0
  76. firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  77. firecrawl/__tests__/e2e_withAuth/test.py +0 -170
  78. firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  79. firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -465
  80. firecrawl-2.16.5.dist-info/RECORD +0 -12
  81. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/WHEEL +0 -0
  82. {firecrawl-2.16.5.dist-info → firecrawl-3.0.3.dist-info}/top_level.txt +0 -0
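The structural story of this release: the old monolithic `firecrawl/firecrawl.py` is preserved as `firecrawl.backup.py`, the legacy client moves into `firecrawl/v1/client.py`, and a new `firecrawl/v2/` package provides sync and async clients with per-method modules. A minimal migration sketch follows; the `Firecrawl` class name and the `scrape()` signature are assumptions based on the 3.x layout above, since this diff does not show `firecrawl/client.py` itself.

# Hypothetical migration sketch. The class name `Firecrawl` and the
# scrape() signature are assumptions, not confirmed by this diff.
from firecrawl import Firecrawl  # new top-level client (firecrawl/client.py)

app = Firecrawl(api_key="fc-YOUR-KEY")
doc = app.scrape("https://example.com", formats=["markdown"])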
firecrawl/v2/utils/http_client.py
@@ -0,0 +1,153 @@
+"""
+HTTP client utilities for v2 API.
+"""
+
+import time
+from typing import Dict, Any, Optional
+import requests
+from .get_version import get_version
+
+version = get_version()
+
+class HttpClient:
+    """HTTP client with retry logic and error handling."""
+
+    def __init__(self, api_key: str, api_url: str):
+        self.api_key = api_key
+        self.api_url = api_url
+
+    def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
+        """Prepare headers for API requests."""
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': f'Bearer {self.api_key}',
+        }
+
+        if idempotency_key:
+            headers['x-idempotency-key'] = idempotency_key
+
+        return headers
+
+    def post(
+        self,
+        endpoint: str,
+        data: Dict[str, Any],
+        headers: Optional[Dict[str, str]] = None,
+        timeout: Optional[float] = None,
+        retries: int = 3,
+        backoff_factor: float = 0.5
+    ) -> requests.Response:
+        """Make a POST request with retry logic."""
+        if headers is None:
+            headers = self._prepare_headers()
+
+        data['origin'] = f'python-sdk@{version}'
+
+        url = f"{self.api_url}{endpoint}"
+
+        last_exception = None
+
+        for attempt in range(retries):
+            try:
+                response = requests.post(
+                    url,
+                    headers=headers,
+                    json=data,
+                    timeout=timeout
+                )
+
+                if response.status_code == 502:
+                    if attempt < retries - 1:
+                        time.sleep(backoff_factor * (2 ** attempt))
+                        continue
+
+                return response
+
+            except requests.RequestException as e:
+                last_exception = e
+                if attempt == retries - 1:
+                    raise e
+                time.sleep(backoff_factor * (2 ** attempt))
+
+        # This should never be reached due to the exception handling above
+        raise last_exception or Exception("Unexpected error in POST request")
+
+    def get(
+        self,
+        endpoint: str,
+        headers: Optional[Dict[str, str]] = None,
+        timeout: Optional[float] = None,
+        retries: int = 3,
+        backoff_factor: float = 0.5
+    ) -> requests.Response:
+        """Make a GET request with retry logic."""
+        if headers is None:
+            headers = self._prepare_headers()
+
+        url = f"{self.api_url}{endpoint}"
+
+        last_exception = None
+
+        for attempt in range(retries):
+            try:
+                response = requests.get(
+                    url,
+                    headers=headers,
+                    timeout=timeout
+                )
+
+                if response.status_code == 502:
+                    if attempt < retries - 1:
+                        time.sleep(backoff_factor * (2 ** attempt))
+                        continue
+
+                return response
+
+            except requests.RequestException as e:
+                last_exception = e
+                if attempt == retries - 1:
+                    raise e
+                time.sleep(backoff_factor * (2 ** attempt))
+
+        # This should never be reached due to the exception handling above
+        raise last_exception or Exception("Unexpected error in GET request")
+
+    def delete(
+        self,
+        endpoint: str,
+        headers: Optional[Dict[str, str]] = None,
+        timeout: Optional[float] = None,
+        retries: int = 3,
+        backoff_factor: float = 0.5
+    ) -> requests.Response:
+        """Make a DELETE request with retry logic."""
+        if headers is None:
+            headers = self._prepare_headers()
+
+        url = f"{self.api_url}{endpoint}"
+
+        last_exception = None
+
+        for attempt in range(retries):
+            try:
+                response = requests.delete(
+                    url,
+                    headers=headers,
+                    timeout=timeout
+                )
+
+                if response.status_code == 502:
+                    if attempt < retries - 1:
+                        time.sleep(backoff_factor * (2 ** attempt))
+                        continue
+
+                return response
+
+            except requests.RequestException as e:
+                last_exception = e
+                if attempt == retries - 1:
+                    raise e
+                time.sleep(backoff_factor * (2 ** attempt))
+
+        # This should never be reached due to the exception handling above
+        raise last_exception or Exception("Unexpected error in DELETE request")
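The retry behavior above is narrow by design: only a 502 response or a `requests.RequestException` triggers a retry, with an exponential sleep of `backoff_factor * 2 ** attempt`; any other status code is returned to the caller unchanged. A minimal usage sketch, using the module path shown in this diff (the `/v2/scrape` endpoint and API key are illustrative assumptions):

# Usage sketch for the HttpClient above. The endpoint path and key are
# illustrative assumptions, not taken from this diff.
from firecrawl.v2.utils.http_client import HttpClient

client = HttpClient(api_key="fc-YOUR-KEY", api_url="https://api.firecrawl.dev")

# With the defaults (retries=3, backoff_factor=0.5), a 502 is retried after
# sleeping 0.5 s and then 1.0 s; the third 502 is returned to the caller.
# Note that post() mutates the payload dict, injecting an 'origin' field.
response = client.post("/v2/scrape", data={"url": "https://example.com"}, timeout=30)
if response.status_code == 200:
    print(response.json())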
firecrawl/v2/utils/http_client_async.py
@@ -0,0 +1,64 @@
+import httpx
+from typing import Optional, Dict, Any
+from .get_version import get_version
+
+version = get_version()
+
+
+class AsyncHttpClient:
+    def __init__(self, api_key: str, api_url: str):
+        self.api_key = api_key
+        self.api_url = api_url
+        self._client = httpx.AsyncClient(
+            base_url=api_url,
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            },
+        )
+
+    async def close(self) -> None:
+        await self._client.aclose()
+
+    def _headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
+        headers: Dict[str, str] = {}
+        if idempotency_key:
+            headers["x-idempotency-key"] = idempotency_key
+        return headers
+
+    async def post(
+        self,
+        endpoint: str,
+        data: Dict[str, Any],
+        headers: Optional[Dict[str, str]] = None,
+        timeout: Optional[float] = None,
+    ) -> httpx.Response:
+        payload = dict(data)
+        payload["origin"] = f"python-sdk@{version}"
+        return await self._client.post(
+            endpoint,
+            json=payload,
+            headers={**self._headers(), **(headers or {})},
+            timeout=timeout,
+        )
+
+    async def get(
+        self,
+        endpoint: str,
+        headers: Optional[Dict[str, str]] = None,
+        timeout: Optional[float] = None,
+    ) -> httpx.Response:
+        return await self._client.get(
+            endpoint, headers={**self._headers(), **(headers or {})}, timeout=timeout
+        )
+
+    async def delete(
+        self,
+        endpoint: str,
+        headers: Optional[Dict[str, str]] = None,
+        timeout: Optional[float] = None,
+    ) -> httpx.Response:
+        return await self._client.delete(
+            endpoint, headers={**self._headers(), **(headers or {})}, timeout=timeout
+        )
+
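Unlike the sync `HttpClient`, the async variant holds a persistent `httpx.AsyncClient` (auth headers bound at construction, relative endpoints resolved against `base_url`), copies the payload rather than mutating it, and has no retry loop, so 502 handling falls to the caller. Since it owns a connection pool, it must be closed. A usage sketch under the same illustrative assumptions as above:

# Usage sketch for the AsyncHttpClient above; endpoint and key are
# illustrative assumptions.
import asyncio
from firecrawl.v2.utils.http_client_async import AsyncHttpClient

async def main():
    client = AsyncHttpClient(api_key="fc-YOUR-KEY", api_url="https://api.firecrawl.dev")
    try:
        response = await client.post("/v2/scrape", data={"url": "https://example.com"})
        response.raise_for_status()  # no built-in retries here, unlike the sync client
        print(response.json())
    finally:
        await client.close()  # release the underlying connection pool

asyncio.run(main())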
firecrawl/v2/utils/validation.py
@@ -0,0 +1,324 @@
+"""
+Shared validation functions for Firecrawl v2 API.
+"""
+
+from typing import Optional, Dict, Any, List
+from ..types import ScrapeOptions, ScrapeFormats
+
+
+def _convert_format_string(format_str: str) -> str:
+    """
+    Convert format string from snake_case to camelCase.
+
+    Args:
+        format_str: Format string in snake_case
+
+    Returns:
+        Format string in camelCase
+    """
+    format_mapping = {
+        "raw_html": "rawHtml",
+        "change_tracking": "changeTracking",
+        "screenshot_full_page": "screenshot@fullPage"
+    }
+    return format_mapping.get(format_str, format_str)
+
+
+def _normalize_schema(schema: Any) -> Optional[Dict[str, Any]]:
+    """
+    Normalize a schema object which may be a dict, Pydantic BaseModel subclass,
+    or a Pydantic model instance into a plain dict.
+    """
+    try:
+        # Pydantic v2 BaseModel subclass: has "model_json_schema"
+        if hasattr(schema, "model_json_schema") and callable(schema.model_json_schema):
+            return schema.model_json_schema()
+        # Pydantic v2 BaseModel instance: has "model_dump" or "model_json_schema"
+        if hasattr(schema, "model_dump") and callable(schema.model_dump):
+            # Try to get JSON schema if available on the class
+            mjs = getattr(schema.__class__, "model_json_schema", None)
+            if callable(mjs):
+                return schema.__class__.model_json_schema()
+            # Fallback to data shape (not ideal, but better than dropping)
+            return schema.model_dump()
+        # Pydantic v1 BaseModel subclass: has "schema"
+        if hasattr(schema, "schema") and callable(schema.schema):
+            return schema.schema()
+        # Pydantic v1 BaseModel instance
+        if hasattr(schema, "dict") and callable(schema.dict):
+            # Prefer class-level schema if present
+            sch = getattr(schema.__class__, "schema", None)
+            if callable(sch):
+                return schema.__class__.schema()
+            return schema.dict()
+    except Exception:
+        pass
+    # Already a dict or unsupported type
+    return schema if isinstance(schema, dict) else None
+
+
+def _validate_json_format(format_obj: Any) -> Dict[str, Any]:
+    """
+    Validate and prepare json format object.
+
+    Args:
+        format_obj: Format object that should be json type
+
+    Returns:
+        Validated json format dict
+
+    Raises:
+        ValueError: If json format is missing required fields
+    """
+    if not isinstance(format_obj, dict):
+        raise ValueError("json format must be an object with 'type', 'prompt', and 'schema' fields")
+
+    if format_obj.get('type') != 'json':
+        raise ValueError("json format must have type='json'")
+
+    # prompt is optional in v2; only normalize when present
+    # schema is recommended; if provided, normalize Pydantic forms
+    schema = format_obj.get('schema')
+    normalized = dict(format_obj)
+    if schema is not None:
+        normalized_schema = _normalize_schema(schema)
+        if normalized_schema is not None:
+            normalized['schema'] = normalized_schema
+    return normalized
+
+
+def validate_scrape_options(options: Optional[ScrapeOptions]) -> Optional[ScrapeOptions]:
+    """
+    Validate and normalize scrape options.
+
+    Args:
+        options: Scraping options to validate
+
+    Returns:
+        Validated options or None
+
+    Raises:
+        ValueError: If options are invalid
+    """
+    if options is None:
+        return None
+
+    # Validate timeout
+    if options.timeout is not None and options.timeout <= 0:
+        raise ValueError("Timeout must be positive")
+
+    # Validate wait_for
+    if options.wait_for is not None and options.wait_for < 0:
+        raise ValueError("wait_for must be non-negative")
+
+    return options
+
+
+def prepare_scrape_options(options: Optional[ScrapeOptions]) -> Optional[Dict[str, Any]]:
+    """
+    Prepare ScrapeOptions for API submission with manual snake_case to camelCase conversion.
+
+    Args:
+        options: ScrapeOptions to prepare
+
+    Returns:
+        Dictionary ready for API submission or None if options is None
+    """
+    if options is None:
+        return None
+
+    # Validate options first
+    validated_options = validate_scrape_options(options)
+    if validated_options is None:
+        return None
+
+    # Apply default values for None fields
+    default_values = {
+        "only_main_content": True,
+        "mobile": False,
+        "skip_tls_verification": True,
+        "remove_base64_images": True,
+        "fast_mode": False,
+        "block_ads": True,
+        "max_age": 14400000,
+        "store_in_cache": True
+    }
+
+    # Convert to dict and handle manual snake_case to camelCase conversion
+    options_data = validated_options.model_dump(exclude_none=True)
+
+    # Apply defaults for None fields
+    for field, default_value in default_values.items():
+        if field not in options_data:
+            options_data[field] = default_value
+
+    scrape_data = {}
+
+    # Manual field mapping for snake_case to camelCase conversion
+    field_mappings = {
+        "include_tags": "includeTags",
+        "exclude_tags": "excludeTags",
+        "only_main_content": "onlyMainContent",
+        "wait_for": "waitFor",
+        "skip_tls_verification": "skipTlsVerification",
+        "remove_base64_images": "removeBase64Images",
+        "fast_mode": "fastMode",
+        "use_mock": "useMock",
+        "block_ads": "blockAds",
+        "store_in_cache": "storeInCache",
+        "max_age": "maxAge"
+    }
+
+    # Apply field mappings
+    for snake_case, camel_case in field_mappings.items():
+        if snake_case in options_data:
+            scrape_data[camel_case] = options_data.pop(snake_case)
+
+    # Handle special cases
+    for key, value in options_data.items():
+        if value is not None:
+            if key == "formats":
+                # Handle formats conversion
+                converted_formats: List[Any] = []
+
+                # Prefer using original object to detect ScrapeFormats vs list
+                original_formats = getattr(options, 'formats', None)
+
+                if isinstance(original_formats, ScrapeFormats):
+                    # Include explicit list first
+                    if original_formats.formats:
+                        for fmt in original_formats.formats:
+                            if isinstance(fmt, str):
+                                if fmt == "json":
+                                    raise ValueError("json format must be an object with 'type', 'prompt', and 'schema' fields")
+                                converted_formats.append(_convert_format_string(fmt))
+                            elif isinstance(fmt, dict):
+                                fmt_type = _convert_format_string(fmt.get('type')) if fmt.get('type') else None
+                                if fmt_type == 'json':
+                                    validated_json = _validate_json_format({**fmt, 'type': 'json'})
+                                    converted_formats.append(validated_json)
+                                elif fmt_type == 'screenshot':
+                                    # Normalize screenshot options
+                                    normalized = {**fmt, 'type': 'screenshot'}
+                                    if 'full_page' in normalized:
+                                        normalized['fullPage'] = normalized.pop('full_page')
+                                    # Normalize viewport if it's a model instance
+                                    vp = normalized.get('viewport')
+                                    if hasattr(vp, 'model_dump'):
+                                        normalized['viewport'] = vp.model_dump(exclude_none=True)
+                                    converted_formats.append(normalized)
+                                else:
+                                    if 'type' in fmt:
+                                        fmt['type'] = fmt_type or fmt['type']
+                                    converted_formats.append(fmt)
+                            elif hasattr(fmt, 'type'):
+                                if fmt.type == 'json':
+                                    converted_formats.append(_validate_json_format(fmt.model_dump()))
+                                else:
+                                    converted_formats.append(_convert_format_string(fmt.type))
+                            else:
+                                converted_formats.append(fmt)
+
+                    # Add booleans from ScrapeFormats
+                    if original_formats.markdown:
+                        converted_formats.append("markdown")
+                    if original_formats.html:
+                        converted_formats.append("html")
+                    if original_formats.raw_html:
+                        converted_formats.append("rawHtml")
+                    if original_formats.summary:
+                        converted_formats.append("summary")
+                    if original_formats.links:
+                        converted_formats.append("links")
+                    if original_formats.screenshot:
+                        converted_formats.append("screenshot")
+                    if original_formats.change_tracking:
+                        converted_formats.append("changeTracking")
+                    # Note: We intentionally do not auto-include 'json' when boolean is set,
+                    # because JSON requires an object with schema/prompt. The caller must
+                    # supply the full json format object explicitly.
+                elif isinstance(original_formats, list):
+                    for fmt in original_formats:
+                        if isinstance(fmt, str):
+                            if fmt == "json":
+                                raise ValueError("json format must be an object with 'type', 'prompt', and 'schema' fields")
+                            converted_formats.append(_convert_format_string(fmt))
+                        elif isinstance(fmt, dict):
+                            fmt_type = _convert_format_string(fmt.get('type')) if fmt.get('type') else None
+                            if fmt_type == 'json':
+                                validated_json = _validate_json_format({**fmt, 'type': 'json'})
+                                converted_formats.append(validated_json)
+                            elif fmt_type == 'screenshot':
+                                normalized = {**fmt, 'type': 'screenshot'}
+                                if 'full_page' in normalized:
+                                    normalized['fullPage'] = normalized.pop('full_page')
+                                vp = normalized.get('viewport')
+                                if hasattr(vp, 'model_dump'):
+                                    normalized['viewport'] = vp.model_dump(exclude_none=True)
+                                converted_formats.append(normalized)
+                            else:
+                                if 'type' in fmt:
+                                    fmt['type'] = fmt_type or fmt['type']
+                                converted_formats.append(fmt)
+                        elif hasattr(fmt, 'type'):
+                            if fmt.type == 'json':
+                                converted_formats.append(_validate_json_format(fmt.model_dump()))
+                            elif fmt.type == 'screenshot':
+                                normalized = {'type': 'screenshot'}
+                                if getattr(fmt, 'full_page', None) is not None:
+                                    normalized['fullPage'] = fmt.full_page
+                                if getattr(fmt, 'quality', None) is not None:
+                                    normalized['quality'] = fmt.quality
+                                vp = getattr(fmt, 'viewport', None)
+                                if vp is not None:
+                                    normalized['viewport'] = vp.model_dump(exclude_none=True) if hasattr(vp, 'model_dump') else vp
+                                converted_formats.append(normalized)
+                            else:
+                                converted_formats.append(_convert_format_string(fmt.type))
+                        else:
+                            converted_formats.append(fmt)
+                else:
+                    # Fallback: try to iterate over value if it's a list-like
+                    try:
+                        for fmt in value:
+                            converted_formats.append(fmt)
+                    except TypeError:
+                        pass
+
+                if converted_formats:
+                    scrape_data["formats"] = converted_formats
+            elif key == "actions":
+                # Handle actions conversion
+                converted_actions = []
+                for action in value:
+                    if isinstance(action, dict):
+                        # Convert action dict
+                        converted_action = {}
+                        for action_key, action_value in action.items():
+                            if action_key == "full_page":
+                                converted_action["fullPage"] = action_value
+                            else:
+                                converted_action[action_key] = action_value
+                        converted_actions.append(converted_action)
+                    else:
+                        # Handle action objects
+                        action_data = action.model_dump(exclude_none=True)
+                        converted_action = {}
+                        for action_key, action_value in action_data.items():
+                            if action_key == "full_page":
+                                converted_action["fullPage"] = action_value
+                            else:
+                                converted_action[action_key] = action_value
+                        converted_actions.append(converted_action)
+                scrape_data["actions"] = converted_actions
+            elif key == "location":
+                # Handle location conversion
+                if isinstance(value, dict):
+                    scrape_data["location"] = value
+                else:
+                    scrape_data["location"] = value.model_dump(exclude_none=True)
+            else:
+                # For fields that don't need conversion, use as-is
+                scrape_data[key] = value
+
+    return scrape_data
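The format-normalization rules are easiest to see by example: plain format strings pass through `_convert_format_string`, a bare "json" string is rejected by `prepare_scrape_options`, and a json format dict must carry `type='json'` plus, optionally, a schema that may arrive as a Pydantic model. A behavior sketch using only helpers shown in this diff (the `Product` model is an illustrative assumption):

# Behavior sketch for the validation helpers above; `Product` is a
# hypothetical Pydantic model used only for illustration.
from pydantic import BaseModel
from firecrawl.v2.utils.validation import _convert_format_string, _validate_json_format

print(_convert_format_string("raw_html"))   # -> "rawHtml"
print(_convert_format_string("markdown"))   # unmapped strings pass through -> "markdown"

class Product(BaseModel):
    name: str
    price: float

fmt = _validate_json_format({
    "type": "json",
    "prompt": "Extract the product",
    "schema": Product,                       # a Pydantic class is normalized...
})
assert isinstance(fmt["schema"], dict)       # ...into a plain JSON-schema dict

# A bare "json" string in formats (rather than an object) raises ValueError
# in prepare_scrape_options, as the conversion loop above shows.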