firecrawl-py 3.2.1__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl-py might be problematic.

Files changed (85)
  1. build/lib/firecrawl/__init__.py +87 -0
  2. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +79 -0
  3. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +188 -0
  4. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +38 -0
  5. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +40 -0
  6. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +137 -0
  7. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +248 -0
  8. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +35 -0
  9. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  10. build/lib/firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  11. build/lib/firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  12. build/lib/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +105 -0
  13. build/lib/firecrawl/__tests__/e2e/v2/test_crawl.py +276 -0
  14. build/lib/firecrawl/__tests__/e2e/v2/test_extract.py +54 -0
  15. build/lib/firecrawl/__tests__/e2e/v2/test_map.py +60 -0
  16. build/lib/firecrawl/__tests__/e2e/v2/test_scrape.py +154 -0
  17. build/lib/firecrawl/__tests__/e2e/v2/test_search.py +269 -0
  18. build/lib/firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  19. build/lib/firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  20. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  21. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +61 -0
  22. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  23. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +19 -0
  24. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  25. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +63 -0
  26. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  27. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  28. build/lib/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  29. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  30. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  31. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  32. build/lib/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +53 -0
  33. build/lib/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +92 -0
  34. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +167 -0
  35. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  36. build/lib/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  37. build/lib/firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  38. build/lib/firecrawl/__tests__/unit/v2/utils/test_validation.py +290 -0
  39. build/lib/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  40. build/lib/firecrawl/client.py +242 -0
  41. build/lib/firecrawl/firecrawl.backup.py +4635 -0
  42. build/lib/firecrawl/types.py +161 -0
  43. build/lib/firecrawl/v1/__init__.py +14 -0
  44. build/lib/firecrawl/v1/client.py +4653 -0
  45. build/lib/firecrawl/v2/__init__.py +4 -0
  46. build/lib/firecrawl/v2/client.py +802 -0
  47. build/lib/firecrawl/v2/client_async.py +250 -0
  48. build/lib/firecrawl/v2/methods/aio/__init__.py +1 -0
  49. build/lib/firecrawl/v2/methods/aio/batch.py +85 -0
  50. build/lib/firecrawl/v2/methods/aio/crawl.py +171 -0
  51. build/lib/firecrawl/v2/methods/aio/extract.py +126 -0
  52. build/lib/firecrawl/v2/methods/aio/map.py +59 -0
  53. build/lib/firecrawl/v2/methods/aio/scrape.py +33 -0
  54. build/lib/firecrawl/v2/methods/aio/search.py +172 -0
  55. build/lib/firecrawl/v2/methods/aio/usage.py +42 -0
  56. build/lib/firecrawl/v2/methods/batch.py +417 -0
  57. build/lib/firecrawl/v2/methods/crawl.py +469 -0
  58. build/lib/firecrawl/v2/methods/extract.py +131 -0
  59. build/lib/firecrawl/v2/methods/map.py +77 -0
  60. build/lib/firecrawl/v2/methods/scrape.py +64 -0
  61. build/lib/firecrawl/v2/methods/search.py +197 -0
  62. build/lib/firecrawl/v2/methods/usage.py +41 -0
  63. build/lib/firecrawl/v2/types.py +665 -0
  64. build/lib/firecrawl/v2/utils/__init__.py +9 -0
  65. build/lib/firecrawl/v2/utils/error_handler.py +107 -0
  66. build/lib/firecrawl/v2/utils/get_version.py +15 -0
  67. build/lib/firecrawl/v2/utils/http_client.py +153 -0
  68. build/lib/firecrawl/v2/utils/http_client_async.py +65 -0
  69. build/lib/firecrawl/v2/utils/normalize.py +107 -0
  70. build/lib/firecrawl/v2/utils/validation.py +324 -0
  71. build/lib/firecrawl/v2/watcher.py +301 -0
  72. build/lib/firecrawl/v2/watcher_async.py +242 -0
  73. build/lib/tests/test_change_tracking.py +98 -0
  74. build/lib/tests/test_timeout_conversion.py +117 -0
  75. firecrawl/__init__.py +1 -1
  76. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +2 -2
  77. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +6 -6
  78. firecrawl/v2/methods/search.py +11 -0
  79. firecrawl/v2/types.py +30 -1
  80. {firecrawl_py-3.2.1.dist-info/licenses → firecrawl_py-3.3.0.dist-info}/LICENSE +0 -0
  81. {firecrawl_py-3.2.1.dist-info → firecrawl_py-3.3.0.dist-info}/METADATA +3 -7
  82. firecrawl_py-3.3.0.dist-info/RECORD +153 -0
  83. {firecrawl_py-3.2.1.dist-info → firecrawl_py-3.3.0.dist-info}/WHEEL +1 -1
  84. {firecrawl_py-3.2.1.dist-info → firecrawl_py-3.3.0.dist-info}/top_level.txt +2 -0
  85. firecrawl_py-3.2.1.dist-info/RECORD +0 -79
build/lib/firecrawl/v2/types.py
@@ -0,0 +1,665 @@
+ """
+ Type definitions for Firecrawl v2 API.
+
+ This module contains clean, modern type definitions for the v2 API.
+ """
+
+ import warnings
+ from datetime import datetime
+ from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
+ import logging
+ from pydantic import BaseModel, Field, field_validator, ValidationError
+
+ # Suppress pydantic warnings about schema field shadowing
+ # Tested using schema_field alias="schema" but it doesn't work.
+ warnings.filterwarnings("ignore", message="Field name \"schema\" in \"Format\" shadows an attribute in parent \"BaseModel\"")
+ warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonFormat\" shadows an attribute in parent \"Format\"")
+ warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ChangeTrackingFormat\" shadows an attribute in parent \"Format\"")
+ warnings.filterwarnings("ignore", message="Field name \"json\" in \"ScrapeFormats\" shadows an attribute in parent \"BaseModel\"")
+ warnings.filterwarnings("ignore", message="Field name \"json\" in \"Document\" shadows an attribute in parent \"BaseModel\"")
+
+ T = TypeVar('T')
+
+ # Module logger
+ logger = logging.getLogger("firecrawl")
+
+ # Base response types
+ class BaseResponse(BaseModel, Generic[T]):
+     """Base response structure for all API responses."""
+     success: bool
+     data: Optional[T] = None
+     error: Optional[str] = None
+     warning: Optional[str] = None
+
+ # Document and content types
+ class DocumentMetadata(BaseModel):
+     """Metadata for scraped documents (snake_case only; API camelCase normalized in code)."""
+     # Common metadata fields
+     title: Optional[str] = None
+     description: Optional[str] = None
+     url: Optional[str] = None
+     language: Optional[str] = None
+     keywords: Optional[Union[str, List[str]]] = None
+     robots: Optional[str] = None
+
+     # OpenGraph and social metadata
+     og_title: Optional[str] = None
+     og_description: Optional[str] = None
+     og_url: Optional[str] = None
+     og_image: Optional[str] = None
+     og_audio: Optional[str] = None
+     og_determiner: Optional[str] = None
+     og_locale: Optional[str] = None
+     og_locale_alternate: Optional[List[str]] = None
+     og_site_name: Optional[str] = None
+     og_video: Optional[str] = None
+
+     # Dublin Core and other site metadata
+     favicon: Optional[str] = None
+     dc_terms_created: Optional[str] = None
+     dc_date_created: Optional[str] = None
+     dc_date: Optional[str] = None
+     dc_terms_type: Optional[str] = None
+     dc_type: Optional[str] = None
+     dc_terms_audience: Optional[str] = None
+     dc_terms_subject: Optional[str] = None
+     dc_subject: Optional[str] = None
+     dc_description: Optional[str] = None
+     dc_terms_keywords: Optional[str] = None
+
+     modified_time: Optional[str] = None
+     published_time: Optional[str] = None
+     article_tag: Optional[str] = None
+     article_section: Optional[str] = None
+
+     # Response-level metadata
+     source_url: Optional[str] = None
+     status_code: Optional[int] = None
+     scrape_id: Optional[str] = None
+     num_pages: Optional[int] = None
+     content_type: Optional[str] = None
+     proxy_used: Optional[Literal["basic", "stealth"]] = None
+     cache_state: Optional[Literal["hit", "miss"]] = None
+     cached_at: Optional[str] = None
+     credits_used: Optional[int] = None
+
+     # Error information
+     error: Optional[str] = None
+
+     @staticmethod
+     def _coerce_list_to_string(value: Any) -> Any:
+         if isinstance(value, list):
+             # Prefer first string if semantically a single-valued field, else join
+             if len(value) == 1:
+                 return str(value[0])
+             return ', '.join(str(item) for item in value)
+         return value
+
+     @staticmethod
+     def _coerce_string_to_int(value: Any) -> Any:
+         if isinstance(value, str):
+             try:
+                 return int(value)
+             except ValueError:
+                 return value
+         return value
+
+     @field_validator('robots', 'og_title', 'og_description', 'og_url', 'og_image', 'language', mode='before')
+     @classmethod
+     def coerce_lists_to_string_fields(cls, v):
+         return cls._coerce_list_to_string(v)
+
+     @field_validator('status_code', mode='before')
+     @classmethod
+     def coerce_status_code_to_int(cls, v):
+         return cls._coerce_string_to_int(v)
+
+ class Document(BaseModel):
+     """A scraped document."""
+     markdown: Optional[str] = None
+     html: Optional[str] = None
+     raw_html: Optional[str] = None
+     json: Optional[Any] = None
+     summary: Optional[str] = None
+     metadata: Optional[DocumentMetadata] = None
+     links: Optional[List[str]] = None
+     screenshot: Optional[str] = None
+     actions: Optional[Dict[str, Any]] = None
+     warning: Optional[str] = None
+     change_tracking: Optional[Dict[str, Any]] = None
+
+     @property
+     def metadata_typed(self) -> DocumentMetadata:
+         """Always returns a DocumentMetadata instance for LSP-friendly access."""
+         md = self.metadata
+         if isinstance(md, DocumentMetadata):
+             return md
+         if isinstance(md, dict):
+             try:
+                 return DocumentMetadata(**md)
+             except (ValidationError, TypeError) as exc:
+                 logger.debug("Failed to construct DocumentMetadata from dict: %s", exc)
+         return DocumentMetadata()
+
+     @property
+     def metadata_dict(self) -> Dict[str, Any]:
+         """Returns metadata as a plain dict (exclude None)."""
+         md = self.metadata
+         if isinstance(md, DocumentMetadata):
+             return md.model_dump(exclude_none=True)
+         if isinstance(md, dict):
+             return {k: v for k, v in md.items() if v is not None}
+         return {}
+
+ # Webhook types
+ class WebhookConfig(BaseModel):
+     """Configuration for webhooks."""
+     url: str
+     headers: Optional[Dict[str, str]] = None
+     metadata: Optional[Dict[str, str]] = None
+     events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None
+
+ class WebhookData(BaseModel):
+     """Data sent to webhooks."""
+     job_id: str
+     status: str
+     current: Optional[int] = None
+     total: Optional[int] = None
+     data: Optional[List[Document]] = None
+     error: Optional[str] = None
+
+ class Source(BaseModel):
+     """Configuration for a search source."""
+     type: str
+
+ SourceOption = Union[str, Source]
+
+ class Category(BaseModel):
+     """Configuration for a search category."""
+     type: str
+
+ CategoryOption = Union[str, Category]
+
+ FormatString = Literal[
+     # camelCase versions (API format)
+     "markdown", "html", "rawHtml", "links", "screenshot", "summary", "changeTracking", "json",
+     # snake_case versions (user-friendly)
+     "raw_html", "change_tracking"
+ ]
+
+ class Viewport(BaseModel):
+     """Viewport configuration for screenshots."""
+     width: int
+     height: int
+
+ class Format(BaseModel):
+     """Configuration for a format."""
+     type: FormatString
+
+ class JsonFormat(Format):
+     """Configuration for JSON extraction."""
+     prompt: Optional[str] = None
+     schema: Optional[Any] = None
+
+ class ChangeTrackingFormat(Format):
+     """Configuration for change tracking."""
+     modes: List[Literal["git-diff", "json"]]
+     schema: Optional[Dict[str, Any]] = None
+     prompt: Optional[str] = None
+     tag: Optional[str] = None
+
+ class ScreenshotFormat(BaseModel):
+     """Configuration for screenshot format."""
+     type: Literal["screenshot"] = "screenshot"
+     full_page: Optional[bool] = None
+     quality: Optional[int] = None
+     viewport: Optional[Union[Dict[str, int], Viewport]] = None
+
+ FormatOption = Union[Dict[str, Any], FormatString, JsonFormat, ChangeTrackingFormat, ScreenshotFormat, Format]
+
+ # Scrape types
+ class ScrapeFormats(BaseModel):
+     """Output formats for scraping."""
+     formats: Optional[List[FormatOption]] = None
+     markdown: bool = True
+     html: bool = False
+     raw_html: bool = False
+     summary: bool = False
+     links: bool = False
+     screenshot: bool = False
+     change_tracking: bool = False
+     json: bool = False
+
+     @field_validator('formats')
+     @classmethod
+     def validate_formats(cls, v):
+         """Validate and normalize formats input."""
+         if v is None:
+             return v
+
+         normalized_formats = []
+         for format_item in v:
+             if isinstance(format_item, str):
+                 normalized_formats.append(Format(type=format_item))
+             elif isinstance(format_item, dict):
+                 # Preserve dicts as-is to avoid dropping custom fields like 'schema'
+                 normalized_formats.append(format_item)
+             elif isinstance(format_item, Format):
+                 normalized_formats.append(format_item)
+             else:
+                 raise ValueError(f"Invalid format format: {format_item}")
+
+         return normalized_formats
+
+ class ScrapeOptions(BaseModel):
+     """Options for scraping operations."""
+     formats: Optional[Union['ScrapeFormats', List[FormatOption]]] = None
+     headers: Optional[Dict[str, str]] = None
+     include_tags: Optional[List[str]] = None
+     exclude_tags: Optional[List[str]] = None
+     only_main_content: Optional[bool] = None
+     timeout: Optional[int] = None
+     wait_for: Optional[int] = None
+     mobile: Optional[bool] = None
+     parsers: Optional[List[str]] = None
+     actions: Optional[List[Union['WaitAction', 'ScreenshotAction', 'ClickAction', 'WriteAction', 'PressAction', 'ScrollAction', 'ScrapeAction', 'ExecuteJavascriptAction', 'PDFAction']]] = None
+     location: Optional['Location'] = None
+     skip_tls_verification: Optional[bool] = None
+     remove_base64_images: Optional[bool] = None
+     fast_mode: Optional[bool] = None
+     use_mock: Optional[str] = None
+     block_ads: Optional[bool] = None
+     proxy: Optional[Literal["basic", "stealth", "auto"]] = None
+     max_age: Optional[int] = None
+     store_in_cache: Optional[bool] = None
+
+     @field_validator('formats')
+     @classmethod
+     def validate_formats(cls, v):
+         """Validate and normalize formats input."""
+         if v is None:
+             return v
+         if isinstance(v, ScrapeFormats):
+             return v
+         if isinstance(v, list):
+             return v
+         raise ValueError(f"Invalid formats type: {type(v)}. Expected ScrapeFormats or List[FormatOption]")
+
+ class ScrapeRequest(BaseModel):
+     """Request for scraping a single URL."""
+     url: str
+     options: Optional[ScrapeOptions] = None
+
+ class ScrapeData(Document):
+     """Scrape results data."""
+     pass
+
+ class ScrapeResponse(BaseResponse[ScrapeData]):
+     """Response for scrape operations."""
+     pass
+
+ # Crawl types
+ class CrawlRequest(BaseModel):
+     """Request for crawling a website."""
+     url: str
+     prompt: Optional[str] = None
+     exclude_paths: Optional[List[str]] = None
+     include_paths: Optional[List[str]] = None
+     max_discovery_depth: Optional[int] = None
+     sitemap: Literal["skip", "include"] = "include"
+     ignore_query_parameters: bool = False
+     limit: Optional[int] = None
+     crawl_entire_domain: bool = False
+     allow_external_links: bool = False
+     allow_subdomains: bool = False
+     delay: Optional[int] = None
+     max_concurrency: Optional[int] = None
+     webhook: Optional[Union[str, WebhookConfig]] = None
+     scrape_options: Optional[ScrapeOptions] = None
+     zero_data_retention: bool = False
+
+ class CrawlResponse(BaseModel):
+     """Information about a crawl job."""
+     id: str
+     url: str
+
+ class CrawlJob(BaseModel):
+     """Crawl job status and progress data."""
+     status: Literal["scraping", "completed", "failed"]
+     total: int = 0
+     completed: int = 0
+     credits_used: int = 0
+     expires_at: Optional[datetime] = None
+     next: Optional[str] = None
+     data: List[Document] = []
+
+ class SearchResultWeb(BaseModel):
+     """A web search result with URL, title, and description."""
+     url: str
+     title: Optional[str] = None
+     description: Optional[str] = None
+     category: Optional[str] = None
+
+ class SearchResultNews(BaseModel):
+     """A news search result with URL, title, snippet, date, image URL, and position."""
+     title: Optional[str] = None
+     url: Optional[str] = None
+     snippet: Optional[str] = None
+     date: Optional[str] = None
+     image_url: Optional[str] = None
+     position: Optional[int] = None
+     category: Optional[str] = None
+
+ class SearchResultImages(BaseModel):
+     """An image search result with URL, title, image URL, image width, image height, and position."""
+     title: Optional[str] = None
+     image_url: Optional[str] = None
+     image_width: Optional[int] = None
+     image_height: Optional[int] = None
+     url: Optional[str] = None
+     position: Optional[int] = None
+
+ class SearchData(BaseModel):
+     """Search results grouped by source type."""
+     web: Optional[List[Union[SearchResultWeb, Document]]] = None
+     news: Optional[List[Union[SearchResultNews, Document]]] = None
+     images: Optional[List[Union[SearchResultImages, Document]]] = None
+
+ class MapDocument(Document):
+     """A document from a map operation with URL and description."""
+     url: str
+     description: Optional[str] = None
+
+ # Crawl params types
+ class CrawlParamsRequest(BaseModel):
+     """Request for getting crawl parameters from LLM."""
+     url: str
+     prompt: str
+
+ class CrawlParamsData(BaseModel):
+     """Data returned from crawl params endpoint."""
+     include_paths: Optional[List[str]] = None
+     exclude_paths: Optional[List[str]] = None
+     max_discovery_depth: Optional[int] = None
+     ignore_sitemap: bool = False
+     ignore_query_parameters: bool = False
+     limit: Optional[int] = None
+     crawl_entire_domain: bool = False
+     allow_external_links: bool = False
+     allow_subdomains: bool = False
+     delay: Optional[int] = None
+     max_concurrency: Optional[int] = None
+     webhook: Optional[Union[str, WebhookConfig]] = None
+     scrape_options: Optional[ScrapeOptions] = None
+     zero_data_retention: bool = False
+     warning: Optional[str] = None
+
+ class CrawlParamsResponse(BaseResponse[CrawlParamsData]):
+     """Response from crawl params endpoint."""
+     pass
+
+ # Batch scrape types
+ class BatchScrapeRequest(BaseModel):
+     """Request for batch scraping multiple URLs (internal helper only)."""
+     urls: List[str]
+     options: Optional[ScrapeOptions] = None
+
+ class BatchScrapeResponse(BaseModel):
+     """Response from starting a batch scrape job (mirrors CrawlResponse naming)."""
+     id: str
+     url: str
+     invalid_urls: Optional[List[str]] = None
+
+ class BatchScrapeJob(BaseModel):
+     """Batch scrape job status and results."""
+     status: Literal["scraping", "completed", "failed", "cancelled"]
+     completed: int
+     total: int
+     credits_used: Optional[int] = None
+     expires_at: Optional[datetime] = None
+     next: Optional[str] = None
+     data: List[Document] = []
+
+ # Map types
+ class MapOptions(BaseModel):
+     """Options for mapping operations."""
+     search: Optional[str] = None
+     sitemap: Literal["only", "include", "skip"] = "include"
+     include_subdomains: Optional[bool] = None
+     limit: Optional[int] = None
+     timeout: Optional[int] = None
+
+ class MapRequest(BaseModel):
+     """Request for mapping a website."""
+     url: str
+     options: Optional[MapOptions] = None
+
+ class MapData(BaseModel):
+     """Map results data."""
+     links: List['SearchResult']
+
+ class MapResponse(BaseResponse[MapData]):
+     """Response for map operations."""
+     pass
+
+ # Extract types
+ class ExtractResponse(BaseModel):
+     """Response for extract operations (start/status/final)."""
+     success: Optional[bool] = None
+     id: Optional[str] = None
+     status: Optional[Literal["processing", "completed", "failed", "cancelled"]] = None
+     data: Optional[Any] = None
+     error: Optional[str] = None
+     warning: Optional[str] = None
+     sources: Optional[Dict[str, Any]] = None
+     expires_at: Optional[datetime] = None
+
+ # Usage/limits types
+ class ConcurrencyCheck(BaseModel):
+     """Current concurrency and limits for the team/API key."""
+     concurrency: int
+     max_concurrency: int
+
+ class CreditUsage(BaseModel):
+     """Remaining credits for the team/API key."""
+     remaining_credits: int
+
+ class TokenUsage(BaseModel):
+     """Recent token usage metrics (if available)."""
+     remaining_tokens: int
+
+ # Action types
+ class WaitAction(BaseModel):
+     """Wait action to perform during scraping."""
+     type: Literal["wait"] = "wait"
+     milliseconds: Optional[int] = None
+     selector: Optional[str] = None
+
+ class ScreenshotAction(BaseModel):
+     """Screenshot action to perform during scraping."""
+     type: Literal["screenshot"] = "screenshot"
+     full_page: Optional[bool] = None
+     quality: Optional[int] = None
+     viewport: Optional[Union[Dict[str, int], Viewport]] = None
+
+ class ClickAction(BaseModel):
+     """Click action to perform during scraping."""
+     type: Literal["click"] = "click"
+     selector: str
+
+ class WriteAction(BaseModel):
+     """Write action to perform during scraping."""
+     type: Literal["write"] = "write"
+     text: str
+
+ class PressAction(BaseModel):
+     """Press action to perform during scraping."""
+     type: Literal["press"] = "press"
+     key: str
+
+ class ScrollAction(BaseModel):
+     """Scroll action to perform during scraping."""
+     type: Literal["scroll"] = "scroll"
+     direction: Literal["up", "down"]
+     selector: Optional[str] = None
+
+ class ScrapeAction(BaseModel):
+     """Scrape action to perform during scraping."""
+     type: Literal["scrape"] = "scrape"
+
+ class ExecuteJavascriptAction(BaseModel):
+     """Execute javascript action to perform during scraping."""
+     type: Literal["executeJavascript"] = "executeJavascript"
+     script: str
+
+ class PDFAction(BaseModel):
+     """PDF action to perform during scraping."""
+     type: Literal["pdf"] = "pdf"
+     format: Optional[Literal["A0", "A1", "A2", "A3", "A4", "A5", "A6", "Letter", "Legal", "Tabloid", "Ledger"]] = None
+     landscape: Optional[bool] = None
+     scale: Optional[float] = None
+
+ # Location types
+ class Location(BaseModel):
+     """Location configuration for scraping."""
+     country: Optional[str] = None
+     languages: Optional[List[str]] = None
+
+ class SearchRequest(BaseModel):
+     """Request for search operations."""
+     query: str
+     sources: Optional[List[SourceOption]] = None
+     categories: Optional[List[CategoryOption]] = None
+     limit: Optional[int] = 5
+     tbs: Optional[str] = None
+     location: Optional[str] = None
+     ignore_invalid_urls: Optional[bool] = None
+     timeout: Optional[int] = 60000
+     scrape_options: Optional[ScrapeOptions] = None
+
+     @field_validator('sources')
+     @classmethod
+     def validate_sources(cls, v):
+         """Validate and normalize sources input."""
+         if v is None:
+             return v
+
+         normalized_sources = []
+         for source in v:
+             if isinstance(source, str):
+                 normalized_sources.append(Source(type=source))
+             elif isinstance(source, dict):
+                 normalized_sources.append(Source(**source))
+             elif isinstance(source, Source):
+                 normalized_sources.append(source)
+             else:
+                 raise ValueError(f"Invalid source format: {source}")
+
+         return normalized_sources
+
+     @field_validator('categories')
+     @classmethod
+     def validate_categories(cls, v):
+         """Validate and normalize categories input."""
+         if v is None:
+             return v
+
+         normalized_categories = []
+         for category in v:
+             if isinstance(category, str):
+                 normalized_categories.append(Category(type=category))
+             elif isinstance(category, dict):
+                 normalized_categories.append(Category(**category))
+             elif isinstance(category, Category):
+                 normalized_categories.append(category)
+             else:
+                 raise ValueError(f"Invalid category format: {category}")
+
+         return normalized_categories
+
+ class LinkResult(BaseModel):
+     """A generic link result with optional metadata (used by search and map)."""
+     url: str
+     title: Optional[str] = None
+     description: Optional[str] = None
+
+ # Backward-compatible alias for existing tests/usages
+ SearchResult = LinkResult
+
+ class SearchData(BaseModel):
+     """Search results grouped by source type."""
+     web: Optional[List[Union[SearchResultWeb, Document]]] = None
+     news: Optional[List[Union[SearchResultNews, Document]]] = None
+     images: Optional[List[Union[SearchResultImages, Document]]] = None
+
+ class SearchResponse(BaseResponse[SearchData]):
+     """Response from search operation."""
+     pass
+
+ # Error types
+ class ErrorDetails(BaseModel):
+     """Detailed error information."""
+     code: Optional[str] = None
+     message: str
+     details: Optional[Dict[str, Any]] = None
+
+ class ErrorResponse(BaseModel):
+     """Error response structure."""
+     success: bool = False
+     error: str
+     details: Optional[ErrorDetails] = None
+
+ # Job management types
+ class JobStatus(BaseModel):
+     """Generic job status information."""
+     id: str
+     status: Literal["pending", "scraping", "completed", "failed"]
+     current: Optional[int] = None
+     total: Optional[int] = None
+     created_at: Optional[datetime] = None
+     completed_at: Optional[datetime] = None
+     expires_at: Optional[datetime] = None
+
+ class CrawlError(BaseModel):
+     """A crawl error."""
+     id: str
+     timestamp: Optional[datetime] = None
+     url: str
+     code: Optional[str] = None
+     error: str
+
+ class CrawlErrorsResponse(BaseModel):
+     """Response from crawl error monitoring."""
+     errors: List[CrawlError]
+     robots_blocked: List[str]
+
+ class ActiveCrawl(BaseModel):
+     """Information about an active crawl job."""
+     id: str
+     team_id: str
+     url: str
+     options: Optional[Dict[str, Any]] = None
+
+ class ActiveCrawlsResponse(BaseModel):
+     """Response from active crawls endpoint."""
+     success: bool = True
+     crawls: List[ActiveCrawl]
+
+ # Configuration types
+ class ClientConfig(BaseModel):
+     """Configuration for the Firecrawl client."""
+     api_key: str
+     api_url: str = "https://api.firecrawl.dev"
+     timeout: Optional[float] = None
+     max_retries: int = 3
+     backoff_factor: float = 0.5
+
+ # Response union types
+ AnyResponse = Union[
+     ScrapeResponse,
+     CrawlResponse,
+     BatchScrapeResponse,
+     MapResponse,
+     SearchResponse,
+     ErrorResponse,
+ ]
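
For orientation, a minimal usage sketch of the v2 types added in the file above (illustrative only, not part of the package diff; it assumes firecrawl-py 3.3.0 and pydantic v2 are installed so that the module imports as firecrawl.v2.types):

from firecrawl.v2.types import Document, DocumentMetadata, SearchRequest

# The mode='before' validators coerce common API quirks: list-valued single
# fields are joined into one string and string status codes become ints.
meta = DocumentMetadata(status_code="200", og_title=["Home", "Acme"])
assert meta.status_code == 200
assert meta.og_title == "Home, Acme"

# Document.metadata_typed always yields a DocumentMetadata, even when the
# metadata field is missing, so attribute access stays LSP-friendly.
doc = Document(markdown="# Hello", metadata=None)
assert doc.metadata_typed.title is None
assert doc.metadata_dict == {}

# SearchRequest validators normalize plain strings into Source/Category
# models; "github" here is just an illustrative category string.
req = SearchRequest(query="firecrawl", sources=["web", "news"], categories=["github"])
assert req.sources[0].type == "web"
assert req.categories[0].type == "github"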
build/lib/firecrawl/v2/utils/__init__.py
@@ -0,0 +1,9 @@
+ """
+ Utility modules for v2 API client.
+ """
+
+ from .http_client import HttpClient
+ from .error_handler import FirecrawlError, handle_response_error
+ from .validation import validate_scrape_options, prepare_scrape_options
+
+ __all__ = ['HttpClient', 'FirecrawlError', 'handle_response_error', 'validate_scrape_options', 'prepare_scrape_options']
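
The __init__ above only re-exports the v2 helpers; a hedged sketch of the resulting import surface (the names come from the __all__ list shown in the diff, nothing about their call signatures is shown here):

# Consolidated imports enabled by the package __init__ above.
from firecrawl.v2.utils import (
    HttpClient,
    FirecrawlError,
    handle_response_error,
    validate_scrape_options,
    prepare_scrape_options,
)

# Equivalent submodule paths the __init__ shortens:
# from firecrawl.v2.utils.http_client import HttpClient
# from firecrawl.v2.utils.error_handler import FirecrawlError, handle_response_error
# from firecrawl.v2.utils.validation import validate_scrape_options, prepare_scrape_options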