firecrawl-4.12.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
firecrawl/v2/client_async.py
@@ -0,0 +1,408 @@
+"""
+Async v2 client mirroring the regular client surface using true async HTTP transport.
+"""
+
+import os
+import asyncio
+import time
+from typing import Optional, List, Dict, Any, Union, Callable, Literal
+from .types import (
+    ScrapeOptions,
+    CrawlRequest,
+    WebhookConfig,
+    SearchRequest,
+    SearchData,
+    SourceOption,
+    CrawlResponse,
+    CrawlJob,
+    CrawlParamsRequest,
+    CrawlParamsData,
+    CrawlErrorsResponse,
+    ActiveCrawlsResponse,
+    MapOptions,
+    MapData,
+    FormatOption,
+    WaitAction,
+    ScreenshotAction,
+    ClickAction,
+    WriteAction,
+    PressAction,
+    ScrollAction,
+    ScrapeAction,
+    ExecuteJavascriptAction,
+    PDFAction,
+    Location,
+    PaginationConfig,
+)
+from .utils.http_client import HttpClient
+from .utils.http_client_async import AsyncHttpClient
+
+from .methods.aio import scrape as async_scrape  # type: ignore[attr-defined]
+from .methods.aio import batch as async_batch  # type: ignore[attr-defined]
+from .methods.aio import crawl as async_crawl  # type: ignore[attr-defined]
+from .methods.aio import search as async_search  # type: ignore[attr-defined]
+from .methods.aio import map as async_map  # type: ignore[attr-defined]
+from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
+from .methods.aio import extract as async_extract  # type: ignore[attr-defined]
+from .methods.aio import agent as async_agent  # type: ignore[attr-defined]
+
+from .watcher_async import AsyncWatcher
+
+class AsyncFirecrawlClient:
+    @staticmethod
+    def _is_cloud_service(url: str) -> bool:
+        return "api.firecrawl.dev" in url.lower()
+
+    def __init__(self, api_key: Optional[str] = None, api_url: str = "https://api.firecrawl.dev"):
+        if api_key is None:
+            api_key = os.getenv("FIRECRAWL_API_KEY")
+        if self._is_cloud_service(api_url) and not api_key:
+            raise ValueError("API key is required for the cloud API. Set FIRECRAWL_API_KEY or pass api_key.")
+        self.http_client = HttpClient(api_key, api_url)
+        self.async_http_client = AsyncHttpClient(api_key, api_url)
+
+    # Scrape
+    async def scrape(
+        self,
+        url: str,
+        **kwargs,
+    ):
+        options = ScrapeOptions(**{k: v for k, v in kwargs.items() if v is not None}) if kwargs else None
+        return await async_scrape.scrape(self.async_http_client, url, options)
+
+    # Search
+    async def search(
+        self,
+        query: str,
+        **kwargs,
+    ) -> SearchData:
+        request = SearchRequest(query=query, **{k: v for k, v in kwargs.items() if v is not None})
+        return await async_search.search(self.async_http_client, request)
+
+    async def start_crawl(self, url: str, **kwargs) -> CrawlResponse:
+        request = CrawlRequest(url=url, **kwargs)
+        return await async_crawl.start_crawl(self.async_http_client, request)
+
+    async def wait_crawl(
+        self,
+        job_id: str,
+        poll_interval: int = 2,
+        timeout: Optional[int] = None,
+        *,
+        request_timeout: Optional[float] = None,
+    ) -> CrawlJob:
+        """
+        Polls the status of a crawl job until it reaches a terminal state.
+
+        Args:
+            job_id (str): The ID of the crawl job to poll.
+            poll_interval (int, optional): Number of seconds to wait between polling attempts. Defaults to 2.
+            timeout (Optional[int], optional): Maximum number of seconds to wait for the entire crawl job to complete before timing out. If None, waits indefinitely. Defaults to None.
+            request_timeout (Optional[float], optional): Timeout (in seconds) for each individual HTTP request, including pagination requests when fetching results. If there are multiple pages, each page request gets this timeout. If None, no per-request timeout is set. Defaults to None.
+
+        Returns:
+            CrawlJob: The final status of the crawl job when it reaches a terminal state.
+
+        Raises:
+            TimeoutError: If the crawl does not reach a terminal state within the specified timeout.
+
+        Terminal states:
+            - "completed": The crawl finished successfully.
+            - "failed": The crawl finished with an error.
+            - "cancelled": The crawl was cancelled.
+        """
+        start = time.monotonic()
+        while True:
+            status = await async_crawl.get_crawl_status(
+                self.async_http_client,
+                job_id,
+                request_timeout=request_timeout,
+            )
+            if status.status in ["completed", "failed", "cancelled"]:
+                return status
+            if timeout and (time.monotonic() - start) > timeout:
+                raise TimeoutError("Crawl wait timed out")
+            await asyncio.sleep(poll_interval)
+
+    async def crawl(self, **kwargs) -> CrawlJob:
+        # wrapper combining start and wait
+        resp = await self.start_crawl(
+            **{k: v for k, v in kwargs.items() if k not in ("poll_interval", "timeout", "request_timeout")}
+        )
+        poll_interval = kwargs.get("poll_interval", 2)
+        timeout = kwargs.get("timeout")
+        request_timeout = kwargs.get("request_timeout")
+        effective_request_timeout = request_timeout if request_timeout is not None else timeout
+        return await self.wait_crawl(
+            resp.id,
+            poll_interval=poll_interval,
+            timeout=timeout,
+            request_timeout=effective_request_timeout,
+        )
+
+    async def get_crawl_status(
+        self,
+        job_id: str,
+        pagination_config: Optional[PaginationConfig] = None,
+        *,
+        request_timeout: Optional[float] = None,
+    ) -> CrawlJob:
+        """
+        Get the status of a crawl job.
+
+        Args:
+            job_id: ID of the crawl job
+            pagination_config: Optional configuration for pagination behavior
+            request_timeout: Timeout (in seconds) for each individual HTTP request. When auto-pagination
+                is enabled (default) and there are multiple pages of results, this timeout applies to
+                each page request separately, not to the entire operation
+
+        Returns:
+            CrawlJob with current status and data
+
+        Raises:
+            Exception: If the status check fails
+        """
+        return await async_crawl.get_crawl_status(
+            self.async_http_client,
+            job_id,
+            pagination_config=pagination_config,
+            request_timeout=request_timeout,
+        )
+
+    async def cancel_crawl(self, job_id: str) -> bool:
+        return await async_crawl.cancel_crawl(self.async_http_client, job_id)
+
+    async def crawl_params_preview(self, url: str, prompt: str) -> CrawlParamsData:
+        req = CrawlParamsRequest(url=url, prompt=prompt)
+        return await async_crawl.crawl_params_preview(self.async_http_client, req)
+
+    async def get_crawl_errors(self, crawl_id: str) -> CrawlErrorsResponse:
+        return await async_crawl.get_crawl_errors(self.async_http_client, crawl_id)
+
+    async def get_active_crawls(self) -> ActiveCrawlsResponse:
+        return await async_crawl.get_active_crawls(self.async_http_client)
+
+    async def active_crawls(self) -> ActiveCrawlsResponse:
+        return await self.get_active_crawls()
+
+    # Map
+    async def map(
+        self,
+        url: str,
+        *,
+        search: Optional[str] = None,
+        include_subdomains: Optional[bool] = None,
+        limit: Optional[int] = None,
+        sitemap: Optional[Literal["only", "include", "skip"]] = None,
+        timeout: Optional[int] = None,
+        integration: Optional[str] = None,
+    ) -> MapData:
+        options = MapOptions(
+            search=search,
+            include_subdomains=include_subdomains,
+            limit=limit,
+            sitemap=sitemap if sitemap is not None else "include",
+            timeout=timeout,
+            integration=integration,
+        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, integration, timeout]) else None
+        return await async_map.map(self.async_http_client, url, options)
+
+    async def start_batch_scrape(self, urls: List[str], **kwargs) -> Any:
+        return await async_batch.start_batch_scrape(self.async_http_client, urls, **kwargs)
+
+    async def wait_batch_scrape(self, job_id: str, poll_interval: int = 2, timeout: Optional[int] = None) -> Any:
+        start = asyncio.get_event_loop().time()
+        while True:
+            status = await async_batch.get_batch_scrape_status(self.async_http_client, job_id)
+            if status.status in ["completed", "failed", "cancelled"]:
+                return status
+            if timeout and (asyncio.get_event_loop().time() - start) > timeout:
+                raise TimeoutError("Batch wait timed out")
+            await asyncio.sleep(poll_interval)
+
+    async def batch_scrape(self, urls: List[str], **kwargs) -> Any:
+        # waiter wrapper
+        start = await self.start_batch_scrape(urls, **{k: v for k, v in kwargs.items() if k not in ("poll_interval", "timeout")})
+        job_id = start.id
+        poll_interval = kwargs.get("poll_interval", 2)
+        timeout = kwargs.get("timeout")
+        return await self.wait_batch_scrape(job_id, poll_interval=poll_interval, timeout=timeout)
+
+    async def get_batch_scrape_status(
+        self,
+        job_id: str,
+        pagination_config: Optional[PaginationConfig] = None
+    ):
+        return await async_batch.get_batch_scrape_status(
+            self.async_http_client,
+            job_id,
+            pagination_config=pagination_config
+        )
+
+    async def cancel_batch_scrape(self, job_id: str) -> bool:
+        return await async_batch.cancel_batch_scrape(self.async_http_client, job_id)
+
+    async def get_batch_scrape_errors(self, job_id: str) -> CrawlErrorsResponse:
+        # Returns v2 errors structure; typed as CrawlErrorsResponse for parity
+        return await async_batch.get_batch_scrape_errors(self.async_http_client, job_id)  # type: ignore[return-value]
+
+    # Extract (proxy to v1 async)
+    async def extract(
+        self,
+        urls: Optional[List[str]] = None,
+        *,
+        prompt: Optional[str] = None,
+        schema: Optional[Dict[str, Any]] = None,
+        system_prompt: Optional[str] = None,
+        allow_external_links: Optional[bool] = None,
+        enable_web_search: Optional[bool] = None,
+        show_sources: Optional[bool] = None,
+        scrape_options: Optional['ScrapeOptions'] = None,
+        ignore_invalid_urls: Optional[bool] = None,
+        poll_interval: int = 2,
+        timeout: Optional[int] = None,
+        integration: Optional[str] = None,
+    ):
+        return await async_extract.extract(
+            self.async_http_client,
+            urls,
+            prompt=prompt,
+            schema=schema,
+            system_prompt=system_prompt,
+            allow_external_links=allow_external_links,
+            enable_web_search=enable_web_search,
+            show_sources=show_sources,
+            scrape_options=scrape_options,
+            ignore_invalid_urls=ignore_invalid_urls,
+            poll_interval=poll_interval,
+            timeout=timeout,
+            integration=integration,
+        )
+
+    async def get_extract_status(self, job_id: str):
+        return await async_extract.get_extract_status(self.async_http_client, job_id)
+
+    async def start_extract(
+        self,
+        urls: Optional[List[str]] = None,
+        *,
+        prompt: Optional[str] = None,
+        schema: Optional[Dict[str, Any]] = None,
+        system_prompt: Optional[str] = None,
+        allow_external_links: Optional[bool] = None,
+        enable_web_search: Optional[bool] = None,
+        show_sources: Optional[bool] = None,
+        scrape_options: Optional['ScrapeOptions'] = None,
+        ignore_invalid_urls: Optional[bool] = None,
+        integration: Optional[str] = None,
+    ):
+        return await async_extract.start_extract(
+            self.async_http_client,
+            urls,
+            prompt=prompt,
+            schema=schema,
+            system_prompt=system_prompt,
+            allow_external_links=allow_external_links,
+            enable_web_search=enable_web_search,
+            show_sources=show_sources,
+            scrape_options=scrape_options,
+            ignore_invalid_urls=ignore_invalid_urls,
+            integration=integration,
+        )
+
+    # Agent
+    async def agent(
+        self,
+        urls: Optional[List[str]] = None,
+        *,
+        prompt: str,
+        schema: Optional[Any] = None,
+        integration: Optional[str] = None,
+        poll_interval: int = 2,
+        timeout: Optional[int] = None,
+        max_credits: Optional[int] = None,
+        strict_constrain_to_urls: Optional[bool] = None,
+    ):
+        return await async_agent.agent(
+            self.async_http_client,
+            urls,
+            prompt=prompt,
+            schema=schema,
+            integration=integration,
+            poll_interval=poll_interval,
+            timeout=timeout,
+            max_credits=max_credits,
+            strict_constrain_to_urls=strict_constrain_to_urls,
+        )
+
+    async def get_agent_status(self, job_id: str):
+        return await async_agent.get_agent_status(self.async_http_client, job_id)
+
+    async def start_agent(
+        self,
+        urls: Optional[List[str]] = None,
+        *,
+        prompt: str,
+        schema: Optional[Any] = None,
+        integration: Optional[str] = None,
+        max_credits: Optional[int] = None,
+        strict_constrain_to_urls: Optional[bool] = None,
+    ):
+        return await async_agent.start_agent(
+            self.async_http_client,
+            urls,
+            prompt=prompt,
+            schema=schema,
+            integration=integration,
+            max_credits=max_credits,
+            strict_constrain_to_urls=strict_constrain_to_urls,
+        )
+
+    async def cancel_agent(self, job_id: str) -> bool:
+        """Cancel a running agent job.
+
+        Args:
+            job_id: Agent job ID
+
+        Returns:
+            True if the agent was cancelled
+        """
+        return await async_agent.cancel_agent(self.async_http_client, job_id)
+
+    # Usage endpoints
+    async def get_concurrency(self):
+        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
+        return await async_usage.get_concurrency(self.async_http_client)
+
+    async def get_credit_usage(self):
+        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
+        return await async_usage.get_credit_usage(self.async_http_client)
+
+    async def get_token_usage(self):
+        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
+        return await async_usage.get_token_usage(self.async_http_client)
+
+    async def get_credit_usage_historical(self, by_api_key: bool = False):
+        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
+        return await async_usage.get_credit_usage_historical(self.async_http_client, by_api_key)
+
+    async def get_token_usage_historical(self, by_api_key: bool = False):
+        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
+        return await async_usage.get_token_usage_historical(self.async_http_client, by_api_key)
+
+    async def get_queue_status(self):
+        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
+        return await async_usage.get_queue_status(self.async_http_client)
+
+    # Watcher (sync object usable from async contexts)
+    def watcher(
+        self,
+        job_id: str,
+        *,
+        kind: Literal["crawl", "batch"] = "crawl",
+        poll_interval: int = 2,
+        timeout: Optional[int] = None,
+    ) -> AsyncWatcher:
+        return AsyncWatcher(self, job_id, kind=kind, poll_interval=poll_interval, timeout=timeout)
+
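For orientation, here is a minimal usage sketch of the AsyncFirecrawlClient added above. It assumes FIRECRAWL_API_KEY is set in the environment; the option names (formats, limit) and result fields (status, data) are assumptions based on the v2 types and docstrings referenced by the client, not guaranteed API surface.

```python
# Illustrative sketch only: exercises the AsyncFirecrawlClient surface shown above.
# Assumes FIRECRAWL_API_KEY is exported; kwargs are forwarded into ScrapeOptions /
# CrawlRequest, so the specific option names used here are assumptions.
import asyncio

from firecrawl.v2.client_async import AsyncFirecrawlClient


async def main() -> None:
    client = AsyncFirecrawlClient()  # reads FIRECRAWL_API_KEY from the environment

    # Single-page scrape: kwargs become ScrapeOptions fields.
    doc = await client.scrape("https://example.com", formats=["markdown"])
    print(type(doc).__name__)

    # Start a crawl and poll until it reaches a terminal state ("completed",
    # "failed", or "cancelled"), as documented in wait_crawl above.
    job = await client.crawl(url="https://example.com", limit=5, poll_interval=2, timeout=300)
    print(job.status, len(job.data or []))


asyncio.run(main())
```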
firecrawl/v2/methods/agent.py
@@ -0,0 +1,144 @@
+from typing import Any, Dict, List, Optional
+import time
+
+from ..types import AgentResponse
+from ..utils.http_client import HttpClient
+from ..utils.error_handler import handle_response_error
+from ..utils.validation import _normalize_schema
+
+
+def _prepare_agent_request(
+    urls: Optional[List[str]],
+    *,
+    prompt: str,
+    schema: Optional[Any] = None,
+    integration: Optional[str] = None,
+    max_credits: Optional[int] = None,
+    strict_constrain_to_urls: Optional[bool] = None,
+) -> Dict[str, Any]:
+    body: Dict[str, Any] = {}
+    if urls is not None:
+        body["urls"] = urls
+    body["prompt"] = prompt
+    if schema is not None:
+        normalized_schema = _normalize_schema(schema)
+        if normalized_schema is not None:
+            body["schema"] = normalized_schema
+        else:
+            raise ValueError(
+                f"Invalid schema type: {type(schema).__name__}. "
+                "Schema must be a dict, Pydantic BaseModel class, or Pydantic model instance."
+            )
+    if integration is not None and str(integration).strip():
+        body["integration"] = str(integration).strip()
+    if max_credits is not None and max_credits > 0:
+        body["maxCredits"] = max_credits
+    if strict_constrain_to_urls is not None and strict_constrain_to_urls:
+        body["strictConstrainToURLs"] = strict_constrain_to_urls
+    return body
+
+
+def _normalize_agent_response_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
+    out = dict(payload)
+    if "expiresAt" in out and "expires_at" not in out:
+        out["expires_at"] = out["expiresAt"]
+    if "creditsUsed" in out and "credits_used" not in out:
+        out["credits_used"] = out["creditsUsed"]
+    return out
+
+
+def start_agent(
+    client: HttpClient,
+    urls: Optional[List[str]],
+    *,
+    prompt: str,
+    schema: Optional[Any] = None,
+    integration: Optional[str] = None,
+    max_credits: Optional[int] = None,
+    strict_constrain_to_urls: Optional[bool] = None,
+) -> AgentResponse:
+    body = _prepare_agent_request(
+        urls,
+        prompt=prompt,
+        schema=schema,
+        integration=integration,
+        max_credits=max_credits,
+        strict_constrain_to_urls=strict_constrain_to_urls,
+    )
+    resp = client.post("/v2/agent", body)
+    if not resp.ok:
+        handle_response_error(resp, "agent")
+    payload = _normalize_agent_response_payload(resp.json())
+    return AgentResponse(**payload)
+
+
+def get_agent_status(client: HttpClient, job_id: str) -> AgentResponse:
+    resp = client.get(f"/v2/agent/{job_id}")
+    if not resp.ok:
+        handle_response_error(resp, "agent-status")
+    payload = _normalize_agent_response_payload(resp.json())
+    return AgentResponse(**payload)
+
+
+def wait_agent(
+    client: HttpClient,
+    job_id: str,
+    *,
+    poll_interval: int = 2,
+    timeout: Optional[int] = None,
+) -> AgentResponse:
+    start_ts = time.time()
+    while True:
+        status = get_agent_status(client, job_id)
+        if status.status in ("completed", "failed", "cancelled"):
+            return status
+        if timeout is not None and (time.time() - start_ts) > timeout:
+            return status
+        time.sleep(max(1, poll_interval))
+
+
+def agent(
+    client: HttpClient,
+    urls: Optional[List[str]],
+    *,
+    prompt: str,
+    schema: Optional[Any] = None,
+    integration: Optional[str] = None,
+    poll_interval: int = 2,
+    timeout: Optional[int] = None,
+    max_credits: Optional[int] = None,
+    strict_constrain_to_urls: Optional[bool] = None,
+) -> AgentResponse:
+    started = start_agent(
+        client,
+        urls,
+        prompt=prompt,
+        schema=schema,
+        integration=integration,
+        max_credits=max_credits,
+        strict_constrain_to_urls=strict_constrain_to_urls,
+    )
+    job_id = getattr(started, "id", None)
+    if not job_id:
+        return started
+    return wait_agent(client, job_id, poll_interval=poll_interval, timeout=timeout)
+
+
+def cancel_agent(client: HttpClient, job_id: str) -> bool:
+    """
+    Cancel a running agent job.
+
+    Args:
+        client: HTTP client instance
+        job_id: ID of the agent job to cancel
+
+    Returns:
+        bool: True if the agent was cancelled, False otherwise
+
+    Raises:
+        Exception: If the cancellation fails
+    """
+    resp = client.delete(f"/v2/agent/{job_id}")
+    if not resp.ok:
+        handle_response_error(resp, "cancel agent")
+    return resp.json().get("success", False)
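As a quick check on the request preparation in agent.py above, the sketch below shows what _prepare_agent_request emits for typical arguments. The expected dict follows directly from the function body (snake_case parameters mapped to the camelCase fields maxCredits and strictConstrainToURLs); it is a verification sketch, not an API contract.

```python
# Sketch: output of the request-preparation helper shown above for typical inputs.
# Derived directly from the function body in firecrawl/v2/methods/agent.py.
from firecrawl.v2.methods.agent import _prepare_agent_request

body = _prepare_agent_request(
    ["https://example.com"],
    prompt="Extract the page title",
    max_credits=50,
    strict_constrain_to_urls=True,
)
# schema and integration are omitted, so no "schema"/"integration" keys appear.
assert body == {
    "urls": ["https://example.com"],
    "prompt": "Extract the page title",
    "maxCredits": 50,
    "strictConstrainToURLs": True,
}
```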
firecrawl/v2/methods/aio/__init__.py
@@ -0,0 +1 @@
+# Async (aio) method modules for v2
firecrawl/v2/methods/aio/agent.py
@@ -0,0 +1,137 @@
+from typing import Any, Dict, List, Optional
+import asyncio
+
+from ...types import AgentResponse
+from ...utils.http_client_async import AsyncHttpClient
+from ...utils.validation import _normalize_schema
+
+
+def _prepare_agent_request(
+    urls: Optional[List[str]],
+    *,
+    prompt: str,
+    schema: Optional[Any] = None,
+    integration: Optional[str] = None,
+    max_credits: Optional[int] = None,
+    strict_constrain_to_urls: Optional[bool] = None,
+) -> Dict[str, Any]:
+    body: Dict[str, Any] = {}
+    if urls is not None:
+        body["urls"] = urls
+    body["prompt"] = prompt
+    if schema is not None:
+        normalized_schema = _normalize_schema(schema)
+        if normalized_schema is not None:
+            body["schema"] = normalized_schema
+        else:
+            raise ValueError(
+                f"Invalid schema type: {type(schema).__name__}. "
+                "Schema must be a dict, Pydantic BaseModel class, or Pydantic model instance."
+            )
+    if integration is not None and str(integration).strip():
+        body["integration"] = str(integration).strip()
+    if max_credits is not None and max_credits > 0:
+        body["maxCredits"] = max_credits
+    if strict_constrain_to_urls is not None and strict_constrain_to_urls:
+        body["strictConstrainToURLs"] = strict_constrain_to_urls
+    return body
+
+
+def _normalize_agent_response_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
+    out = dict(payload)
+    if "expiresAt" in out and "expires_at" not in out:
+        out["expires_at"] = out["expiresAt"]
+    if "creditsUsed" in out and "credits_used" not in out:
+        out["credits_used"] = out["creditsUsed"]
+    return out
+
+
+async def start_agent(
+    client: AsyncHttpClient,
+    urls: Optional[List[str]],
+    *,
+    prompt: str,
+    schema: Optional[Any] = None,
+    integration: Optional[str] = None,
+    max_credits: Optional[int] = None,
+    strict_constrain_to_urls: Optional[bool] = None,
+) -> AgentResponse:
+    body = _prepare_agent_request(
+        urls,
+        prompt=prompt,
+        schema=schema,
+        integration=integration,
+        max_credits=max_credits,
+        strict_constrain_to_urls=strict_constrain_to_urls,
+    )
+    resp = await client.post("/v2/agent", body)
+    payload = _normalize_agent_response_payload(resp.json())
+    return AgentResponse(**payload)
+
+
+async def get_agent_status(client: AsyncHttpClient, job_id: str) -> AgentResponse:
+    resp = await client.get(f"/v2/agent/{job_id}")
+    payload = _normalize_agent_response_payload(resp.json())
+    return AgentResponse(**payload)
+
+
+async def wait_agent(
+    client: AsyncHttpClient,
+    job_id: str,
+    *,
+    poll_interval: int = 2,
+    timeout: Optional[int] = None,
+) -> AgentResponse:
+    start_ts = asyncio.get_event_loop().time()
+    while True:
+        status = await get_agent_status(client, job_id)
+        if status.status in ("completed", "failed", "cancelled"):
+            return status
+        if timeout is not None and (asyncio.get_event_loop().time() - start_ts) > timeout:
+            return status
+        await asyncio.sleep(max(1, poll_interval))
+
+
+async def agent(
+    client: AsyncHttpClient,
+    urls: Optional[List[str]],
+    *,
+    prompt: str,
+    schema: Optional[Any] = None,
+    integration: Optional[str] = None,
+    poll_interval: int = 2,
+    timeout: Optional[int] = None,
+    max_credits: Optional[int] = None,
+    strict_constrain_to_urls: Optional[bool] = None,
+) -> AgentResponse:
+    started = await start_agent(
+        client,
+        urls,
+        prompt=prompt,
+        schema=schema,
+        integration=integration,
+        max_credits=max_credits,
+        strict_constrain_to_urls=strict_constrain_to_urls,
+    )
+    job_id = getattr(started, "id", None)
+    if not job_id:
+        return started
+    return await wait_agent(client, job_id, poll_interval=poll_interval, timeout=timeout)
+
+
+async def cancel_agent(client: AsyncHttpClient, job_id: str) -> bool:
+    """
+    Cancel a running agent job.
+
+    Args:
+        client: Async HTTP client instance
+        job_id: ID of the agent job to cancel
+
+    Returns:
+        bool: True if the agent was cancelled, False otherwise
+
+    Raises:
+        Exception: If the cancellation fails
+    """
+    resp = await client.delete(f"/v2/agent/{job_id}")
+    return resp.json().get("success", False)
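The aio/agent.py module above mirrors the sync module but awaits an AsyncHttpClient and omits the per-response error handling. The sketch below shows the start/poll split using these module functions directly; the AsyncHttpClient(api_key, api_url) constructor call mirrors AsyncFirecrawlClient.__init__ above, and the id/status attributes on AgentResponse are assumptions inferred from how this module reads them.

```python
# Sketch of the start/poll split via the async agent module above.
# Constructor arguments and response attributes are assumptions (see lead-in);
# in practice these calls are normally reached through AsyncFirecrawlClient.
import asyncio
import os

from firecrawl.v2.methods.aio import agent as async_agent
from firecrawl.v2.utils.http_client_async import AsyncHttpClient


async def main() -> None:
    client = AsyncHttpClient(os.getenv("FIRECRAWL_API_KEY"), "https://api.firecrawl.dev")

    # Kick off the agent job without blocking on completion.
    started = await async_agent.start_agent(
        client,
        ["https://example.com"],
        prompt="Summarize the page",
    )

    # Poll until a terminal status or until the timeout elapses; wait_agent
    # returns the last observed status in either case.
    final = await async_agent.wait_agent(client, started.id, poll_interval=2, timeout=120)
    print(final.status)


asyncio.run(main())
```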