firecrawl-4.12.0-py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecrawl/__init__.py +87 -0
- firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
- firecrawl/__tests__/e2e/v2/test_map.py +61 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
- firecrawl/__tests__/e2e/v2/test_search.py +270 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
- firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
- firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
- firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +281 -0
- firecrawl/firecrawl.backup.py +4635 -0
- firecrawl/types.py +167 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/v1/client.py +5164 -0
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +967 -0
- firecrawl/v2/client_async.py +408 -0
- firecrawl/v2/methods/agent.py +144 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/agent.py +137 -0
- firecrawl/v2/methods/aio/batch.py +188 -0
- firecrawl/v2/methods/aio/crawl.py +351 -0
- firecrawl/v2/methods/aio/extract.py +133 -0
- firecrawl/v2/methods/aio/map.py +65 -0
- firecrawl/v2/methods/aio/scrape.py +33 -0
- firecrawl/v2/methods/aio/search.py +176 -0
- firecrawl/v2/methods/aio/usage.py +89 -0
- firecrawl/v2/methods/batch.py +499 -0
- firecrawl/v2/methods/crawl.py +592 -0
- firecrawl/v2/methods/extract.py +161 -0
- firecrawl/v2/methods/map.py +83 -0
- firecrawl/v2/methods/scrape.py +64 -0
- firecrawl/v2/methods/search.py +215 -0
- firecrawl/v2/methods/usage.py +84 -0
- firecrawl/v2/types.py +1143 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +178 -0
- firecrawl/v2/utils/http_client_async.py +69 -0
- firecrawl/v2/utils/normalize.py +125 -0
- firecrawl/v2/utils/validation.py +692 -0
- firecrawl/v2/watcher.py +301 -0
- firecrawl/v2/watcher_async.py +243 -0
- firecrawl-4.12.0.dist-info/METADATA +234 -0
- firecrawl-4.12.0.dist-info/RECORD +92 -0
- firecrawl-4.12.0.dist-info/WHEEL +5 -0
- firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
- firecrawl-4.12.0.dist-info/top_level.txt +2 -0
- tests/test_agent_integration.py +277 -0
- tests/test_api_key_handling.py +44 -0
- tests/test_change_tracking.py +98 -0
- tests/test_timeout_conversion.py +117 -0

firecrawl/v2/client_async.py
@@ -0,0 +1,408 @@

```python
"""
Async v2 client mirroring the regular client surface using true async HTTP transport.
"""

import os
import asyncio
import time
from typing import Optional, List, Dict, Any, Union, Callable, Literal
from .types import (
    ScrapeOptions,
    CrawlRequest,
    WebhookConfig,
    SearchRequest,
    SearchData,
    SourceOption,
    CrawlResponse,
    CrawlJob,
    CrawlParamsRequest,
    CrawlParamsData,
    CrawlErrorsResponse,
    ActiveCrawlsResponse,
    MapOptions,
    MapData,
    FormatOption,
    WaitAction,
    ScreenshotAction,
    ClickAction,
    WriteAction,
    PressAction,
    ScrollAction,
    ScrapeAction,
    ExecuteJavascriptAction,
    PDFAction,
    Location,
    PaginationConfig,
)
from .utils.http_client import HttpClient
from .utils.http_client_async import AsyncHttpClient

from .methods.aio import scrape as async_scrape  # type: ignore[attr-defined]
from .methods.aio import batch as async_batch  # type: ignore[attr-defined]
from .methods.aio import crawl as async_crawl  # type: ignore[attr-defined]
from .methods.aio import search as async_search  # type: ignore[attr-defined]
from .methods.aio import map as async_map  # type: ignore[attr-defined]
from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
from .methods.aio import extract as async_extract  # type: ignore[attr-defined]
from .methods.aio import agent as async_agent  # type: ignore[attr-defined]

from .watcher_async import AsyncWatcher

class AsyncFirecrawlClient:
    @staticmethod
    def _is_cloud_service(url: str) -> bool:
        return "api.firecrawl.dev" in url.lower()

    def __init__(self, api_key: Optional[str] = None, api_url: str = "https://api.firecrawl.dev"):
        if api_key is None:
            api_key = os.getenv("FIRECRAWL_API_KEY")
        if self._is_cloud_service(api_url) and not api_key:
            raise ValueError("API key is required for the cloud API. Set FIRECRAWL_API_KEY or pass api_key.")
        self.http_client = HttpClient(api_key, api_url)
        self.async_http_client = AsyncHttpClient(api_key, api_url)

    # Scrape
    async def scrape(
        self,
        url: str,
        **kwargs,
    ):
        options = ScrapeOptions(**{k: v for k, v in kwargs.items() if v is not None}) if kwargs else None
        return await async_scrape.scrape(self.async_http_client, url, options)

    # Search
    async def search(
        self,
        query: str,
        **kwargs,
    ) -> SearchData:
        request = SearchRequest(query=query, **{k: v for k, v in kwargs.items() if v is not None})
        return await async_search.search(self.async_http_client, request)

    async def start_crawl(self, url: str, **kwargs) -> CrawlResponse:
        request = CrawlRequest(url=url, **kwargs)
        return await async_crawl.start_crawl(self.async_http_client, request)

    async def wait_crawl(
        self,
        job_id: str,
        poll_interval: int = 2,
        timeout: Optional[int] = None,
        *,
        request_timeout: Optional[float] = None,
    ) -> CrawlJob:
        """
        Polls the status of a crawl job until it reaches a terminal state.

        Args:
            job_id (str): The ID of the crawl job to poll.
            poll_interval (int, optional): Number of seconds to wait between polling attempts. Defaults to 2.
            timeout (Optional[int], optional): Maximum number of seconds to wait for the entire crawl job to complete before timing out. If None, waits indefinitely. Defaults to None.
            request_timeout (Optional[float], optional): Timeout (in seconds) for each individual HTTP request, including pagination requests when fetching results. If there are multiple pages, each page request gets this timeout. If None, no per-request timeout is set. Defaults to None.

        Returns:
            CrawlJob: The final status of the crawl job when it reaches a terminal state.

        Raises:
            TimeoutError: If the crawl does not reach a terminal state within the specified timeout.

        Terminal states:
            - "completed": The crawl finished successfully.
            - "failed": The crawl finished with an error.
            - "cancelled": The crawl was cancelled.
        """
        start = time.monotonic()
        while True:
            status = await async_crawl.get_crawl_status(
                self.async_http_client,
                job_id,
                request_timeout=request_timeout,
            )
            if status.status in ["completed", "failed", "cancelled"]:
                return status
            if timeout and (time.monotonic() - start) > timeout:
                raise TimeoutError("Crawl wait timed out")
            await asyncio.sleep(poll_interval)

    async def crawl(self, **kwargs) -> CrawlJob:
        # wrapper combining start and wait
        resp = await self.start_crawl(
            **{k: v for k, v in kwargs.items() if k not in ("poll_interval", "timeout", "request_timeout")}
        )
        poll_interval = kwargs.get("poll_interval", 2)
        timeout = kwargs.get("timeout")
        request_timeout = kwargs.get("request_timeout")
        effective_request_timeout = request_timeout if request_timeout is not None else timeout
        return await self.wait_crawl(
            resp.id,
            poll_interval=poll_interval,
            timeout=timeout,
            request_timeout=effective_request_timeout,
        )

    async def get_crawl_status(
        self,
        job_id: str,
        pagination_config: Optional[PaginationConfig] = None,
        *,
        request_timeout: Optional[float] = None,
    ) -> CrawlJob:
        """
        Get the status of a crawl job.

        Args:
            job_id: ID of the crawl job
            pagination_config: Optional configuration for pagination behavior
            request_timeout: Timeout (in seconds) for each individual HTTP request. When auto-pagination
                is enabled (default) and there are multiple pages of results, this timeout applies to
                each page request separately, not to the entire operation

        Returns:
            CrawlJob with current status and data

        Raises:
            Exception: If the status check fails
        """
        return await async_crawl.get_crawl_status(
            self.async_http_client,
            job_id,
            pagination_config=pagination_config,
            request_timeout=request_timeout,
        )

    async def cancel_crawl(self, job_id: str) -> bool:
        return await async_crawl.cancel_crawl(self.async_http_client, job_id)

    async def crawl_params_preview(self, url: str, prompt: str) -> CrawlParamsData:
        req = CrawlParamsRequest(url=url, prompt=prompt)
        return await async_crawl.crawl_params_preview(self.async_http_client, req)

    async def get_crawl_errors(self, crawl_id: str) -> CrawlErrorsResponse:
        return await async_crawl.get_crawl_errors(self.async_http_client, crawl_id)

    async def get_active_crawls(self) -> ActiveCrawlsResponse:
        return await async_crawl.get_active_crawls(self.async_http_client)

    async def active_crawls(self) -> ActiveCrawlsResponse:
        return await self.get_active_crawls()

    # Map
    async def map(
        self,
        url: str,
        *,
        search: Optional[str] = None,
        include_subdomains: Optional[bool] = None,
        limit: Optional[int] = None,
        sitemap: Optional[Literal["only", "include", "skip"]] = None,
        timeout: Optional[int] = None,
        integration: Optional[str] = None,
    ) -> MapData:
        options = MapOptions(
            search=search,
            include_subdomains=include_subdomains,
            limit=limit,
            sitemap=sitemap if sitemap is not None else "include",
            timeout=timeout,
            integration=integration,
        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, integration, timeout]) else None
        return await async_map.map(self.async_http_client, url, options)

    async def start_batch_scrape(self, urls: List[str], **kwargs) -> Any:
        return await async_batch.start_batch_scrape(self.async_http_client, urls, **kwargs)

    async def wait_batch_scrape(self, job_id: str, poll_interval: int = 2, timeout: Optional[int] = None) -> Any:
        start = asyncio.get_event_loop().time()
        while True:
            status = await async_batch.get_batch_scrape_status(self.async_http_client, job_id)
            if status.status in ["completed", "failed", "cancelled"]:
                return status
            if timeout and (asyncio.get_event_loop().time() - start) > timeout:
                raise TimeoutError("Batch wait timed out")
            await asyncio.sleep(poll_interval)

    async def batch_scrape(self, urls: List[str], **kwargs) -> Any:
        # waiter wrapper
        start = await self.start_batch_scrape(urls, **{k: v for k, v in kwargs.items() if k not in ("poll_interval", "timeout")})
        job_id = start.id
        poll_interval = kwargs.get("poll_interval", 2)
        timeout = kwargs.get("timeout")
        return await self.wait_batch_scrape(job_id, poll_interval=poll_interval, timeout=timeout)

    async def get_batch_scrape_status(
        self,
        job_id: str,
        pagination_config: Optional[PaginationConfig] = None
    ):
        return await async_batch.get_batch_scrape_status(
            self.async_http_client,
            job_id,
            pagination_config=pagination_config
        )

    async def cancel_batch_scrape(self, job_id: str) -> bool:
        return await async_batch.cancel_batch_scrape(self.async_http_client, job_id)

    async def get_batch_scrape_errors(self, job_id: str) -> CrawlErrorsResponse:
        # Returns v2 errors structure; typed as CrawlErrorsResponse for parity
        return await async_batch.get_batch_scrape_errors(self.async_http_client, job_id)  # type: ignore[return-value]

    # Extract (proxy to v1 async)
    async def extract(
        self,
        urls: Optional[List[str]] = None,
        *,
        prompt: Optional[str] = None,
        schema: Optional[Dict[str, Any]] = None,
        system_prompt: Optional[str] = None,
        allow_external_links: Optional[bool] = None,
        enable_web_search: Optional[bool] = None,
        show_sources: Optional[bool] = None,
        scrape_options: Optional['ScrapeOptions'] = None,
        ignore_invalid_urls: Optional[bool] = None,
        poll_interval: int = 2,
        timeout: Optional[int] = None,
        integration: Optional[str] = None,
    ):
        return await async_extract.extract(
            self.async_http_client,
            urls,
            prompt=prompt,
            schema=schema,
            system_prompt=system_prompt,
            allow_external_links=allow_external_links,
            enable_web_search=enable_web_search,
            show_sources=show_sources,
            scrape_options=scrape_options,
            ignore_invalid_urls=ignore_invalid_urls,
            poll_interval=poll_interval,
            timeout=timeout,
            integration=integration,
        )

    async def get_extract_status(self, job_id: str):
        return await async_extract.get_extract_status(self.async_http_client, job_id)

    async def start_extract(
        self,
        urls: Optional[List[str]] = None,
        *,
        prompt: Optional[str] = None,
        schema: Optional[Dict[str, Any]] = None,
        system_prompt: Optional[str] = None,
        allow_external_links: Optional[bool] = None,
        enable_web_search: Optional[bool] = None,
        show_sources: Optional[bool] = None,
        scrape_options: Optional['ScrapeOptions'] = None,
        ignore_invalid_urls: Optional[bool] = None,
        integration: Optional[str] = None,
    ):
        return await async_extract.start_extract(
            self.async_http_client,
            urls,
            prompt=prompt,
            schema=schema,
            system_prompt=system_prompt,
            allow_external_links=allow_external_links,
            enable_web_search=enable_web_search,
            show_sources=show_sources,
            scrape_options=scrape_options,
            ignore_invalid_urls=ignore_invalid_urls,
            integration=integration,
        )

    # Agent
    async def agent(
        self,
        urls: Optional[List[str]] = None,
        *,
        prompt: str,
        schema: Optional[Any] = None,
        integration: Optional[str] = None,
        poll_interval: int = 2,
        timeout: Optional[int] = None,
        max_credits: Optional[int] = None,
        strict_constrain_to_urls: Optional[bool] = None,
    ):
        return await async_agent.agent(
            self.async_http_client,
            urls,
            prompt=prompt,
            schema=schema,
            integration=integration,
            poll_interval=poll_interval,
            timeout=timeout,
            max_credits=max_credits,
            strict_constrain_to_urls=strict_constrain_to_urls,
        )

    async def get_agent_status(self, job_id: str):
        return await async_agent.get_agent_status(self.async_http_client, job_id)

    async def start_agent(
        self,
        urls: Optional[List[str]] = None,
        *,
        prompt: str,
        schema: Optional[Any] = None,
        integration: Optional[str] = None,
        max_credits: Optional[int] = None,
        strict_constrain_to_urls: Optional[bool] = None,
    ):
        return await async_agent.start_agent(
            self.async_http_client,
            urls,
            prompt=prompt,
            schema=schema,
            integration=integration,
            max_credits=max_credits,
            strict_constrain_to_urls=strict_constrain_to_urls,
        )

    async def cancel_agent(self, job_id: str) -> bool:
        """Cancel a running agent job.

        Args:
            job_id: Agent job ID

        Returns:
            True if the agent was cancelled
        """
        return await async_agent.cancel_agent(self.async_http_client, job_id)

    # Usage endpoints
    async def get_concurrency(self):
        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
        return await async_usage.get_concurrency(self.async_http_client)

    async def get_credit_usage(self):
        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
        return await async_usage.get_credit_usage(self.async_http_client)

    async def get_token_usage(self):
        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
        return await async_usage.get_token_usage(self.async_http_client)

    async def get_credit_usage_historical(self, by_api_key: bool = False):
        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
        return await async_usage.get_credit_usage_historical(self.async_http_client, by_api_key)

    async def get_token_usage_historical(self, by_api_key: bool = False):
        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
        return await async_usage.get_token_usage_historical(self.async_http_client, by_api_key)

    async def get_queue_status(self):
        from .methods.aio import usage as async_usage  # type: ignore[attr-defined]
        return await async_usage.get_queue_status(self.async_http_client)

    # Watcher (sync object usable from async contexts)
    def watcher(
        self,
        job_id: str,
        *,
        kind: Literal["crawl", "batch"] = "crawl",
        poll_interval: int = 2,
        timeout: Optional[int] = None,
    ) -> AsyncWatcher:
        return AsyncWatcher(self, job_id, kind=kind, poll_interval=poll_interval, timeout=timeout)
```
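
For orientation, here is a minimal usage sketch of the async client reconstructed above. It is not part of the package diff; it assumes a valid FIRECRAWL_API_KEY in the environment, and the URL is a placeholder.

```python
# Minimal usage sketch for AsyncFirecrawlClient as shown in the diff above.
# Assumes FIRECRAWL_API_KEY is set; https://example.com is a placeholder URL.
import asyncio

from firecrawl.v2.client_async import AsyncFirecrawlClient


async def main() -> None:
    client = AsyncFirecrawlClient()  # falls back to os.getenv("FIRECRAWL_API_KEY")

    # One-off scrape; with no keyword options the client forwards options=None.
    doc = await client.scrape("https://example.com")
    print(doc)

    # Start a crawl and poll it to a terminal state ("completed"/"failed"/"cancelled").
    job = await client.start_crawl("https://example.com")
    final = await client.wait_crawl(job.id, poll_interval=2, timeout=120)
    print(final.status)


asyncio.run(main())
```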

firecrawl/v2/methods/agent.py
@@ -0,0 +1,144 @@

```python
from typing import Any, Dict, List, Optional
import time

from ..types import AgentResponse
from ..utils.http_client import HttpClient
from ..utils.error_handler import handle_response_error
from ..utils.validation import _normalize_schema


def _prepare_agent_request(
    urls: Optional[List[str]],
    *,
    prompt: str,
    schema: Optional[Any] = None,
    integration: Optional[str] = None,
    max_credits: Optional[int] = None,
    strict_constrain_to_urls: Optional[bool] = None,
) -> Dict[str, Any]:
    body: Dict[str, Any] = {}
    if urls is not None:
        body["urls"] = urls
    body["prompt"] = prompt
    if schema is not None:
        normalized_schema = _normalize_schema(schema)
        if normalized_schema is not None:
            body["schema"] = normalized_schema
        else:
            raise ValueError(
                f"Invalid schema type: {type(schema).__name__}. "
                "Schema must be a dict, Pydantic BaseModel class, or Pydantic model instance."
            )
    if integration is not None and str(integration).strip():
        body["integration"] = str(integration).strip()
    if max_credits is not None and max_credits > 0:
        body["maxCredits"] = max_credits
    if strict_constrain_to_urls is not None and strict_constrain_to_urls:
        body["strictConstrainToURLs"] = strict_constrain_to_urls
    return body


def _normalize_agent_response_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    out = dict(payload)
    if "expiresAt" in out and "expires_at" not in out:
        out["expires_at"] = out["expiresAt"]
    if "creditsUsed" in out and "credits_used" not in out:
        out["credits_used"] = out["creditsUsed"]
    return out


def start_agent(
    client: HttpClient,
    urls: Optional[List[str]],
    *,
    prompt: str,
    schema: Optional[Any] = None,
    integration: Optional[str] = None,
    max_credits: Optional[int] = None,
    strict_constrain_to_urls: Optional[bool] = None,
) -> AgentResponse:
    body = _prepare_agent_request(
        urls,
        prompt=prompt,
        schema=schema,
        integration=integration,
        max_credits=max_credits,
        strict_constrain_to_urls=strict_constrain_to_urls,
    )
    resp = client.post("/v2/agent", body)
    if not resp.ok:
        handle_response_error(resp, "agent")
    payload = _normalize_agent_response_payload(resp.json())
    return AgentResponse(**payload)


def get_agent_status(client: HttpClient, job_id: str) -> AgentResponse:
    resp = client.get(f"/v2/agent/{job_id}")
    if not resp.ok:
        handle_response_error(resp, "agent-status")
    payload = _normalize_agent_response_payload(resp.json())
    return AgentResponse(**payload)


def wait_agent(
    client: HttpClient,
    job_id: str,
    *,
    poll_interval: int = 2,
    timeout: Optional[int] = None,
) -> AgentResponse:
    start_ts = time.time()
    while True:
        status = get_agent_status(client, job_id)
        if status.status in ("completed", "failed", "cancelled"):
            return status
        if timeout is not None and (time.time() - start_ts) > timeout:
            return status
        time.sleep(max(1, poll_interval))


def agent(
    client: HttpClient,
    urls: Optional[List[str]],
    *,
    prompt: str,
    schema: Optional[Any] = None,
    integration: Optional[str] = None,
    poll_interval: int = 2,
    timeout: Optional[int] = None,
    max_credits: Optional[int] = None,
    strict_constrain_to_urls: Optional[bool] = None,
) -> AgentResponse:
    started = start_agent(
        client,
        urls,
        prompt=prompt,
        schema=schema,
        integration=integration,
        max_credits=max_credits,
        strict_constrain_to_urls=strict_constrain_to_urls,
    )
    job_id = getattr(started, "id", None)
    if not job_id:
        return started
    return wait_agent(client, job_id, poll_interval=poll_interval, timeout=timeout)


def cancel_agent(client: HttpClient, job_id: str) -> bool:
    """
    Cancel a running agent job.

    Args:
        client: HTTP client instance
        job_id: ID of the agent job to cancel

    Returns:
        bool: True if the agent was cancelled, False otherwise

    Raises:
        Exception: If the cancellation fails
    """
    resp = client.delete(f"/v2/agent/{job_id}")
    if not resp.ok:
        handle_response_error(resp, "cancel agent")
    return resp.json().get("success", False)
```
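
The synchronous agent helpers above can also be driven directly with the package's HttpClient. A hedged sketch follows; the API key, URL, and prompt are placeholders, not values from the diff.

```python
# Hedged sketch: call the blocking agent wrapper with the package's HttpClient.
from firecrawl.v2.utils.http_client import HttpClient
from firecrawl.v2.methods.agent import agent

client = HttpClient("fc-YOUR-KEY", "https://api.firecrawl.dev")  # placeholder key
result = agent(
    client,
    ["https://example.com"],               # placeholder URL list
    prompt="Summarize what this site offers.",
    max_credits=50,                          # optional cap, serialized as maxCredits
)
print(result.status)
```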

firecrawl/v2/methods/aio/__init__.py
@@ -0,0 +1 @@

```python
# Async (aio) method modules for v2
```

firecrawl/v2/methods/aio/agent.py
@@ -0,0 +1,137 @@

```python
from typing import Any, Dict, List, Optional
import asyncio

from ...types import AgentResponse
from ...utils.http_client_async import AsyncHttpClient
from ...utils.validation import _normalize_schema


def _prepare_agent_request(
    urls: Optional[List[str]],
    *,
    prompt: str,
    schema: Optional[Any] = None,
    integration: Optional[str] = None,
    max_credits: Optional[int] = None,
    strict_constrain_to_urls: Optional[bool] = None,
) -> Dict[str, Any]:
    body: Dict[str, Any] = {}
    if urls is not None:
        body["urls"] = urls
    body["prompt"] = prompt
    if schema is not None:
        normalized_schema = _normalize_schema(schema)
        if normalized_schema is not None:
            body["schema"] = normalized_schema
        else:
            raise ValueError(
                f"Invalid schema type: {type(schema).__name__}. "
                "Schema must be a dict, Pydantic BaseModel class, or Pydantic model instance."
            )
    if integration is not None and str(integration).strip():
        body["integration"] = str(integration).strip()
    if max_credits is not None and max_credits > 0:
        body["maxCredits"] = max_credits
    if strict_constrain_to_urls is not None and strict_constrain_to_urls:
        body["strictConstrainToURLs"] = strict_constrain_to_urls
    return body


def _normalize_agent_response_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    out = dict(payload)
    if "expiresAt" in out and "expires_at" not in out:
        out["expires_at"] = out["expiresAt"]
    if "creditsUsed" in out and "credits_used" not in out:
        out["credits_used"] = out["creditsUsed"]
    return out


async def start_agent(
    client: AsyncHttpClient,
    urls: Optional[List[str]],
    *,
    prompt: str,
    schema: Optional[Any] = None,
    integration: Optional[str] = None,
    max_credits: Optional[int] = None,
    strict_constrain_to_urls: Optional[bool] = None,
) -> AgentResponse:
    body = _prepare_agent_request(
        urls,
        prompt=prompt,
        schema=schema,
        integration=integration,
        max_credits=max_credits,
        strict_constrain_to_urls=strict_constrain_to_urls,
    )
    resp = await client.post("/v2/agent", body)
    payload = _normalize_agent_response_payload(resp.json())
    return AgentResponse(**payload)


async def get_agent_status(client: AsyncHttpClient, job_id: str) -> AgentResponse:
    resp = await client.get(f"/v2/agent/{job_id}")
    payload = _normalize_agent_response_payload(resp.json())
    return AgentResponse(**payload)


async def wait_agent(
    client: AsyncHttpClient,
    job_id: str,
    *,
    poll_interval: int = 2,
    timeout: Optional[int] = None,
) -> AgentResponse:
    start_ts = asyncio.get_event_loop().time()
    while True:
        status = await get_agent_status(client, job_id)
        if status.status in ("completed", "failed", "cancelled"):
            return status
        if timeout is not None and (asyncio.get_event_loop().time() - start_ts) > timeout:
            return status
        await asyncio.sleep(max(1, poll_interval))


async def agent(
    client: AsyncHttpClient,
    urls: Optional[List[str]],
    *,
    prompt: str,
    schema: Optional[Any] = None,
    integration: Optional[str] = None,
    poll_interval: int = 2,
    timeout: Optional[int] = None,
    max_credits: Optional[int] = None,
    strict_constrain_to_urls: Optional[bool] = None,
) -> AgentResponse:
    started = await start_agent(
        client,
        urls,
        prompt=prompt,
        schema=schema,
        integration=integration,
        max_credits=max_credits,
        strict_constrain_to_urls=strict_constrain_to_urls,
    )
    job_id = getattr(started, "id", None)
    if not job_id:
        return started
    return await wait_agent(client, job_id, poll_interval=poll_interval, timeout=timeout)


async def cancel_agent(client: AsyncHttpClient, job_id: str) -> bool:
    """
    Cancel a running agent job.

    Args:
        client: Async HTTP client instance
        job_id: ID of the agent job to cancel

    Returns:
        bool: True if the agent was cancelled, False otherwise

    Raises:
        Exception: If the cancellation fails
    """
    resp = await client.delete(f"/v2/agent/{job_id}")
    return resp.json().get("success", False)
```
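
A corresponding sketch for the aio variant, which awaits AsyncHttpClient calls but otherwise mirrors the synchronous module; the key, URL, and prompt below are placeholders.

```python
# Hedged sketch: drive the aio agent helpers directly with AsyncHttpClient.
import asyncio

from firecrawl.v2.utils.http_client_async import AsyncHttpClient
from firecrawl.v2.methods.aio import agent as async_agent


async def main() -> None:
    client = AsyncHttpClient("fc-YOUR-KEY", "https://api.firecrawl.dev")  # placeholder key
    started = await async_agent.start_agent(
        client,
        ["https://example.com"],             # placeholder URL list
        prompt="Find the pricing page and summarize the plans.",
    )
    job_id = getattr(started, "id", None)    # same guard the module itself uses
    if job_id:
        final = await async_agent.wait_agent(client, job_id, poll_interval=2, timeout=120)
        print(final.status)


asyncio.run(main())
```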