cartography-client 0.8.1__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography-client might be problematic.
- cartography/__init__.py +3 -1
- cartography/_base_client.py +12 -12
- cartography/_client.py +8 -8
- cartography/_compat.py +48 -48
- cartography/_models.py +50 -44
- cartography/_qs.py +7 -7
- cartography/_types.py +53 -12
- cartography/_utils/__init__.py +9 -2
- cartography/_utils/_compat.py +45 -0
- cartography/_utils/_datetime_parse.py +136 -0
- cartography/_utils/_transform.py +13 -3
- cartography/_utils/_typing.py +6 -1
- cartography/_utils/_utils.py +4 -5
- cartography/_version.py +1 -1
- cartography/resources/api_info.py +3 -3
- cartography/resources/crawl.py +25 -21
- cartography/resources/download.py +26 -26
- cartography/resources/health.py +3 -3
- cartography/resources/scrape.py +14 -14
- cartography/resources/workflows/request/crawl.py +46 -26
- cartography/resources/workflows/request/request.py +5 -7
- cartography/resources/workflows/workflows.py +5 -5
- cartography/types/crawl_create_graph_params.py +3 -1
- cartography/types/download_create_bulk_params.py +2 -2
- cartography/types/scrape_engine_param.py +3 -1
- cartography/types/scrape_scrape_bulk_params.py +3 -2
- cartography/types/workflows/request/crawl_create_params.py +12 -2
- cartography/types/workflows/request/crawl_request_param.py +12 -2
- cartography/types/workflows/request_create_download_params.py +3 -2
- {cartography_client-0.8.1.dist-info → cartography_client-0.9.1.dist-info}/METADATA +1 -1
- {cartography_client-0.8.1.dist-info → cartography_client-0.9.1.dist-info}/RECORD +33 -31
- {cartography_client-0.8.1.dist-info → cartography_client-0.9.1.dist-info}/WHEEL +0 -0
- {cartography_client-0.8.1.dist-info → cartography_client-0.9.1.dist-info}/licenses/LICENSE +0 -0
cartography/resources/download.py
CHANGED

@@ -2,12 +2,12 @@
 
 from __future__ import annotations
 
-from typing import
+from typing import Optional
 
 import httpx
 
 from ..types import WaitUntil, DownloaderType, download_create_bulk_params, download_create_single_params
-from .._types import
+from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
 from .._utils import maybe_transform, async_maybe_transform
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
@@ -51,18 +51,18 @@ class DownloadResource(SyncAPIResource):
         *,
         crawl_id: str,
         s3_bucket: str,
-        urls:
-        batch_size: int |
-        debug: bool |
-        downloader_type: DownloaderType |
-        max_workers: int |
-        wait_until: WaitUntil |
+        urls: SequenceNotStr[str],
+        batch_size: int | Omit = omit,
+        debug: bool | Omit = omit,
+        downloader_type: DownloaderType | Omit = omit,
+        max_workers: int | Omit = omit,
+        wait_until: WaitUntil | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> DownloadCreateBulkResponse:
         """
         Bulk download multiple files with checkpointing to S3
@@ -120,16 +120,16 @@
         *,
         s3_bucket: str,
         url: str,
-        downloader_type: DownloaderType |
-        s3_key: Optional[str] |
-        timeout_ms: int |
-        wait_until: WaitUntil |
+        downloader_type: DownloaderType | Omit = omit,
+        s3_key: Optional[str] | Omit = omit,
+        timeout_ms: int | Omit = omit,
+        wait_until: WaitUntil | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> DownloadCreateSingleResponse:
         """
         Download a single file to S3
@@ -202,18 +202,18 @@ class AsyncDownloadResource(AsyncAPIResource):
         *,
         crawl_id: str,
         s3_bucket: str,
-        urls:
-        batch_size: int |
-        debug: bool |
-        downloader_type: DownloaderType |
-        max_workers: int |
-        wait_until: WaitUntil |
+        urls: SequenceNotStr[str],
+        batch_size: int | Omit = omit,
+        debug: bool | Omit = omit,
+        downloader_type: DownloaderType | Omit = omit,
+        max_workers: int | Omit = omit,
+        wait_until: WaitUntil | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> DownloadCreateBulkResponse:
         """
         Bulk download multiple files with checkpointing to S3
@@ -271,16 +271,16 @@
         *,
         s3_bucket: str,
         url: str,
-        downloader_type: DownloaderType |
-        s3_key: Optional[str] |
-        timeout_ms: int |
-        wait_until: WaitUntil |
+        downloader_type: DownloaderType | Omit = omit,
+        s3_key: Optional[str] | Omit = omit,
+        timeout_ms: int | Omit = omit,
+        wait_until: WaitUntil | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> DownloadCreateSingleResponse:
         """
         Download a single file to S3
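In 0.9.1, optional keyword arguments default to the new `omit` sentinel (type `Omit`), and `timeout` defaults to `not_given`; this lets the SDK distinguish "argument never passed" (dropped from the request body) from an explicitly passed value such as None. A minimal usage sketch — the `Cartography` client class and the `create_bulk` method name are assumptions inferred from the package layout and the `DownloadCreateBulkResponse` return type, not shown in this diff:

    # Hypothetical usage sketch (client and method names assumed).
    from cartography import Cartography

    client = Cartography()

    # batch_size, debug, downloader_type, max_workers and wait_until all
    # default to `omit`, so they are left out of the request body entirely
    # unless passed explicitly.
    response = client.download.create_bulk(
        crawl_id="crawl-123",
        s3_bucket="my-bucket",
        urls=["https://example.com/a.pdf", "https://example.com/b.pdf"],
        batch_size=10,
    )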
cartography/resources/health.py
CHANGED

@@ -4,7 +4,7 @@ from __future__ import annotations
 
 import httpx
 
-from .._types import
+from .._types import Body, Query, Headers, NotGiven, not_given
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
 from .._response import (
@@ -47,7 +47,7 @@ class HealthResource(SyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> HealthCheckResponse:
         """
         Health check endpoint
@@ -91,7 +91,7 @@ class AsyncHealthResource(AsyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> HealthCheckResponse:
         """
         Health check endpoint
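The `timeout: float | httpx.Timeout | None | NotGiven = not_given` union encodes three caller intents: a concrete value overrides the client-level default for this call, an explicit None disables the timeout (standard httpx semantics), and the `not_given` sentinel falls back to the client configuration. A sketch, with the `check` method name assumed from the `HealthCheckResponse` return type:

    import httpx

    client.health.check()                 # not_given: use the client-level timeout
    client.health.check(timeout=5.0)      # per-call override in seconds
    client.health.check(timeout=httpx.Timeout(10.0, connect=2.0))  # fine-grained control
    client.health.check(timeout=None)     # explicitly disable the timeout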
cartography/resources/scrape.py
CHANGED

@@ -2,12 +2,12 @@
 
 from __future__ import annotations
 
-from typing import
+from typing import Iterable
 
 import httpx
 
 from ..types import scrape_scrape_bulk_params, scrape_scrape_single_params
-from .._types import
+from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
 from .._utils import maybe_transform, async_maybe_transform
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
@@ -51,16 +51,16 @@ class ScrapeResource(SyncAPIResource):
         crawl_id: str,
         engines: Iterable[ScrapeEngineParam],
         s3_bucket: str,
-        urls:
-        batch_size: int |
-        debug: bool |
-        max_workers: int |
+        urls: SequenceNotStr[str],
+        batch_size: int | Omit = omit,
+        debug: bool | Omit = omit,
+        max_workers: int | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ScrapeScrapeBulkResponse:
         """
         Bulk scrape multiple URLs with checkpointing to S3
@@ -120,7 +120,7 @@
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ScrapeScrapeSingleResponse:
         """
         Scrape a single URL using the specified engines
@@ -180,16 +180,16 @@ class AsyncScrapeResource(AsyncAPIResource):
         crawl_id: str,
         engines: Iterable[ScrapeEngineParam],
         s3_bucket: str,
-        urls:
-        batch_size: int |
-        debug: bool |
-        max_workers: int |
+        urls: SequenceNotStr[str],
+        batch_size: int | Omit = omit,
+        debug: bool | Omit = omit,
+        max_workers: int | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ScrapeScrapeBulkResponse:
         """
         Bulk scrape multiple URLs with checkpointing to S3
@@ -249,7 +249,7 @@
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> ScrapeScrapeSingleResponse:
         """
         Scrape a single URL using the specified engines
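The bulk-scrape signature now combines both new types: `urls` must be a real sequence of strings (`SequenceNotStr[str]`), and `engines` takes an iterable of `ScrapeEngineParam` dicts. A sketch — the `scrape_bulk` method name is an assumption inferred from the `scrape_scrape_bulk_params` module and the `ScrapeScrapeBulkResponse` return type:

    # Hypothetical usage sketch (method name assumed).
    response = client.scrape.scrape_bulk(
        crawl_id="crawl-123",
        s3_bucket="my-bucket",
        urls=["https://example.com/page1", "https://example.com/page2"],
        engines=[{"engine_type": "FLEET"}],  # ScrapeEngineParam TypedDict
    )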
cartography/resources/workflows/request/crawl.py
CHANGED

@@ -2,12 +2,12 @@
 
 from __future__ import annotations
 
-from typing import List, Iterable
+from typing import List, Iterable, Optional
 from typing_extensions import Literal
 
 import httpx
 
-from ...._types import
+from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
 from ...._utils import maybe_transform, async_maybe_transform
 from ...._compat import cached_property
 from ...._resource import SyncAPIResource, AsyncAPIResource
@@ -51,23 +51,29 @@ class CrawlResource(SyncAPIResource):
         *,
         bucket_name: str,
         crawl_id: str,
-        engines: List[
+        engines: List[
+            Literal["FLEET", "ZENROWS", "SCRAPINGBEE", "FLEET_ASYNC", "FLEET_WORKFLOW", "ASYNC_FLEET_STICKY"]
+        ],
         url: str,
-        absolute_only: bool |
-
-
-
-
-
-
-
-
+        absolute_only: bool | Omit = omit,
+        agentic: bool | Omit = omit,
+        batch_size: int | Omit = omit,
+        camo: bool | Omit = omit,
+        depth: int | Omit = omit,
+        keep_external: bool | Omit = omit,
+        max_urls: int | Omit = omit,
+        max_workers: int | Omit = omit,
+        proxy_url: Optional[str] | Omit = omit,
+        session_id: Optional[str] | Omit = omit,
+        stealth: bool | Omit = omit,
+        teardown: bool | Omit = omit,
+        visit_external: bool | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> WorkflowResult:
         """
         Make a request to temporal worker :param request: crawl request data :param
@@ -91,13 +97,17 @@
                     "engines": engines,
                     "url": url,
                     "absolute_only": absolute_only,
+                    "agentic": agentic,
                     "batch_size": batch_size,
                     "camo": camo,
                     "depth": depth,
                     "keep_external": keep_external,
                     "max_urls": max_urls,
                     "max_workers": max_workers,
+                    "proxy_url": proxy_url,
+                    "session_id": session_id,
                     "stealth": stealth,
+                    "teardown": teardown,
                     "visit_external": visit_external,
                 },
                 crawl_create_params.CrawlCreateParams,
@@ -117,7 +127,7 @@
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> CrawlCreateBulkResponse:
         """
         Make up to 50 requests to temporal crawl worker
@@ -166,23 +176,29 @@ class AsyncCrawlResource(AsyncAPIResource):
         *,
         bucket_name: str,
         crawl_id: str,
-        engines: List[
+        engines: List[
+            Literal["FLEET", "ZENROWS", "SCRAPINGBEE", "FLEET_ASYNC", "FLEET_WORKFLOW", "ASYNC_FLEET_STICKY"]
+        ],
         url: str,
-        absolute_only: bool |
-
-
-
-
-
-
-
-
+        absolute_only: bool | Omit = omit,
+        agentic: bool | Omit = omit,
+        batch_size: int | Omit = omit,
+        camo: bool | Omit = omit,
+        depth: int | Omit = omit,
+        keep_external: bool | Omit = omit,
+        max_urls: int | Omit = omit,
+        max_workers: int | Omit = omit,
+        proxy_url: Optional[str] | Omit = omit,
+        session_id: Optional[str] | Omit = omit,
+        stealth: bool | Omit = omit,
+        teardown: bool | Omit = omit,
+        visit_external: bool | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> WorkflowResult:
         """
         Make a request to temporal worker :param request: crawl request data :param
@@ -206,13 +222,17 @@
                     "engines": engines,
                     "url": url,
                     "absolute_only": absolute_only,
+                    "agentic": agentic,
                     "batch_size": batch_size,
                     "camo": camo,
                     "depth": depth,
                     "keep_external": keep_external,
                     "max_urls": max_urls,
                     "max_workers": max_workers,
+                    "proxy_url": proxy_url,
+                    "session_id": session_id,
                     "stealth": stealth,
+                    "teardown": teardown,
                     "visit_external": visit_external,
                 },
                 crawl_create_params.CrawlCreateParams,
@@ -232,7 +252,7 @@
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> CrawlCreateBulkResponse:
         """
         Make up to 50 requests to temporal crawl worker
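Version 0.9.1 threads four new crawl options through the full stack (method signature, request body, and the params TypedDicts): `agentic`, `proxy_url`, `session_id`, and `teardown`. A sketch of a crawl request using them — the `client.workflows.request.crawl.create(...)` accessor path and method name are assumptions based on the resource module layout, not confirmed by this diff:

    # Hypothetical usage sketch (accessor path and method name assumed).
    result = client.workflows.request.crawl.create(
        bucket_name="my-bucket",
        crawl_id="crawl-123",
        engines=["FLEET"],
        url="https://example.com",
        agentic=True,                            # new in 0.9.1
        proxy_url="http://proxy.internal:8080",  # new in 0.9.1, Optional[str]
        session_id="session-42",                 # new in 0.9.1, Optional[str]
        teardown=True,                           # new in 0.9.1
    )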
cartography/resources/workflows/request/request.py
CHANGED

@@ -2,8 +2,6 @@
 
 from __future__ import annotations
 
-from typing import List
-
 import httpx
 
 from .crawl import (
@@ -14,7 +12,7 @@ from .crawl import (
     CrawlResourceWithStreamingResponse,
     AsyncCrawlResourceWithStreamingResponse,
 )
-from ...._types import
+from ...._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given
 from ...._utils import maybe_transform, async_maybe_transform
 from ...._compat import cached_property
 from ...._resource import SyncAPIResource, AsyncAPIResource
@@ -61,13 +59,13 @@ class RequestResource(SyncAPIResource):
         bucket_name: str,
         crawl_id: str,
         downloader_type: str,
-        urls:
+        urls: SequenceNotStr[str],
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> RequestCreateDownloadResponse:
         """
         Make a request to temporal worker :param request: crawl request data :param
@@ -130,13 +128,13 @@ class AsyncRequestResource(AsyncAPIResource):
         bucket_name: str,
         crawl_id: str,
         downloader_type: str,
-        urls:
+        urls: SequenceNotStr[str],
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> RequestCreateDownloadResponse:
         """
         Make a request to temporal worker :param request: crawl request data :param
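Across 0.9.1, every `urls` parameter moves from a plain list annotation to `SequenceNotStr[str]`. The motivation for a type like this: `str` is itself a `Sequence[str]`, so under a plain sequence annotation `urls="https://example.com"` would type-check and then be iterated character by character. A protocol whose `__contains__` accepts `object` excludes `str` (whose `__contains__` only accepts `str`) while still admitting lists and tuples. An illustrative reconstruction of such a protocol — not necessarily the exact definition shipped in `cartography/_types.py`:

    from typing import Iterator, Protocol, TypeVar, overload

    T_co = TypeVar("T_co", covariant=True)

    class SequenceNotStr(Protocol[T_co]):
        @overload
        def __getitem__(self, index: int, /) -> T_co: ...
        @overload
        def __getitem__(self, index: slice, /) -> "SequenceNotStr[T_co]": ...
        # str.__contains__ only accepts str, so str fails to satisfy this
        # protocol; list and tuple, whose __contains__ takes object, pass.
        def __contains__(self, value: object, /) -> bool: ...
        def __len__(self) -> int: ...
        def __iter__(self) -> Iterator[T_co]: ...

With this annotation, `urls=["https://example.com"]` type-checks while `urls="https://example.com"` is rejected by the type checker (at runtime both would still reach the API unchanged, since protocols are not enforced on call).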
cartography/resources/workflows/workflows.py
CHANGED

@@ -4,7 +4,7 @@ from __future__ import annotations
 
 import httpx
 
-from ..._types import
+from ..._types import Body, Query, Headers, NotGiven, not_given
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
 from ..._response import (
@@ -61,7 +61,7 @@ class WorkflowsResource(SyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> WorkflowDescribeResponse:
         """
         Get Workflow Description
@@ -94,7 +94,7 @@
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> WorkflowResultsResponse:
         """
         Get Workflow Results
@@ -152,7 +152,7 @@ class AsyncWorkflowsResource(AsyncAPIResource):
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> WorkflowDescribeResponse:
         """
         Get Workflow Description
@@ -185,7 +185,7 @@
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven =
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> WorkflowResultsResponse:
         """
         Get Workflow Results
cartography/types/crawl_create_graph_params.py
CHANGED

@@ -12,7 +12,9 @@ class CrawlCreateGraphParams(TypedDict, total=False):
     crawl_id: Required[str]
     """Unique identifier for this crawl"""
 
-    engines: Required[
+    engines: Required[
+        List[Literal["FLEET", "ZENROWS", "SCRAPINGBEE", "FLEET_ASYNC", "FLEET_WORKFLOW", "ASYNC_FLEET_STICKY"]]
+    ]
     """List of engines to use"""
 
     s3_bucket: Required[str]
cartography/types/download_create_bulk_params.py
CHANGED

@@ -2,9 +2,9 @@
 
 from __future__ import annotations
 
-from typing import List
 from typing_extensions import Required, TypedDict
 
+from .._types import SequenceNotStr
 from .wait_until import WaitUntil
 from .downloader_type import DownloaderType
 
@@ -18,7 +18,7 @@ class DownloadCreateBulkParams(TypedDict, total=False):
     s3_bucket: Required[str]
     """S3 bucket for storage and checkpoints"""
 
-    urls: Required[
+    urls: Required[SequenceNotStr[str]]
     """List of URLs to download"""
 
     batch_size: int
cartography/types/scrape_engine_param.py
CHANGED

@@ -9,7 +9,9 @@ __all__ = ["ScrapeEngineParam"]
 
 
 class ScrapeEngineParam(TypedDict, total=False):
-    engine_type: Required[
+    engine_type: Required[
+        Literal["FLEET", "ZENROWS", "SCRAPINGBEE", "FLEET_ASYNC", "FLEET_WORKFLOW", "ASYNC_FLEET_STICKY"]
+    ]
 
     headers: Optional[Dict[str, str]]
     """Custom headers"""
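`ScrapeEngineParam` is a TypedDict, so engine configurations are written as dict literals; `engine_type` is required and must be one of the six Literal values, while `headers` is an optional mapping. A small example using only the keys visible in this diff:

    engine: ScrapeEngineParam = {
        "engine_type": "ZENROWS",                     # Required[Literal[...]]
        "headers": {"User-Agent": "my-crawler/1.0"},  # Optional[Dict[str, str]]
    }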
cartography/types/scrape_scrape_bulk_params.py
CHANGED

@@ -2,9 +2,10 @@
 
 from __future__ import annotations
 
-from typing import
+from typing import Iterable
 from typing_extensions import Required, TypedDict
 
+from .._types import SequenceNotStr
 from .scrape_engine_param import ScrapeEngineParam
 
 __all__ = ["ScrapeScrapeBulkParams"]
@@ -20,7 +21,7 @@ class ScrapeScrapeBulkParams(TypedDict, total=False):
     s3_bucket: Required[str]
     """S3 bucket for checkpointing"""
 
-    urls: Required[
+    urls: Required[SequenceNotStr[str]]
     """List of URLs to scrape"""
 
     batch_size: int
cartography/types/workflows/request/crawl_create_params.py
CHANGED

@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import List
+from typing import List, Optional
 from typing_extensions import Literal, Required, TypedDict
 
 __all__ = ["CrawlCreateParams"]
@@ -13,12 +13,16 @@ class CrawlCreateParams(TypedDict, total=False):
 
     crawl_id: Required[str]
 
-    engines: Required[
+    engines: Required[
+        List[Literal["FLEET", "ZENROWS", "SCRAPINGBEE", "FLEET_ASYNC", "FLEET_WORKFLOW", "ASYNC_FLEET_STICKY"]]
+    ]
 
     url: Required[str]
 
     absolute_only: bool
 
+    agentic: bool
+
     batch_size: int
 
     camo: bool
@@ -31,6 +35,12 @@
 
     max_workers: int
 
+    proxy_url: Optional[str]
+
+    session_id: Optional[str]
+
     stealth: bool
 
+    teardown: bool
+
     visit_external: bool
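These params classes are declared with `total=False`, so a key is optional at the type level unless wrapped in `Required[...]`; the `Optional[str]` on the new `proxy_url` and `session_id` fields is orthogonal to that and only says the value may be None when the key is present. A sketch (`bucket_name` is assumed to be the remaining Required key, per the crawl resource method signature earlier in this diff):

    params: CrawlCreateParams = {
        "bucket_name": "my-bucket",   # Required[...] keys must be present
        "crawl_id": "crawl-123",
        "engines": ["FLEET"],
        "url": "https://example.com",
        "proxy_url": None,            # optional key, present with an explicit None
        # agentic, teardown, etc. can be omitted entirely (total=False)
    }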
cartography/types/workflows/request/crawl_request_param.py
CHANGED

@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import List
+from typing import List, Optional
 from typing_extensions import Literal, Required, TypedDict
 
 __all__ = ["CrawlRequestParam"]
@@ -13,12 +13,16 @@ class CrawlRequestParam(TypedDict, total=False):
 
     crawl_id: Required[str]
 
-    engines: Required[
+    engines: Required[
+        List[Literal["FLEET", "ZENROWS", "SCRAPINGBEE", "FLEET_ASYNC", "FLEET_WORKFLOW", "ASYNC_FLEET_STICKY"]]
+    ]
 
     url: Required[str]
 
     absolute_only: bool
 
+    agentic: bool
+
     batch_size: int
 
     camo: bool
@@ -31,6 +35,12 @@
 
     max_workers: int
 
+    proxy_url: Optional[str]
+
+    session_id: Optional[str]
+
     stealth: bool
 
+    teardown: bool
+
     visit_external: bool
cartography/types/workflows/request_create_download_params.py
CHANGED

@@ -2,9 +2,10 @@
 
 from __future__ import annotations
 
-from typing import List
 from typing_extensions import Required, TypedDict
 
+from ..._types import SequenceNotStr
+
 __all__ = ["RequestCreateDownloadParams"]
 
 
@@ -15,4 +16,4 @@ class RequestCreateDownloadParams(TypedDict, total=False):
 
     downloader_type: Required[str]
 
-    urls: Required[
+    urls: Required[SequenceNotStr[str]]
{cartography_client-0.8.1.dist-info → cartography_client-0.9.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: cartography-client
-Version: 0.8.1
+Version: 0.9.1
 Summary: The official Python library for the cartography API
 Project-URL: Homepage, https://github.com/evrimai/cartography-client
 Project-URL: Repository, https://github.com/evrimai/cartography-client