cartography-client 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography-client might be problematic. Click here for more details.
- cartography/__init__.py +100 -0
- cartography/_base_client.py +1995 -0
- cartography/_client.py +444 -0
- cartography/_compat.py +219 -0
- cartography/_constants.py +14 -0
- cartography/_exceptions.py +108 -0
- cartography/_files.py +123 -0
- cartography/_models.py +829 -0
- cartography/_qs.py +150 -0
- cartography/_resource.py +43 -0
- cartography/_response.py +832 -0
- cartography/_streaming.py +333 -0
- cartography/_types.py +219 -0
- cartography/_utils/__init__.py +57 -0
- cartography/_utils/_logs.py +25 -0
- cartography/_utils/_proxy.py +65 -0
- cartography/_utils/_reflection.py +42 -0
- cartography/_utils/_resources_proxy.py +24 -0
- cartography/_utils/_streams.py +12 -0
- cartography/_utils/_sync.py +86 -0
- cartography/_utils/_transform.py +447 -0
- cartography/_utils/_typing.py +151 -0
- cartography/_utils/_utils.py +422 -0
- cartography/_version.py +4 -0
- cartography/lib/.keep +4 -0
- cartography/py.typed +0 -0
- cartography/resources/__init__.py +89 -0
- cartography/resources/api_info.py +135 -0
- cartography/resources/crawl.py +279 -0
- cartography/resources/download.py +376 -0
- cartography/resources/health.py +143 -0
- cartography/resources/scrape.py +331 -0
- cartography/resources/workflows/__init__.py +33 -0
- cartography/resources/workflows/request/__init__.py +33 -0
- cartography/resources/workflows/request/crawl.py +295 -0
- cartography/resources/workflows/request/request.py +221 -0
- cartography/resources/workflows/workflows.py +274 -0
- cartography/types/__init__.py +23 -0
- cartography/types/api_info_retrieve_response.py +8 -0
- cartography/types/bulk_download_result.py +23 -0
- cartography/types/bulk_scrape_result.py +19 -0
- cartography/types/crawl_create_graph_params.py +46 -0
- cartography/types/crawl_create_graph_response.py +37 -0
- cartography/types/download_create_bulk_params.py +37 -0
- cartography/types/download_create_bulk_response.py +41 -0
- cartography/types/download_create_single_params.py +32 -0
- cartography/types/download_create_single_response.py +21 -0
- cartography/types/downloader_type.py +7 -0
- cartography/types/health_check_response.py +8 -0
- cartography/types/scrape_engine_param.py +28 -0
- cartography/types/scrape_scrape_bulk_params.py +33 -0
- cartography/types/scrape_scrape_bulk_response.py +41 -0
- cartography/types/scrape_scrape_single_params.py +17 -0
- cartography/types/scrape_scrape_single_response.py +23 -0
- cartography/types/wait_until.py +7 -0
- cartography/types/workflow_describe_response.py +8 -0
- cartography/types/workflow_results_response.py +8 -0
- cartography/types/workflows/__init__.py +6 -0
- cartography/types/workflows/request/__init__.py +9 -0
- cartography/types/workflows/request/crawl_create_bulk_params.py +14 -0
- cartography/types/workflows/request/crawl_create_bulk_response.py +22 -0
- cartography/types/workflows/request/crawl_create_params.py +32 -0
- cartography/types/workflows/request/crawl_request_param.py +32 -0
- cartography/types/workflows/request/workflow_result.py +11 -0
- cartography/types/workflows/request_create_download_params.py +18 -0
- cartography/types/workflows/request_create_download_response.py +8 -0
- cartography_client-0.0.1.dist-info/METADATA +399 -0
- cartography_client-0.0.1.dist-info/RECORD +70 -0
- cartography_client-0.0.1.dist-info/WHEEL +4 -0
- cartography_client-0.0.1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import List
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from .crawl import (
|
|
10
|
+
CrawlResource,
|
|
11
|
+
AsyncCrawlResource,
|
|
12
|
+
CrawlResourceWithRawResponse,
|
|
13
|
+
AsyncCrawlResourceWithRawResponse,
|
|
14
|
+
CrawlResourceWithStreamingResponse,
|
|
15
|
+
AsyncCrawlResourceWithStreamingResponse,
|
|
16
|
+
)
|
|
17
|
+
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
|
|
18
|
+
from ...._utils import maybe_transform, async_maybe_transform
|
|
19
|
+
from ...._compat import cached_property
|
|
20
|
+
from ...._resource import SyncAPIResource, AsyncAPIResource
|
|
21
|
+
from ...._response import (
|
|
22
|
+
to_raw_response_wrapper,
|
|
23
|
+
to_streamed_response_wrapper,
|
|
24
|
+
async_to_raw_response_wrapper,
|
|
25
|
+
async_to_streamed_response_wrapper,
|
|
26
|
+
)
|
|
27
|
+
from ...._base_client import make_request_options
|
|
28
|
+
from ....types.workflows import request_create_download_params
|
|
29
|
+
from ....types.workflows.request_create_download_response import RequestCreateDownloadResponse
|
|
30
|
+
|
|
31
|
+
__all__ = ["RequestResource", "AsyncRequestResource"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RequestResource(SyncAPIResource):
    """Synchronous resource exposing the workflow download request and the nested `crawl` sub-resource."""

    @cached_property
    def crawl(self) -> CrawlResource:
        """Return the `crawl` sub-resource, built on this resource's client."""
        crawl_resource = CrawlResource(self._client)
        return crawl_resource

    @cached_property
    def with_raw_response(self) -> RequestResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/evrimai/cartography-client#accessing-raw-response-data-eg-headers
        """
        raw_view = RequestResourceWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> RequestResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, but the response body is not read eagerly.

        For more information, see https://www.github.com/evrimai/cartography-client#with_streaming_response
        """
        streaming_view = RequestResourceWithStreamingResponse(self)
        return streaming_view

    def create_download(
        self,
        *,
        bucket_name: str,
        crawl_id: str,
        downloader_type: str,
        urls: List[str],
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> RequestCreateDownloadResponse:
        """
        Make a download request to the temporal worker (POST `/workflows/request/download`).

        Args:
          bucket_name: Sent as the `bucket_name` field of the request body

          crawl_id: Sent as the `crawl_id` field of the request body

          downloader_type: Sent as the `downloader_type` field of the request body

          urls: Sent as the `urls` field of the request body

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Build the JSON body first, shaped by the params TypedDict.
        payload = maybe_transform(
            {
                "bucket_name": bucket_name,
                "crawl_id": crawl_id,
                "downloader_type": downloader_type,
                "urls": urls,
            },
            request_create_download_params.RequestCreateDownloadParams,
        )
        # Per-request overrides are bundled separately from the body.
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            "/workflows/request/download",
            body=payload,
            options=request_options,
            cast_to=RequestCreateDownloadResponse,
        )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class AsyncRequestResource(AsyncAPIResource):
    """Asynchronous resource exposing the workflow download request and the nested `crawl` sub-resource."""

    @cached_property
    def crawl(self) -> AsyncCrawlResource:
        """Return the async `crawl` sub-resource, built on this resource's client."""
        crawl_resource = AsyncCrawlResource(self._client)
        return crawl_resource

    @cached_property
    def with_raw_response(self) -> AsyncRequestResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/evrimai/cartography-client#accessing-raw-response-data-eg-headers
        """
        raw_view = AsyncRequestResourceWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> AsyncRequestResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, but the response body is not read eagerly.

        For more information, see https://www.github.com/evrimai/cartography-client#with_streaming_response
        """
        streaming_view = AsyncRequestResourceWithStreamingResponse(self)
        return streaming_view

    async def create_download(
        self,
        *,
        bucket_name: str,
        crawl_id: str,
        downloader_type: str,
        urls: List[str],
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> RequestCreateDownloadResponse:
        """
        Make a download request to the temporal worker (POST `/workflows/request/download`).

        Args:
          bucket_name: Sent as the `bucket_name` field of the request body

          crawl_id: Sent as the `crawl_id` field of the request body

          downloader_type: Sent as the `downloader_type` field of the request body

          urls: Sent as the `urls` field of the request body

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Build the JSON body first, shaped by the params TypedDict.
        payload = await async_maybe_transform(
            {
                "bucket_name": bucket_name,
                "crawl_id": crawl_id,
                "downloader_type": downloader_type,
                "urls": urls,
            },
            request_create_download_params.RequestCreateDownloadParams,
        )
        # Per-request overrides are bundled separately from the body.
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            "/workflows/request/download",
            body=payload,
            options=request_options,
            cast_to=RequestCreateDownloadResponse,
        )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class RequestResourceWithRawResponse:
    """View over a `RequestResource` whose methods are wrapped with `to_raw_response_wrapper`."""

    def __init__(self, request: RequestResource) -> None:
        # Keep the wrapped resource so sub-resources can be derived lazily.
        self._request = request

        self.create_download = to_raw_response_wrapper(request.create_download)

    @cached_property
    def crawl(self) -> CrawlResourceWithRawResponse:
        """Return the raw-response view of the wrapped resource's `crawl` sub-resource."""
        inner_crawl = self._request.crawl
        return CrawlResourceWithRawResponse(inner_crawl)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class AsyncRequestResourceWithRawResponse:
    """View over an `AsyncRequestResource` whose methods are wrapped with `async_to_raw_response_wrapper`."""

    def __init__(self, request: AsyncRequestResource) -> None:
        # Keep the wrapped resource so sub-resources can be derived lazily.
        self._request = request

        self.create_download = async_to_raw_response_wrapper(request.create_download)

    @cached_property
    def crawl(self) -> AsyncCrawlResourceWithRawResponse:
        """Return the raw-response view of the wrapped resource's `crawl` sub-resource."""
        inner_crawl = self._request.crawl
        return AsyncCrawlResourceWithRawResponse(inner_crawl)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class RequestResourceWithStreamingResponse:
    """View over a `RequestResource` whose methods are wrapped with `to_streamed_response_wrapper`."""

    def __init__(self, request: RequestResource) -> None:
        # Keep the wrapped resource so sub-resources can be derived lazily.
        self._request = request

        self.create_download = to_streamed_response_wrapper(request.create_download)

    @cached_property
    def crawl(self) -> CrawlResourceWithStreamingResponse:
        """Return the streaming-response view of the wrapped resource's `crawl` sub-resource."""
        inner_crawl = self._request.crawl
        return CrawlResourceWithStreamingResponse(inner_crawl)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class AsyncRequestResourceWithStreamingResponse:
    """View over an `AsyncRequestResource` whose methods are wrapped with `async_to_streamed_response_wrapper`."""

    def __init__(self, request: AsyncRequestResource) -> None:
        # Keep the wrapped resource so sub-resources can be derived lazily.
        self._request = request

        self.create_download = async_to_streamed_response_wrapper(request.create_download)

    @cached_property
    def crawl(self) -> AsyncCrawlResourceWithStreamingResponse:
        """Return the streaming-response view of the wrapped resource's `crawl` sub-resource."""
        inner_crawl = self._request.crawl
        return AsyncCrawlResourceWithStreamingResponse(inner_crawl)
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
|
|
8
|
+
from ..._compat import cached_property
|
|
9
|
+
from ..._resource import SyncAPIResource, AsyncAPIResource
|
|
10
|
+
from ..._response import (
|
|
11
|
+
to_raw_response_wrapper,
|
|
12
|
+
to_streamed_response_wrapper,
|
|
13
|
+
async_to_raw_response_wrapper,
|
|
14
|
+
async_to_streamed_response_wrapper,
|
|
15
|
+
)
|
|
16
|
+
from ..._base_client import make_request_options
|
|
17
|
+
from .request.request import (
|
|
18
|
+
RequestResource,
|
|
19
|
+
AsyncRequestResource,
|
|
20
|
+
RequestResourceWithRawResponse,
|
|
21
|
+
AsyncRequestResourceWithRawResponse,
|
|
22
|
+
RequestResourceWithStreamingResponse,
|
|
23
|
+
AsyncRequestResourceWithStreamingResponse,
|
|
24
|
+
)
|
|
25
|
+
from ...types.workflow_results_response import WorkflowResultsResponse
|
|
26
|
+
from ...types.workflow_describe_response import WorkflowDescribeResponse
|
|
27
|
+
|
|
28
|
+
__all__ = ["WorkflowsResource", "AsyncWorkflowsResource"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class WorkflowsResource(SyncAPIResource):
    """Synchronous resource for describing workflows and fetching their results, plus the nested `request` sub-resource."""

    @cached_property
    def request(self) -> RequestResource:
        """Return the `request` sub-resource, built on this resource's client."""
        request_resource = RequestResource(self._client)
        return request_resource

    @cached_property
    def with_raw_response(self) -> WorkflowsResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/evrimai/cartography-client#accessing-raw-response-data-eg-headers
        """
        raw_view = WorkflowsResourceWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> WorkflowsResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, but the response body is not read eagerly.

        For more information, see https://www.github.com/evrimai/cartography-client#with_streaming_response
        """
        streaming_view = WorkflowsResourceWithStreamingResponse(self)
        return streaming_view

    def describe(
        self,
        workflow_id: str,
        *,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> WorkflowDescribeResponse:
        """
        Get Workflow Description (GET `/workflows/describe/{workflow_id}`).

        Args:
          workflow_id: Identifier interpolated into the request path; must be non-empty

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `workflow_id` is empty
        """
        if not workflow_id:
            raise ValueError(f"Expected a non-empty value for `workflow_id` but received {workflow_id!r}")

        # NOTE(review): workflow_id is placed in the path as-is (no URL-encoding) — confirm ids never contain `/` or `?`.
        path = f"/workflows/describe/{workflow_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get(path, options=request_options, cast_to=WorkflowDescribeResponse)

    def results(
        self,
        workflow_id: str,
        *,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> WorkflowResultsResponse:
        """
        Get Workflow Results (GET `/workflows/results/{workflow_id}`).

        Args:
          workflow_id: Identifier interpolated into the request path; must be non-empty

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `workflow_id` is empty
        """
        if not workflow_id:
            raise ValueError(f"Expected a non-empty value for `workflow_id` but received {workflow_id!r}")

        # NOTE(review): workflow_id is placed in the path as-is (no URL-encoding) — confirm ids never contain `/` or `?`.
        path = f"/workflows/results/{workflow_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get(path, options=request_options, cast_to=WorkflowResultsResponse)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class AsyncWorkflowsResource(AsyncAPIResource):
    """Asynchronous resource for describing workflows and fetching their results, plus the nested `request` sub-resource."""

    @cached_property
    def request(self) -> AsyncRequestResource:
        """Return the async `request` sub-resource, built on this resource's client."""
        request_resource = AsyncRequestResource(self._client)
        return request_resource

    @cached_property
    def with_raw_response(self) -> AsyncWorkflowsResourceWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/evrimai/cartography-client#accessing-raw-response-data-eg-headers
        """
        raw_view = AsyncWorkflowsResourceWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> AsyncWorkflowsResourceWithStreamingResponse:
        """
        Like `.with_raw_response`, but the response body is not read eagerly.

        For more information, see https://www.github.com/evrimai/cartography-client#with_streaming_response
        """
        streaming_view = AsyncWorkflowsResourceWithStreamingResponse(self)
        return streaming_view

    async def describe(
        self,
        workflow_id: str,
        *,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> WorkflowDescribeResponse:
        """
        Get Workflow Description (GET `/workflows/describe/{workflow_id}`).

        Args:
          workflow_id: Identifier interpolated into the request path; must be non-empty

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `workflow_id` is empty
        """
        if not workflow_id:
            raise ValueError(f"Expected a non-empty value for `workflow_id` but received {workflow_id!r}")

        # NOTE(review): workflow_id is placed in the path as-is (no URL-encoding) — confirm ids never contain `/` or `?`.
        path = f"/workflows/describe/{workflow_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._get(path, options=request_options, cast_to=WorkflowDescribeResponse)

    async def results(
        self,
        workflow_id: str,
        *,
        # The arguments below let callers pass extra parameters to the API that are not exposed as kwargs.
        # Values supplied here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> WorkflowResultsResponse:
        """
        Get Workflow Results (GET `/workflows/results/{workflow_id}`).

        Args:
          workflow_id: Identifier interpolated into the request path; must be non-empty

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `workflow_id` is empty
        """
        if not workflow_id:
            raise ValueError(f"Expected a non-empty value for `workflow_id` but received {workflow_id!r}")

        # NOTE(review): workflow_id is placed in the path as-is (no URL-encoding) — confirm ids never contain `/` or `?`.
        path = f"/workflows/results/{workflow_id}"
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._get(path, options=request_options, cast_to=WorkflowResultsResponse)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class WorkflowsResourceWithRawResponse:
    """View over a `WorkflowsResource` whose methods are wrapped with `to_raw_response_wrapper`."""

    def __init__(self, workflows: WorkflowsResource) -> None:
        # Keep the wrapped resource so sub-resources can be derived lazily.
        self._workflows = workflows

        self.describe = to_raw_response_wrapper(workflows.describe)
        self.results = to_raw_response_wrapper(workflows.results)

    @cached_property
    def request(self) -> RequestResourceWithRawResponse:
        """Return the raw-response view of the wrapped resource's `request` sub-resource."""
        inner_request = self._workflows.request
        return RequestResourceWithRawResponse(inner_request)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class AsyncWorkflowsResourceWithRawResponse:
    """View over an `AsyncWorkflowsResource` whose methods are wrapped with `async_to_raw_response_wrapper`."""

    def __init__(self, workflows: AsyncWorkflowsResource) -> None:
        # Keep the wrapped resource so sub-resources can be derived lazily.
        self._workflows = workflows

        self.describe = async_to_raw_response_wrapper(workflows.describe)
        self.results = async_to_raw_response_wrapper(workflows.results)

    @cached_property
    def request(self) -> AsyncRequestResourceWithRawResponse:
        """Return the raw-response view of the wrapped resource's `request` sub-resource."""
        inner_request = self._workflows.request
        return AsyncRequestResourceWithRawResponse(inner_request)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class WorkflowsResourceWithStreamingResponse:
    """View over a `WorkflowsResource` whose methods are wrapped with `to_streamed_response_wrapper`."""

    def __init__(self, workflows: WorkflowsResource) -> None:
        # Keep the wrapped resource so sub-resources can be derived lazily.
        self._workflows = workflows

        self.describe = to_streamed_response_wrapper(workflows.describe)
        self.results = to_streamed_response_wrapper(workflows.results)

    @cached_property
    def request(self) -> RequestResourceWithStreamingResponse:
        """Return the streaming-response view of the wrapped resource's `request` sub-resource."""
        inner_request = self._workflows.request
        return RequestResourceWithStreamingResponse(inner_request)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class AsyncWorkflowsResourceWithStreamingResponse:
    """View over an `AsyncWorkflowsResource` whose methods are wrapped with `async_to_streamed_response_wrapper`."""

    def __init__(self, workflows: AsyncWorkflowsResource) -> None:
        # Keep the wrapped resource so sub-resources can be derived lazily.
        self._workflows = workflows

        self.describe = async_to_streamed_response_wrapper(workflows.describe)
        self.results = async_to_streamed_response_wrapper(workflows.results)

    @cached_property
    def request(self) -> AsyncRequestResourceWithStreamingResponse:
        """Return the streaming-response view of the wrapped resource's `request` sub-resource."""
        inner_request = self._workflows.request
        return AsyncRequestResourceWithStreamingResponse(inner_request)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .wait_until import WaitUntil as WaitUntil
|
|
6
|
+
from .downloader_type import DownloaderType as DownloaderType
|
|
7
|
+
from .bulk_scrape_result import BulkScrapeResult as BulkScrapeResult
|
|
8
|
+
from .scrape_engine_param import ScrapeEngineParam as ScrapeEngineParam
|
|
9
|
+
from .bulk_download_result import BulkDownloadResult as BulkDownloadResult
|
|
10
|
+
from .health_check_response import HealthCheckResponse as HealthCheckResponse
|
|
11
|
+
from .crawl_create_graph_params import CrawlCreateGraphParams as CrawlCreateGraphParams
|
|
12
|
+
from .scrape_scrape_bulk_params import ScrapeScrapeBulkParams as ScrapeScrapeBulkParams
|
|
13
|
+
from .workflow_results_response import WorkflowResultsResponse as WorkflowResultsResponse
|
|
14
|
+
from .api_info_retrieve_response import APIInfoRetrieveResponse as APIInfoRetrieveResponse
|
|
15
|
+
from .workflow_describe_response import WorkflowDescribeResponse as WorkflowDescribeResponse
|
|
16
|
+
from .crawl_create_graph_response import CrawlCreateGraphResponse as CrawlCreateGraphResponse
|
|
17
|
+
from .download_create_bulk_params import DownloadCreateBulkParams as DownloadCreateBulkParams
|
|
18
|
+
from .scrape_scrape_bulk_response import ScrapeScrapeBulkResponse as ScrapeScrapeBulkResponse
|
|
19
|
+
from .scrape_scrape_single_params import ScrapeScrapeSingleParams as ScrapeScrapeSingleParams
|
|
20
|
+
from .download_create_bulk_response import DownloadCreateBulkResponse as DownloadCreateBulkResponse
|
|
21
|
+
from .download_create_single_params import DownloadCreateSingleParams as DownloadCreateSingleParams
|
|
22
|
+
from .scrape_scrape_single_response import ScrapeScrapeSingleResponse as ScrapeScrapeSingleResponse
|
|
23
|
+
from .download_create_single_response import DownloadCreateSingleResponse as DownloadCreateSingleResponse
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from typing_extensions import Literal
|
|
5
|
+
|
|
6
|
+
from .._models import BaseModel
|
|
7
|
+
|
|
8
|
+
__all__ = ["BulkDownloadResult"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Response model for one entry of a bulk download: `status` and `url` are
# required, every other field is optional and defaults to None.
class BulkDownloadResult(BaseModel):
    status: Literal["success", "cached", "failed", "save_failed"]
    """Status of bulk download result"""

    # presumably the URL this result refers to — confirm against the API spec
    url: str

    # Optional error text; None when absent.
    error: Optional[str] = None

    # Optional job identifier; None when absent.
    job_id: Optional[str] = None

    # Optional S3 bucket name; None when absent.
    # NOTE(review): likely where the download was stored — confirm.
    s3_bucket: Optional[str] = None

    # Optional S3 object key; None when absent.
    s3_key: Optional[str] = None
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from .._models import BaseModel
|
|
6
|
+
|
|
7
|
+
__all__ = ["BulkScrapeResult"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Response model for one entry of a bulk scrape: `status` and `url` are
# required, every other field is optional and defaults to None.
class BulkScrapeResult(BaseModel):
    # Free-form string here (not a Literal as in BulkDownloadResult).
    status: str

    # presumably the URL this result refers to — confirm against the API spec
    url: str

    # Optional hash string; None when absent.
    # NOTE(review): what it is a hash of is not visible here.
    composite_hash: Optional[str] = None

    # Optional error text; None when absent.
    error: Optional[str] = None

    # Optional integer status code; None when absent.
    # NOTE(review): presumably the HTTP status of the scrape — confirm.
    status_code: Optional[int] = None
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Dict, Iterable, Optional
|
|
6
|
+
from typing_extensions import Required, TypedDict
|
|
7
|
+
|
|
8
|
+
__all__ = ["CrawlCreateGraphParams"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Request parameters for creating a crawl graph. Declared with total=False,
# so only the keys wrapped in Required[...] must be supplied; all others
# may be omitted.
class CrawlCreateGraphParams(TypedDict, total=False):
    crawl_id: Required[str]
    """Unique identifier for this crawl"""

    engines: Required[Iterable[Dict[str, object]]]
    """List of engines to use"""

    s3_bucket: Required[str]
    """S3 bucket for checkpointing"""

    url: Required[str]
    """Root URL to start crawling from"""

    absolute_only: bool
    """Only extract absolute URLs"""

    batch_size: int
    """URLs per batch"""

    debug: bool
    """Enable debug information"""

    # Unlike the other optional keys, this one also accepts an explicit None.
    depth: Optional[int]
    """Maximum crawl depth"""

    keep_external: bool
    """Keep external URLs in results"""

    max_urls: int
    """Maximum URLs to crawl"""

    max_workers: int
    """Maximum concurrent workers"""

    visit_external: bool
    """Visit external URLs"""
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
from .._models import BaseModel
|
|
6
|
+
|
|
7
|
+
__all__ = ["CrawlCreateGraphResponse"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Response model for a crawl-graph creation call. Every field except
# `debug_frame` is required.
# NOTE(review): several fields share names with CrawlCreateGraphParams keys and
# look like the request settings echoed back — confirm against the API spec.
class CrawlCreateGraphResponse(BaseModel):
    absolute_only: bool

    batch_size: int

    crawl_id: str

    # presumably the deepest level actually crawled — confirm
    depth_reached: int

    keep_external: bool

    max_depth: int

    max_urls: int

    max_workers: int

    root_url: str

    # Typed as a plain string; the timestamp format is not visible here.
    timestamp: str

    total_pages_visited: int

    visit_external: bool

    visited_urls: List[str]

    # Optional list of free-form dicts; None when absent.
    debug_frame: Optional[List[Dict[str, object]]] = None
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import List
|
|
6
|
+
from typing_extensions import Required, TypedDict
|
|
7
|
+
|
|
8
|
+
from .wait_until import WaitUntil
|
|
9
|
+
from .downloader_type import DownloaderType
|
|
10
|
+
|
|
11
|
+
__all__ = ["DownloadCreateBulkParams"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Request parameters for a bulk download. Declared with total=False, so only
# the keys wrapped in Required[...] must be supplied; all others may be
# omitted.
class DownloadCreateBulkParams(TypedDict, total=False):
    crawl_id: Required[str]
    """Unique identifier for this crawl"""

    s3_bucket: Required[str]
    """S3 bucket for storage and checkpoints"""

    urls: Required[List[str]]
    """List of URLs to download"""

    batch_size: int
    """URLs per batch"""

    debug: bool
    """Enable debug information"""

    # Type comes from the sibling `downloader_type` module.
    downloader_type: DownloaderType
    """Available downloader types"""

    max_workers: int
    """Maximum concurrent workers"""

    # Type comes from the sibling `wait_until` module.
    wait_until: WaitUntil
    """When to consider downloads complete"""
|