crawlora 1.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crawlora/__init__.py +24 -0
- crawlora/_pagination.py +44 -0
- crawlora/_transport_sync.py +114 -0
- crawlora/async_client.py +321 -0
- crawlora/client.py +671 -0
- crawlora/client.pyi +20710 -0
- crawlora/operations.py +6784 -0
- crawlora/py.typed +1 -0
- crawlora-1.5.0.dev1.dist-info/METADATA +213 -0
- crawlora-1.5.0.dev1.dist-info/RECORD +13 -0
- crawlora-1.5.0.dev1.dist-info/WHEEL +5 -0
- crawlora-1.5.0.dev1.dist-info/licenses/LICENSE +21 -0
- crawlora-1.5.0.dev1.dist-info/top_level.txt +1 -0
crawlora/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: crawlora
|
|
3
|
+
Version: 1.5.0.dev1
|
|
4
|
+
Summary: Python SDK for the public Crawlora API.
|
|
5
|
+
Author: Crawlora
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Crawlora-org/crawlora-python-sdk
|
|
8
|
+
Project-URL: Repository, https://github.com/Crawlora-org/crawlora-python-sdk
|
|
9
|
+
Project-URL: Issues, https://github.com/Crawlora-org/crawlora-python-sdk/issues
|
|
10
|
+
Keywords: crawlora,sdk,web-scraping,api-client
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: typing-extensions>=4.8; python_version < "3.11"
|
|
22
|
+
Provides-Extra: async
|
|
23
|
+
Requires-Dist: httpx>=0.27; extra == "async"
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# Crawlora Python SDK
|
|
27
|
+
|
|
28
|
+
Python client for the public Crawlora API. Use it to call Crawlora scraping,
|
|
29
|
+
search, marketplace, media, maps, finance, and usage endpoints with generated
|
|
30
|
+
type stubs for editor and type-checker support.
|
|
31
|
+
|
|
32
|
+
- Runtime: Python 3.10+
|
|
33
|
+
- Auth: `x-api-key`
|
|
34
|
+
- Default API base URL: `https://api.crawlora.net/api/v1`
|
|
35
|
+
- Reference: [operations](docs/operations.md) and [recipes](docs/recipes.md)
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
The Python SDK is currently distributed from Git beta tags:
|
|
40
|
+
|
|
41
|
+
```sh
|
|
42
|
+
pip install "git+https://github.com/Crawlora-org/crawlora-python-sdk.git@latest"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
For reproducible builds, pin a released tag:
|
|
46
|
+
|
|
47
|
+
```sh
|
|
48
|
+
pip install "git+https://github.com/Crawlora-org/crawlora-python-sdk.git@TAG"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## API Key
|
|
52
|
+
|
|
53
|
+
Create or sign in to your Crawlora account at [crawlora.net](https://crawlora.net),
|
|
54
|
+
then create an API key in the dashboard.
|
|
55
|
+
|
|
56
|
+
```sh
|
|
57
|
+
read -r CRAWLORA_API_KEY
|
|
58
|
+
export CRAWLORA_API_KEY
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## First Request
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
import os
|
|
65
|
+
from crawlora import CrawloraClient
|
|
66
|
+
|
|
67
|
+
crawlora = CrawloraClient(api_key=os.environ["CRAWLORA_API_KEY"])
|
|
68
|
+
|
|
69
|
+
response = crawlora.bing.search(
|
|
70
|
+
q="coffee shops",
|
|
71
|
+
count=10,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
print(response["data"]["results"][0])
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Endpoint groups are generated from the public API contract, so common calls are
|
|
78
|
+
available as methods such as `crawlora.bing.search(...)`,
|
|
79
|
+
`crawlora.youtube.transcript(...)`, and `crawlora.google.map_search(...)`.
|
|
80
|
+
|
|
81
|
+
## Typed Dynamic Calls
|
|
82
|
+
|
|
83
|
+
You can also call by operation id. Literal operation ids are covered by the
|
|
84
|
+
generated `.pyi` stubs, so type checkers can infer the matching parameter and
|
|
85
|
+
response aliases:
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
response = crawlora.request("bing-search", {
|
|
89
|
+
"q": "coffee shops",
|
|
90
|
+
"count": 10,
|
|
91
|
+
})
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Generated stubs include operation ids, endpoint groups, keyword parameters,
|
|
95
|
+
enum values, response aliases, and reserved request options.
|
|
96
|
+
|
|
97
|
+
## Configuration
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
crawlora = CrawloraClient(
|
|
101
|
+
api_key=os.environ["CRAWLORA_API_KEY"],
|
|
102
|
+
base_url="https://api.crawlora.net/api/v1",
|
|
103
|
+
timeout=30,
|
|
104
|
+
retries=2,
|
|
105
|
+
retry_delay=0.25,
|
|
106
|
+
headers={"x-client": "my-app"},
|
|
107
|
+
)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Per-request options are available through reserved keyword arguments. Header
|
|
111
|
+
names are matched case-insensitively, so request headers can override default
|
|
112
|
+
auth, user-agent, and content headers without duplicating variants such as
|
|
113
|
+
`x-api-key` and `X-API-KEY`:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
response = crawlora.bing.search(
|
|
117
|
+
q="coffee shops",
|
|
118
|
+
_timeout=10,
|
|
119
|
+
_headers={"x-request-id": "search-001"},
|
|
120
|
+
)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Text Responses
|
|
124
|
+
|
|
125
|
+
Most endpoints return JSON. `_response_type` must be `auto`, `json`, or
|
|
126
|
+
`text`. Endpoints that support alternate text output, such as YouTube
|
|
127
|
+
transcripts, can opt into text mode:
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
transcript = crawlora.youtube.transcript(
|
|
131
|
+
id="VIDEO_ID",
|
|
132
|
+
format="text",
|
|
133
|
+
_response_type="text",
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
print(transcript)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Errors
|
|
140
|
+
|
|
141
|
+
Failed API calls raise `CrawloraError`:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from crawlora import CrawloraError
|
|
145
|
+
|
|
146
|
+
try:
|
|
147
|
+
crawlora.bing.search(q="coffee shops")
|
|
148
|
+
except CrawloraError as error:
|
|
149
|
+
print(error.status, error.code, error.body)
|
|
150
|
+
raise
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
The error includes `status`, optional API `code`, parsed `body`, `raw_body`,
|
|
154
|
+
response `headers`, and the underlying parser or transport exception as
|
|
155
|
+
`__cause__` when available. Retryable responses honor positive `Retry-After`
|
|
156
|
+
headers, capped at 30 seconds. Timeout-like transport failures use the
|
|
157
|
+
`Crawlora request timed out` SDK message.
|
|
158
|
+
|
|
159
|
+
`CrawloraError` has three subclasses for branching on the failure kind:
|
|
160
|
+
`CrawloraClientError` (4xx, request rejected), `CrawloraServerError` (5xx), and
|
|
161
|
+
`CrawloraNetworkError` (transport failure or timeout before a response).
|
|
162
|
+
|
|
163
|
+
## Async
|
|
164
|
+
|
|
165
|
+
`AsyncCrawloraClient` mirrors the synchronous client for asyncio applications:
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from crawlora import AsyncCrawloraClient
|
|
169
|
+
|
|
170
|
+
crawlora = AsyncCrawloraClient(api_key="YOUR_API_KEY")
|
|
171
|
+
result = await crawlora.bing.search(q="coffee shops")
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
It reuses the same validation, retries, and `Retry-After` handling, running each
|
|
175
|
+
request in a worker thread so the package stays dependency-free.
|
|
176
|
+
|
|
177
|
+
## Pagination
|
|
178
|
+
|
|
179
|
+
`CrawloraClient.paginate` yields successive pages, advancing the page/offset query
|
|
180
|
+
parameter and stopping when a page returns no data:
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
for page in crawlora.paginate("ebay-seller-feedback", {"seller": "acme"}):
|
|
184
|
+
for review in page["data"]:
|
|
185
|
+
print(review)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
`AsyncCrawloraClient.paginate` is the `async for` equivalent. Override the auto-detected pagination settings
|
|
189
|
+
with `page_param`, `start`, `step`, and `max_pages`.
|
|
190
|
+
|
|
191
|
+
## Examples
|
|
192
|
+
|
|
193
|
+
Runnable examples live under `examples/` and skip cleanly when required
|
|
194
|
+
environment variables are missing:
|
|
195
|
+
|
|
196
|
+
```sh
|
|
197
|
+
python3 examples/bing_search.py
|
|
198
|
+
python3 examples/youtube_transcript.py
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Set `CRAWLORA_BASE_URL` to point examples at a staging or local API.
|
|
202
|
+
|
|
203
|
+
## Package Notes
|
|
204
|
+
|
|
205
|
+
The import name is `crawlora`:
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
from crawlora import CrawloraClient
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
The future PyPI package target is also `crawlora`, but registry publication is
|
|
212
|
+
not enabled yet. Until then, install from an explicit Git beta tag or the
|
|
213
|
+
moving `latest` tag as shown above.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
crawlora/__init__.py,sha256=6iBg2JTKRkQjm4fwoHcJ_s0vWRL1Z8jHuU9ttC_Movs,527
|
|
2
|
+
crawlora/_pagination.py,sha256=EzD7JNJ4UpA-CnQOoXoXKQ0RFCOyNUQ7PQohz9VxDuM,1362
|
|
3
|
+
crawlora/_transport_sync.py,sha256=Vel66LuNbN9S-lXLu5aYRzanHD3kGt61ukDu0NLqlPI,4294
|
|
4
|
+
crawlora/async_client.py,sha256=tDX3WuQQUDBph84XpIlk2YHAAGRD5s66NMSzcH41TgY,13691
|
|
5
|
+
crawlora/client.py,sha256=IV-MtaWnetnUgEI_ZgEAoV-kZPhie5CmKrVcesrlZyg,27244
|
|
6
|
+
crawlora/client.pyi,sha256=pQjgADZG2Qjs-qAML20XgTTCd7vfep48Yc5H8P64F6k,812162
|
|
7
|
+
crawlora/operations.py,sha256=7i8gprWKTnY1S03CSjQ4bKaLpyQqgHSJoKMAIdN2NLo,407101
|
|
8
|
+
crawlora/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
9
|
+
crawlora-1.5.0.dev1.dist-info/licenses/LICENSE,sha256=AJCfINUOcsaheBDv_3ZGzXaEnoibR9-WgzVfXKXxeuQ,1065
|
|
10
|
+
crawlora-1.5.0.dev1.dist-info/METADATA,sha256=mme0m4hVwCuHaTz3ScYBYLqTe_a0LmhvVyoi4lJ0Vd0,5970
|
|
11
|
+
crawlora-1.5.0.dev1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
12
|
+
crawlora-1.5.0.dev1.dist-info/top_level.txt,sha256=1-n_HDMojz5VLZUwBTeXGo1QKAOnBLmKIo4cKwaoqcY,9
|
|
13
|
+
crawlora-1.5.0.dev1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Crawlora
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
crawlora
|