scrape-do-python 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrape_do_python-0.1.0/LICENSE +21 -0
- scrape_do_python-0.1.0/PKG-INFO +134 -0
- scrape_do_python-0.1.0/README.md +102 -0
- scrape_do_python-0.1.0/pyproject.toml +76 -0
- scrape_do_python-0.1.0/setup.cfg +4 -0
- scrape_do_python-0.1.0/src/scrape_do/__init__.py +0 -0
- scrape_do_python-0.1.0/src/scrape_do/abc.py +0 -0
- scrape_do_python-0.1.0/src/scrape_do/async_client.py +0 -0
- scrape_do_python-0.1.0/src/scrape_do/client.py +804 -0
- scrape_do_python-0.1.0/src/scrape_do/constants.py +84 -0
- scrape_do_python-0.1.0/src/scrape_do/exceptions.py +238 -0
- scrape_do_python-0.1.0/src/scrape_do/models/__init__.py +79 -0
- scrape_do_python-0.1.0/src/scrape_do/models/browser_actions.py +332 -0
- scrape_do_python-0.1.0/src/scrape_do/models/enums.py +76 -0
- scrape_do_python-0.1.0/src/scrape_do/models/parameters.py +840 -0
- scrape_do_python-0.1.0/src/scrape_do/models/request.py +232 -0
- scrape_do_python-0.1.0/src/scrape_do/models/response.py +890 -0
- scrape_do_python-0.1.0/src/scrape_do/namespaces/__init__.py +0 -0
- scrape_do_python-0.1.0/src/scrape_do/namespaces/amazon.py +0 -0
- scrape_do_python-0.1.0/src/scrape_do/namespaces/google.py +0 -0
- scrape_do_python-0.1.0/src/scrape_do/namespaces/jobs.py +0 -0
- scrape_do_python-0.1.0/src/scrape_do_python.egg-info/PKG-INFO +134 -0
- scrape_do_python-0.1.0/src/scrape_do_python.egg-info/SOURCES.txt +24 -0
- scrape_do_python-0.1.0/src/scrape_do_python.egg-info/dependency_links.txt +1 -0
- scrape_do_python-0.1.0/src/scrape_do_python.egg-info/requires.txt +19 -0
- scrape_do_python-0.1.0/src/scrape_do_python.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 svdC1
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scrape-do-python
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python SDK for the Scrape.do API
|
|
5
|
+
Author-email: svdC1 <svdc1mail@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Documentation, https://svdc1.github.io/scrape-do-python
|
|
8
|
+
Project-URL: Repository, https://github.com/svdC1/scrape-do-python
|
|
9
|
+
Project-URL: Issues, https://github.com/svdC1/scrape-do-python/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/svdC1/scrape-do-python/blob/main/.github/CHANGELOG.md
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: httpx>=0.25.0
|
|
15
|
+
Requires-Dist: pydantic>=2.0.0
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
18
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
19
|
+
Requires-Dist: pytest-mock; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
21
|
+
Requires-Dist: respx; extra == "dev"
|
|
22
|
+
Requires-Dist: mypy; extra == "dev"
|
|
23
|
+
Requires-Dist: ruff; extra == "dev"
|
|
24
|
+
Provides-Extra: docs
|
|
25
|
+
Requires-Dist: mkdocs; extra == "docs"
|
|
26
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
27
|
+
Requires-Dist: mkdocstrings; extra == "docs"
|
|
28
|
+
Requires-Dist: mkdocstrings-python; extra == "docs"
|
|
29
|
+
Requires-Dist: mkdocs-autorefs; extra == "docs"
|
|
30
|
+
Requires-Dist: black; extra == "docs"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+
[](https://pypi.org/project/scrape-do-python/)
|
|
34
|
+
[](https://pypi.org/project/scrape-do-python/)
|
|
35
|
+
[](LICENSE)
|
|
36
|
+
[](https://svdc1.github.io/scrape-do-python)
|
|
37
|
+
[](https://app.codecov.io/gh/svdC1/scrape-do-python?flags%5B0%5D=unit)
|
|
38
|
+
[](https://app.codecov.io/gh/svdC1/scrape-do-python?flags%5B0%5D=integration)
|
|
39
|
+
|
|
40
|
+
# scrape-do-python
|
|
41
|
+
|
|
42
|
+
A Python SDK for the [`Scrape.do`](https://scrape.do) web-scraping proxy API.
|
|
43
|
+
|
|
44
|
+
Built on [`httpx`](https://github.com/encode/httpx) and [`pydantic v2`](https://github.com/pydantic/pydantic), with strict request validation, automatic retries on gateway errors, sticky-session validation, and SDK-native lifecycle hooks.
|
|
45
|
+
|
|
46
|
+
## Status
|
|
47
|
+
> `v0.1.0` → Early but functional
|
|
48
|
+
|
|
49
|
+
> Synchronous client is shipped
|
|
50
|
+
|
|
51
|
+
> Async and Proxy-Mode clients are on the [`Roadmap`](.github/ROADMAP.md)
|
|
52
|
+
|
|
53
|
+
> Breaking changes are possible between `0.x` minor versions.
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install scrape-do-python
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Quickstart
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from scrape_do.client import ScrapeDoClient
|
|
65
|
+
|
|
66
|
+
# API Token pulled from SCRAPE_DO_API_KEY env variable
|
|
67
|
+
# Can also be provided via 'api_token' argument
|
|
68
|
+
|
|
69
|
+
with ScrapeDoClient() as client:
|
|
70
|
+
response = client.get(
|
|
71
|
+
"https://example.com",
|
|
72
|
+
super=True,
|
|
73
|
+
render=True,
|
|
74
|
+
return_json=True,
|
|
75
|
+
show_frames=True,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
print(response.is_proxy_error)
|
|
79
|
+
|
|
80
|
+
print(response.frames[0].url)
|
|
81
|
+
|
|
82
|
+
print(response.remaining_credits)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Features
|
|
86
|
+
|
|
87
|
+
### Type-Checked Request Parameters
|
|
88
|
+
|
|
89
|
+
Request parameters are fully type-checked and automatically validated via the [`RequestParameters`](https://svdc1.github.io/scrape-do-python/models/parameters/#scrape_do.models.parameters.RequestParameters) pydantic model
|
|
90
|
+
|
|
91
|
+
### Smart Routing
|
|
92
|
+
[`ScrapeDoClient.request()`](https://svdc1.github.io/scrape-do-python/client/#scrape_do.client.ScrapeDoClient.request) accepts either `**api_kwargs`, a pre-built `RequestParameters`, or a raw `api.scrape.do` URL for request parameters
|
|
93
|
+
|
|
94
|
+
### Automatic Retries
|
|
95
|
+
|
|
96
|
+
[`ScrapeDoClient`](https://svdc1.github.io/scrape-do-python/client/#scrape_do.client.ScrapeDoClient.session_validator) can automatically retry requests on Scrape.do gateway errors (`429` / `502` / `510`) with `customizable backoff` (static or callable)
|
|
97
|
+
|
|
98
|
+
### Sticky-Session Validation
|
|
99
|
+
|
|
100
|
+
Supply a [`session_validator`](https://svdc1.github.io/scrape-do-python/client/#scrape_do.client.SyncSessionValidator) callback to detect proxy node rotations and raise [`RotatedSessionError`](https://svdc1.github.io/scrape-do-python/exceptions/#scrape_do.exceptions.RotatedSessionError)
|
|
101
|
+
|
|
102
|
+
### SDK-Native Event Hooks
|
|
103
|
+
|
|
104
|
+
[`request / response / retry`](https://svdc1.github.io/scrape-do-python/client/#scrape_do.client.SyncClientEventHooks) lifecycle hooks, distinct from httpx's transport-level hooks.
|
|
105
|
+
|
|
106
|
+
### Strongly-Typed Responses
|
|
107
|
+
|
|
108
|
+
[`ScrapeDoResponse`](https://svdc1.github.io/scrape-do-python/models/response/#scrape_do.models.response.ScrapeDoResponse) exposes the parsed JSON envelope, browser action results, screenshots, and network/websocket logs.
|
|
109
|
+
|
|
110
|
+
### Browser Automation
|
|
111
|
+
|
|
112
|
+
Pydantic models for [`Browser Actions`](https://svdc1.github.io/scrape-do-python/models/browser_actions/) providing validation and type-hinting for the `playWithBrowser` API parameter
|
|
113
|
+
|
|
114
|
+
## Documentation
|
|
115
|
+
|
|
116
|
+
[`Full API Reference`](https://svdc1.github.io/scrape-do-python)
|
|
117
|
+
|
|
118
|
+
## Roadmap
|
|
119
|
+
|
|
120
|
+
See [`ROADMAP`](.github/ROADMAP.md) for the upcoming `Async Client`, `Proxy-Mode Clients`, `Async-API Support`, and `Plugin Support`
|
|
121
|
+
|
|
122
|
+
## Contributing
|
|
123
|
+
|
|
124
|
+
Pull Requests, Bug Reports, and Feature Requests are all welcome.
|
|
125
|
+
|
|
126
|
+
See [`CONTRIBUTING`](.github/CONTRIBUTING.md) for local setup, test commands, and PR conventions
|
|
127
|
+
|
|
128
|
+
## Community
|
|
129
|
+
|
|
130
|
+
Participation is governed by our [`Code of Conduct`](.github/CODE_OF_CONDUCT.md). To privately report a security issue, see the [`Security Policy`](.github/SECURITY.md).
|
|
131
|
+
|
|
132
|
+
## License
|
|
133
|
+
|
|
134
|
+
`scrape-do-python` is released under the [`MIT License`](LICENSE)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
[](https://pypi.org/project/scrape-do-python/)
|
|
2
|
+
[](https://pypi.org/project/scrape-do-python/)
|
|
3
|
+
[](LICENSE)
|
|
4
|
+
[](https://svdc1.github.io/scrape-do-python)
|
|
5
|
+
[](https://app.codecov.io/gh/svdC1/scrape-do-python?flags%5B0%5D=unit)
|
|
6
|
+
[](https://app.codecov.io/gh/svdC1/scrape-do-python?flags%5B0%5D=integration)
|
|
7
|
+
|
|
8
|
+
# scrape-do-python
|
|
9
|
+
|
|
10
|
+
A Python SDK for the [`Scrape.do`](https://scrape.do) web-scraping proxy API.
|
|
11
|
+
|
|
12
|
+
Built on [`httpx`](https://github.com/encode/httpx) and [`pydantic v2`](https://github.com/pydantic/pydantic), with strict request validation, automatic retries on gateway errors, sticky-session validation, and SDK-native lifecycle hooks.
|
|
13
|
+
|
|
14
|
+
## Status
|
|
15
|
+
> `v0.1.0` → Early but functional
|
|
16
|
+
|
|
17
|
+
> Synchronous client is shipped
|
|
18
|
+
|
|
19
|
+
> Async and Proxy-Mode clients are on the [`Roadmap`](.github/ROADMAP.md)
|
|
20
|
+
|
|
21
|
+
> Breaking changes are possible between `0.x` minor versions.
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install scrape-do-python
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Quickstart
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from scrape_do.client import ScrapeDoClient
|
|
33
|
+
|
|
34
|
+
# API Token pulled from SCRAPE_DO_API_KEY env variable
|
|
35
|
+
# Can also be provided via 'api_token' argument
|
|
36
|
+
|
|
37
|
+
with ScrapeDoClient() as client:
|
|
38
|
+
response = client.get(
|
|
39
|
+
"https://example.com",
|
|
40
|
+
super=True,
|
|
41
|
+
render=True,
|
|
42
|
+
return_json=True,
|
|
43
|
+
show_frames=True,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
print(response.is_proxy_error)
|
|
47
|
+
|
|
48
|
+
print(response.frames[0].url)
|
|
49
|
+
|
|
50
|
+
print(response.remaining_credits)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Features
|
|
54
|
+
|
|
55
|
+
### Type-Checked Request Parameters
|
|
56
|
+
|
|
57
|
+
Request parameters are fully type-checked and automatically validated via the [`RequestParameters`](https://svdc1.github.io/scrape-do-python/models/parameters/#scrape_do.models.parameters.RequestParameters) pydantic model
|
|
58
|
+
|
|
59
|
+
### Smart Routing
|
|
60
|
+
[`ScrapeDoClient.request()`](https://svdc1.github.io/scrape-do-python/client/#scrape_do.client.ScrapeDoClient.request) accepts either `**api_kwargs`, a pre-built `RequestParameters`, or a raw `api.scrape.do` URL for request parameters
|
|
61
|
+
|
|
62
|
+
### Automatic Retries
|
|
63
|
+
|
|
64
|
+
[`ScrapeDoClient`](https://svdc1.github.io/scrape-do-python/client/#scrape_do.client.ScrapeDoClient.session_validator) can automatically retry requests on Scrape.do gateway errors (`429` / `502` / `510`) with `customizable backoff` (static or callable)
|
|
65
|
+
|
|
66
|
+
### Sticky-Session Validation
|
|
67
|
+
|
|
68
|
+
Supply a [`session_validator`](https://svdc1.github.io/scrape-do-python/client/#scrape_do.client.SyncSessionValidator) callback to detect proxy node rotations and raise [`RotatedSessionError`](https://svdc1.github.io/scrape-do-python/exceptions/#scrape_do.exceptions.RotatedSessionError)
|
|
69
|
+
|
|
70
|
+
### SDK-Native Event Hooks
|
|
71
|
+
|
|
72
|
+
[`request / response / retry`](https://svdc1.github.io/scrape-do-python/client/#scrape_do.client.SyncClientEventHooks) lifecycle hooks, distinct from httpx's transport-level hooks.
|
|
73
|
+
|
|
74
|
+
### Strongly-Typed Responses
|
|
75
|
+
|
|
76
|
+
[`ScrapeDoResponse`](https://svdc1.github.io/scrape-do-python/models/response/#scrape_do.models.response.ScrapeDoResponse) exposes the parsed JSON envelope, browser action results, screenshots, and network/websocket logs.
|
|
77
|
+
|
|
78
|
+
### Browser Automation
|
|
79
|
+
|
|
80
|
+
Pydantic models for [`Browser Actions`](https://svdc1.github.io/scrape-do-python/models/browser_actions/) providing validation and type-hinting for the `playWithBrowser` API parameter
|
|
81
|
+
|
|
82
|
+
## Documentation
|
|
83
|
+
|
|
84
|
+
[`Full API Reference`](https://svdc1.github.io/scrape-do-python)
|
|
85
|
+
|
|
86
|
+
## Roadmap
|
|
87
|
+
|
|
88
|
+
See [`ROADMAP`](.github/ROADMAP.md) for the upcoming `Async Client`, `Proxy-Mode Clients`, `Async-API Support`, and `Plugin Support`
|
|
89
|
+
|
|
90
|
+
## Contributing
|
|
91
|
+
|
|
92
|
+
Pull Requests, Bug Reports, and Feature Requests are all welcome.
|
|
93
|
+
|
|
94
|
+
See [`CONTRIBUTING`](.github/CONTRIBUTING.md) for local setup, test commands, and PR conventions
|
|
95
|
+
|
|
96
|
+
## Community
|
|
97
|
+
|
|
98
|
+
Participation is governed by our [`Code of Conduct`](.github/CODE_OF_CONDUCT.md). To privately report a security issue, see the [`Security Policy`](.github/SECURITY.md).
|
|
99
|
+
|
|
100
|
+
## License
|
|
101
|
+
|
|
102
|
+
`scrape-do-python` is released under the [`MIT License`](LICENSE)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "scrape-do-python"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A Python SDK for the Scrape.do API"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "svdC1", email = "svdc1mail@gmail.com" }
|
|
14
|
+
]
|
|
15
|
+
dependencies = [
|
|
16
|
+
"httpx>=0.25.0",
|
|
17
|
+
"pydantic>=2.0.0"
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
Documentation = "https://svdc1.github.io/scrape-do-python"
|
|
22
|
+
Repository = "https://github.com/svdC1/scrape-do-python"
|
|
23
|
+
Issues = "https://github.com/svdC1/scrape-do-python/issues"
|
|
24
|
+
Changelog = "https://github.com/svdC1/scrape-do-python/blob/main/.github/CHANGELOG.md"
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = [
|
|
28
|
+
"pytest>=7.0",
|
|
29
|
+
"pytest-cov",
|
|
30
|
+
"pytest-mock",
|
|
31
|
+
"pytest-asyncio",
|
|
32
|
+
"respx",
|
|
33
|
+
"mypy",
|
|
34
|
+
"ruff"
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
docs = [
|
|
38
|
+
"mkdocs",
|
|
39
|
+
"mkdocs-material",
|
|
40
|
+
"mkdocstrings",
|
|
41
|
+
"mkdocstrings-python",
|
|
42
|
+
"mkdocs-autorefs",
|
|
43
|
+
"black"
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[tool.setuptools.packages.find]
|
|
47
|
+
where = ["src"]
|
|
48
|
+
|
|
49
|
+
[tool.pytest.ini_options]
|
|
50
|
+
markers = [
|
|
51
|
+
"unit: marks tests as unit tests",
|
|
52
|
+
"integration: marks tests as integration tests",
|
|
53
|
+
]
|
|
54
|
+
|
|
55
|
+
[tool.coverage.run]
|
|
56
|
+
branch = true
|
|
57
|
+
source = ["scrape_do"]
|
|
58
|
+
relative_files = true
|
|
59
|
+
|
|
60
|
+
[tool.coverage.paths]
|
|
61
|
+
source = [
|
|
62
|
+
"src/scrape_do",
|
|
63
|
+
"*/site-packages/scrape_do",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[tool.coverage.report]
|
|
67
|
+
show_missing = true
|
|
68
|
+
skip_covered = false
|
|
69
|
+
precision = 2
|
|
70
|
+
exclude_lines = [
|
|
71
|
+
"pragma: no cover",
|
|
72
|
+
"raise NotImplementedError",
|
|
73
|
+
"if TYPE_CHECKING:",
|
|
74
|
+
"if __name__ == .__main__.:",
|
|
75
|
+
"@(abc\\.)?abstractmethod",
|
|
76
|
+
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|