capture-sdk 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- capture/__init__.py +4 -0
- capture/client.py +112 -0
- capture_sdk-1.0.0.dist-info/METADATA +232 -0
- capture_sdk-1.0.0.dist-info/RECORD +6 -0
- capture_sdk-1.0.0.dist-info/WHEEL +4 -0
- capture_sdk-1.0.0.dist-info/licenses/LICENSE +21 -0
capture/__init__.py
ADDED
capture/client.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from typing import Any, Dict, Literal, Optional, Union
|
|
3
|
+
from urllib.parse import urlencode
|
|
4
|
+
|
|
5
|
+
import aiohttp
|
|
6
|
+
from yarl import URL
|
|
7
|
+
|
|
8
|
+
# The five capture kinds the URL builders can request; used as the path segment.
RequestType = Literal["image", "pdf", "content", "metadata", "animated"]
# Per-request options; each entry becomes one query-string parameter.
RequestOptions = Dict[str, Union[str, int, bool]]


class Capture:
    """Client that builds signed Capture URLs and optionally fetches them.

    A request URL has the form
    ``{base}/{key}/{token}/{request_type}?{query}``, where ``token`` is the
    MD5 hex digest of the secret concatenated with the encoded query string.
    """

    API_URL = "https://cdn.capture.page"
    EDGE_URL = "https://edge.capture.page"

    def __init__(
        self, key: str, secret: str, options: Optional[Dict[str, bool]] = None
    ) -> None:
        """Store the credentials and client-level options.

        Args:
            key: API key, placed in the URL path.
            secret: API secret, used only to compute the request token.
            options: Client options. ``{"useEdge": True}`` selects
                ``EDGE_URL`` instead of ``API_URL``.
        """
        self.key = key
        self.secret = secret
        # Copy so that later mutation of the caller's dict cannot silently
        # change which host this client talks to.
        self.options = dict(options) if options else {}

    def _generate_token(self, secret: str, url: str) -> str:
        """Return the MD5 hex digest of ``secret`` followed by ``url``.

        ``url`` is the already-encoded query string when called from
        ``_build_url``.
        """
        token_string = f"{secret}{url}"
        return hashlib.md5(token_string.encode()).hexdigest()

    def _encode_query_string(self, params: Dict[str, Any]) -> str:
        """Encode ``params`` as a URL query string.

        Entries whose value is ``None`` are dropped, booleans are rendered as
        lowercase ``true``/``false``, and insertion order is preserved.
        """
        filtered_params = {
            name: (str(value).lower() if isinstance(value, bool) else value)
            for name, value in params.items()
            if value is not None
        }
        return urlencode(filtered_params, safe="")

    def _build_url(
        self, url: str, request_type: RequestType, options: Optional[RequestOptions] = None
    ) -> str:
        """Build the signed request URL for ``url`` and ``request_type``.

        Args:
            url: Address of the page to capture.
            request_type: Capture kind; becomes the last path segment.
            options: Extra query parameters. The dict is not modified; a
                ``"url"`` entry in it is overridden by ``url``.

        Returns:
            The complete request URL including token and query string.

        Raises:
            ValueError: If the key or secret is missing, or ``url`` is
                ``None``, empty, or only whitespace.
            TypeError: If ``url`` is not a string.
        """
        if not self.key or not self.secret:
            raise ValueError("Key and Secret is required")

        if url is None:
            raise ValueError("url is required")

        if not isinstance(url, str):
            raise TypeError("url should be of type string (something like www.google.com)")

        # A blank target would otherwise yield a signed URL with nothing to capture.
        if not url.strip():
            raise ValueError("url is required")

        params = dict(options) if options else {}
        params["url"] = url

        query_string = self._encode_query_string(params)
        token = self._generate_token(self.secret, query_string)

        base_url = self.EDGE_URL if self.options.get("useEdge") else self.API_URL

        return f"{base_url}/{self.key}/{token}/{request_type}?{query_string}"

    def build_image_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed ``image`` request URL for ``url``."""
        return self._build_url(url, "image", options)

    def build_pdf_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed ``pdf`` request URL for ``url``."""
        return self._build_url(url, "pdf", options)

    def build_content_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed ``content`` request URL for ``url``."""
        return self._build_url(url, "content", options)

    def build_metadata_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed ``metadata`` request URL for ``url``."""
        return self._build_url(url, "metadata", options)

    def build_animated_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed ``animated`` request URL for ``url``."""
        return self._build_url(url, "animated", options)

    async def _fetch_bytes(self, fetch_url: str) -> bytes:
        """GET ``fetch_url`` and return the raw response body.

        Raises whatever ``response.raise_for_status()`` raises when the
        server answers with an error status.
        """
        async with aiohttp.ClientSession() as session:
            # encoded=True keeps yarl from re-quoting the query string the
            # token was computed over.
            async with session.get(URL(fetch_url, encoded=True)) as response:
                response.raise_for_status()
                return await response.read()

    async def _fetch_json(self, fetch_url: str) -> Any:
        """GET ``fetch_url`` and return the response body parsed as JSON.

        Raises whatever ``response.raise_for_status()`` raises when the
        server answers with an error status.
        """
        async with aiohttp.ClientSession() as session:
            # encoded=True keeps yarl from re-quoting the query string the
            # token was computed over.
            async with session.get(URL(fetch_url, encoded=True)) as response:
                response.raise_for_status()
                return await response.json()

    async def fetch_image(self, url: str, options: Optional[RequestOptions] = None) -> bytes:
        """Request an image capture of ``url`` and return the response bytes."""
        return await self._fetch_bytes(self.build_image_url(url, options))

    async def fetch_pdf(self, url: str, options: Optional[RequestOptions] = None) -> bytes:
        """Request a PDF capture of ``url`` and return the response bytes."""
        return await self._fetch_bytes(self.build_pdf_url(url, options))

    async def fetch_content(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> Dict[str, Union[bool, str]]:
        """Request the content of ``url`` and return the decoded JSON body."""
        return await self._fetch_json(self.build_content_url(url, options))

    async def fetch_metadata(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> Dict[str, Union[bool, Dict[str, Union[str, int]]]]:
        """Request the metadata of ``url`` and return the decoded JSON body."""
        return await self._fetch_json(self.build_metadata_url(url, options))

    async def fetch_animated(self, url: str, options: Optional[RequestOptions] = None) -> bytes:
        """Request an animated capture of ``url`` and return the response bytes."""
        return await self._fetch_bytes(self.build_animated_url(url, options))
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: capture-sdk
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Python SDK for Capture - Screenshot and content extraction API
|
|
5
|
+
Project-URL: Homepage, https://capture.page
|
|
6
|
+
Project-URL: Documentation, https://docs.capture.page
|
|
7
|
+
Project-URL: Repository, https://github.com/techulus/capture-py
|
|
8
|
+
Author-email: Capture Team <support@capture.page>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: capture,content extraction,pdf,screenshot,web scraping
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
24
|
+
Requires-Dist: yarl>=1.9.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# Capture Python SDK
|
|
34
|
+
|
|
35
|
+
Official Python SDK for [Capture](https://capture.page) - Screenshot and content extraction API.
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install capture-sdk
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
from capture import Capture
|
|
47
|
+
|
|
48
|
+
client = Capture("your-api-key", "your-api-secret")
|
|
49
|
+
|
|
50
|
+
image_url = client.build_image_url("https://example.com")
|
|
51
|
+
print(image_url)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Features
|
|
55
|
+
|
|
56
|
+
- **Screenshot Capture**: Capture full-page or viewport screenshots as PNG/JPG
|
|
57
|
+
- **PDF Generation**: Convert web pages to PDF documents
|
|
58
|
+
- **Content Extraction**: Extract HTML and text content from web pages
|
|
59
|
+
- **Metadata Extraction**: Get page metadata (title, description, og tags, etc.)
|
|
60
|
+
- **Animated GIFs**: Create animated GIFs of page interactions
|
|
61
|
+
- **Async Support**: Built-in async/await support for all fetch methods
|
|
62
|
+
- **Type Hints**: Full type hint support for better IDE integration
|
|
63
|
+
|
|
64
|
+
## Usage
|
|
65
|
+
|
|
66
|
+
### Initialize the Client
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from capture import Capture
|
|
70
|
+
|
|
71
|
+
client = Capture("your-api-key", "your-api-secret")
|
|
72
|
+
|
|
73
|
+
client_with_edge = Capture("your-api-key", "your-api-secret", {"useEdge": True})
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Building URLs
|
|
77
|
+
|
|
78
|
+
The SDK provides URL builders for each capture type:
|
|
79
|
+
|
|
80
|
+
#### Image Capture
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
image_url = client.build_image_url("https://example.com")
|
|
84
|
+
|
|
85
|
+
image_url_with_options = client.build_image_url(
|
|
86
|
+
"https://example.com",
|
|
87
|
+
{
|
|
88
|
+
"full": True,
|
|
89
|
+
"delay": 2,
|
|
90
|
+
"width": 1920,
|
|
91
|
+
"height": 1080,
|
|
92
|
+
"quality": 90
|
|
93
|
+
}
|
|
94
|
+
)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
#### PDF Capture
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
pdf_url = client.build_pdf_url("https://example.com")
|
|
101
|
+
|
|
102
|
+
pdf_url_with_options = client.build_pdf_url(
|
|
103
|
+
"https://example.com",
|
|
104
|
+
{
|
|
105
|
+
"full": True,
|
|
106
|
+
"delay": 1
|
|
107
|
+
}
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
#### Content Extraction
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
content_url = client.build_content_url("https://example.com")
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
#### Metadata Extraction
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
metadata_url = client.build_metadata_url("https://example.com")
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
#### Animated GIF
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
animated_url = client.build_animated_url("https://example.com")
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Fetching Data (Async)
|
|
130
|
+
|
|
131
|
+
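The SDK provides async methods that build the signed URL, perform the HTTP GET, and return the response body directly: raw bytes for images, PDFs, and animated GIFs, and parsed JSON for content and metadata. A non-success HTTP status raises an exception. The examples below reuse the `client` created in "Initialize the Client" and must run inside an event loop (for example via `asyncio.run`):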
|
|
132
|
+
|
|
133
|
+
#### Fetch Image
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
import asyncio
|
|
137
|
+
|
|
138
|
+
async def main():
|
|
139
|
+
image_data = await client.fetch_image("https://example.com")
|
|
140
|
+
with open("screenshot.png", "wb") as f:
|
|
141
|
+
f.write(image_data)
|
|
142
|
+
|
|
143
|
+
asyncio.run(main())
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
#### Fetch PDF
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
async def main():
|
|
150
|
+
pdf_data = await client.fetch_pdf("https://example.com", {"full": True})
|
|
151
|
+
with open("page.pdf", "wb") as f:
|
|
152
|
+
f.write(pdf_data)
|
|
153
|
+
|
|
154
|
+
asyncio.run(main())
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
#### Fetch Content
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
async def main():
|
|
161
|
+
content = await client.fetch_content("https://example.com")
|
|
162
|
+
print(content["html"])
|
|
163
|
+
print(content["textContent"])
|
|
164
|
+
|
|
165
|
+
asyncio.run(main())
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
#### Fetch Metadata
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
async def main():
|
|
172
|
+
metadata = await client.fetch_metadata("https://example.com")
|
|
173
|
+
print(metadata["metadata"])
|
|
174
|
+
|
|
175
|
+
asyncio.run(main())
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
#### Fetch Animated GIF
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
async def main():
|
|
182
|
+
gif_data = await client.fetch_animated("https://example.com")
|
|
183
|
+
with open("animation.gif", "wb") as f:
|
|
184
|
+
f.write(gif_data)
|
|
185
|
+
|
|
186
|
+
asyncio.run(main())
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Configuration Options
|
|
190
|
+
|
|
191
|
+
### Constructor Options
|
|
192
|
+
|
|
193
|
+
- `useEdge` (bool): Use edge.capture.page instead of cdn.capture.page for faster response times
|
|
194
|
+
|
|
195
|
+
## API Endpoints
|
|
196
|
+
|
|
197
|
+
The SDK supports two base URLs:
|
|
198
|
+
|
|
199
|
+
- **CDN**: `https://cdn.capture.page` (default)
|
|
200
|
+
- **Edge**: `https://edge.capture.page` (when the client is created with `{"useEdge": True}`)
|
|
201
|
+
|
|
202
|
+
## Type Hints
|
|
203
|
+
|
|
204
|
+
The SDK includes full type hint support:
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
from capture import Capture, RequestOptions
|
|
208
|
+
|
|
209
|
+
options: RequestOptions = {
|
|
210
|
+
"full": True,
|
|
211
|
+
"delay": 2,
|
|
212
|
+
"width": 1920
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
client = Capture("key", "secret")
|
|
216
|
+
url: str = client.build_image_url("https://example.com", options)
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## License
|
|
220
|
+
|
|
221
|
+
MIT
|
|
222
|
+
|
|
223
|
+
## Links
|
|
224
|
+
|
|
225
|
+
- [Website](https://capture.page)
|
|
226
|
+
- [Documentation](https://docs.capture.page)
|
|
227
|
+
- [API Reference](https://docs.capture.page/api)
|
|
228
|
+
- [GitHub](https://github.com/techulus/capture-py)
|
|
229
|
+
|
|
230
|
+
## Support
|
|
231
|
+
|
|
232
|
+
For support, please visit [capture.page](https://capture.page) or open an issue on [GitHub](https://github.com/techulus/capture-py).
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
capture/__init__.py,sha256=3RpAQG7CO8xHURLGCra60xZer5NAW7tXfQr-MEAZWJs,135
|
|
2
|
+
capture/client.py,sha256=0rP8PSPp8fGFk6zWgTq5Z6HoaQoHpUcsfQZOp7CFeTc,4614
|
|
3
|
+
capture_sdk-1.0.0.dist-info/METADATA,sha256=At4FktGn1V0Nik0_y1DbPIvE-QIJA5PM9HyOkYCwTUM,5296
|
|
4
|
+
capture_sdk-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
5
|
+
capture_sdk-1.0.0.dist-info/licenses/LICENSE,sha256=qz-tHQmZDgzuwpEY4svMP2ZdakvId2QPv-VYoU-zybA,1069
|
|
6
|
+
capture_sdk-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024-2025 Capture
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|