capture-sdk 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
capture/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .client import Capture, RequestOptions, RequestType
2
+
3
+ __version__ = "1.0.0"
4
+ __all__ = ["Capture", "RequestOptions", "RequestType"]
capture/client.py ADDED
@@ -0,0 +1,112 @@
1
+ import hashlib
2
+ from typing import Any, Dict, Literal, Optional, Union
3
+ from urllib.parse import urlencode
4
+
5
+ import aiohttp
6
+ from yarl import URL
7
+
8
# The kinds of capture request; the chosen value becomes the last path segment
# of the generated URL.
RequestType = Literal["image", "pdf", "content", "metadata", "animated"]
# Per-request options; every entry is serialised into the URL query string.
RequestOptions = Dict[str, Union[str, int, bool]]


class Capture:
    """Build signed Capture request URLs and, optionally, fetch them.

    A request URL has the shape
    ``{base_url}/{key}/{token}/{request_type}?{query_string}`` where ``token``
    is the MD5 hex digest of the secret concatenated with the query string.

    The ``build_*_url`` methods are synchronous and perform no I/O. The
    ``fetch_*`` coroutines build the same URL and issue a GET request for it
    with ``aiohttp``.
    """

    API_URL = "https://cdn.capture.page"
    EDGE_URL = "https://edge.capture.page"

    def __init__(
        self, key: str, secret: str, options: Optional[Dict[str, bool]] = None
    ) -> None:
        """Store the credentials and client-level options.

        Args:
            key: API key; placed in the URL path.
            secret: API secret; only ever used as input to the token hash.
            options: Client options. The only one read by this class is
                ``"useEdge"``, which selects ``EDGE_URL`` over ``API_URL``.
        """
        self.key = key
        self.secret = secret
        self.options = options or {}

    def _generate_token(self, secret: str, url: str) -> str:
        """Return the MD5 hex digest of ``secret`` followed by ``url``.

        Despite the parameter name, callers in this class pass the encoded
        query string as ``url``.
        """
        return hashlib.md5(f"{secret}{url}".encode()).hexdigest()

    def _encode_query_string(self, params: Dict[str, Any]) -> str:
        """Percent-encode ``params`` as a query string.

        Entries whose value is ``None`` are dropped, and booleans are written
        as lowercase ``"true"`` / ``"false"``. Insertion order is preserved.
        """
        filtered = {
            name: str(value).lower() if isinstance(value, bool) else value
            for name, value in params.items()
            if value is not None
        }
        # safe="" makes urlencode escape "/" as well.
        return urlencode(filtered, safe="")

    def _build_url(
        self, url: str, request_type: RequestType, options: Optional[RequestOptions] = None
    ) -> str:
        """Assemble the signed request URL for ``url``.

        Args:
            url: Address of the page to capture.
            request_type: Path segment naming the kind of capture.
            options: Extra query parameters. The mapping is copied, never
                mutated, and any ``"url"`` entry in it is overwritten.

        Returns:
            The complete request URL, including the token.

        Raises:
            ValueError: If the key or secret is empty, or ``url`` is ``None``.
            TypeError: If ``url`` is not a string.
        """
        if not self.key or not self.secret:
            raise ValueError("Key and Secret is required")
        if url is None:
            raise ValueError("url is required")
        if not isinstance(url, str):
            raise TypeError("url should be of type string (something like www.google.com)")

        # Work on a copy so the caller's options dict is left untouched.
        params = dict(options) if options else {}
        params["url"] = url

        query_string = self._encode_query_string(params)
        # The token is derived from the exact query string that is sent.
        token = self._generate_token(self.secret, query_string)

        if self.options.get("useEdge"):
            base_url = self.EDGE_URL
        else:
            base_url = self.API_URL

        return f"{base_url}/{self.key}/{token}/{request_type}?{query_string}"

    def build_image_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed URL for an ``image`` request of ``url``."""
        return self._build_url(url, "image", options)

    def build_pdf_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed URL for a ``pdf`` request of ``url``."""
        return self._build_url(url, "pdf", options)

    def build_content_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed URL for a ``content`` request of ``url``."""
        return self._build_url(url, "content", options)

    def build_metadata_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed URL for a ``metadata`` request of ``url``."""
        return self._build_url(url, "metadata", options)

    def build_animated_url(self, url: str, options: Optional[RequestOptions] = None) -> str:
        """Return the signed URL for an ``animated`` request of ``url``."""
        return self._build_url(url, "animated", options)

    async def _fetch_bytes(self, fetch_url: str) -> bytes:
        """GET ``fetch_url`` and return the raw response body.

        A new ``aiohttp.ClientSession`` is opened and closed for each call.
        ``raise_for_status()`` is called on the response before the body is
        read.
        """
        async with aiohttp.ClientSession() as session:
            # encoded=True: the URL is already percent-encoded and the token
            # was computed over that exact query string, so it must be sent
            # without re-quoting.
            async with session.get(URL(fetch_url, encoded=True)) as response:
                response.raise_for_status()
                return await response.read()

    async def _fetch_json(self, fetch_url: str) -> Any:
        """GET ``fetch_url`` and return the response body decoded as JSON.

        A new ``aiohttp.ClientSession`` is opened and closed for each call.
        ``raise_for_status()`` is called on the response before the body is
        decoded.
        """
        async with aiohttp.ClientSession() as session:
            # encoded=True: see _fetch_bytes; the signed URL must not be
            # re-quoted.
            async with session.get(URL(fetch_url, encoded=True)) as response:
                response.raise_for_status()
                return await response.json()

    async def fetch_image(self, url: str, options: Optional[RequestOptions] = None) -> bytes:
        """Request an image capture of ``url`` and return the response bytes."""
        return await self._fetch_bytes(self.build_image_url(url, options))

    async def fetch_pdf(self, url: str, options: Optional[RequestOptions] = None) -> bytes:
        """Request a PDF capture of ``url`` and return the response bytes."""
        return await self._fetch_bytes(self.build_pdf_url(url, options))

    async def fetch_content(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> Dict[str, Union[bool, str]]:
        """Request the content of ``url`` and return the decoded JSON body."""
        return await self._fetch_json(self.build_content_url(url, options))

    async def fetch_metadata(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> Dict[str, Union[bool, Dict[str, Union[str, int]]]]:
        """Request the metadata of ``url`` and return the decoded JSON body."""
        return await self._fetch_json(self.build_metadata_url(url, options))

    async def fetch_animated(self, url: str, options: Optional[RequestOptions] = None) -> bytes:
        """Request an animated capture of ``url`` and return the response bytes."""
        return await self._fetch_bytes(self.build_animated_url(url, options))
capture_sdk-1.0.0.dist-info/METADATA ADDED
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.4
2
+ Name: capture-sdk
3
+ Version: 1.0.0
4
+ Summary: Python SDK for Capture - Screenshot and content extraction API
5
+ Project-URL: Homepage, https://capture.page
6
+ Project-URL: Documentation, https://docs.capture.page
7
+ Project-URL: Repository, https://github.com/techulus/capture-py
8
+ Author-email: Capture Team <support@capture.page>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: capture,content extraction,pdf,screenshot,web scraping
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Requires-Python: >=3.8
23
+ Requires-Dist: aiohttp>=3.8.0
24
+ Requires-Dist: yarl>=1.9.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: black>=23.0.0; extra == 'dev'
27
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
28
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
29
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # Capture Python SDK
34
+
35
+ Official Python SDK for [Capture](https://capture.page) - Screenshot and content extraction API.
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install capture-sdk
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ```python
46
+ from capture import Capture
47
+
48
+ client = Capture("your-api-key", "your-api-secret")
49
+
50
+ image_url = client.build_image_url("https://example.com")
51
+ print(image_url)
52
+ ```
53
+
54
+ ## Features
55
+
56
+ - **Screenshot Capture**: Capture full-page or viewport screenshots as PNG/JPG
57
+ - **PDF Generation**: Convert web pages to PDF documents
58
+ - **Content Extraction**: Extract HTML and text content from web pages
59
+ - **Metadata Extraction**: Get page metadata (title, description, og tags, etc.)
60
+ - **Animated GIFs**: Create animated GIFs of page interactions
61
+ - **Async Support**: Built-in async/await support for all fetch methods
62
+ - **Type Hints**: Full type hint support for better IDE integration
63
+
64
+ ## Usage
65
+
66
+ ### Initialize the Client
67
+
68
+ ```python
69
+ from capture import Capture
70
+
71
+ client = Capture("your-api-key", "your-api-secret")
72
+
73
+ client_with_edge = Capture("your-api-key", "your-api-secret", {"useEdge": True})
74
+ ```
75
+
76
+ ### Building URLs
77
+
78
+ The SDK provides URL builders for each capture type:
79
+
80
+ #### Image Capture
81
+
82
+ ```python
83
+ image_url = client.build_image_url("https://example.com")
84
+
85
+ image_url_with_options = client.build_image_url(
86
+ "https://example.com",
87
+ {
88
+ "full": True,
89
+ "delay": 2,
90
+ "width": 1920,
91
+ "height": 1080,
92
+ "quality": 90
93
+ }
94
+ )
95
+ ```
96
+
97
+ #### PDF Capture
98
+
99
+ ```python
100
+ pdf_url = client.build_pdf_url("https://example.com")
101
+
102
+ pdf_url_with_options = client.build_pdf_url(
103
+ "https://example.com",
104
+ {
105
+ "full": True,
106
+ "delay": 1
107
+ }
108
+ )
109
+ ```
110
+
111
+ #### Content Extraction
112
+
113
+ ```python
114
+ content_url = client.build_content_url("https://example.com")
115
+ ```
116
+
117
+ #### Metadata Extraction
118
+
119
+ ```python
120
+ metadata_url = client.build_metadata_url("https://example.com")
121
+ ```
122
+
123
+ #### Animated GIF
124
+
125
+ ```python
126
+ animated_url = client.build_animated_url("https://example.com")
127
+ ```
128
+
129
+ ### Fetching Data (Async)
130
+
131
+ The SDK provides async methods to fetch data directly:
132
+
133
+ #### Fetch Image
134
+
135
+ ```python
136
+ import asyncio
137
+
138
+ async def main():
139
+ image_data = await client.fetch_image("https://example.com")
140
+ with open("screenshot.png", "wb") as f:
141
+ f.write(image_data)
142
+
143
+ asyncio.run(main())
144
+ ```
145
+
146
+ #### Fetch PDF
147
+
148
+ ```python
149
+ async def main():
150
+ pdf_data = await client.fetch_pdf("https://example.com", {"full": True})
151
+ with open("page.pdf", "wb") as f:
152
+ f.write(pdf_data)
153
+
154
+ asyncio.run(main())
155
+ ```
156
+
157
+ #### Fetch Content
158
+
159
+ ```python
160
+ async def main():
161
+ content = await client.fetch_content("https://example.com")
162
+ print(content["html"])
163
+ print(content["textContent"])
164
+
165
+ asyncio.run(main())
166
+ ```
167
+
168
+ #### Fetch Metadata
169
+
170
+ ```python
171
+ async def main():
172
+ metadata = await client.fetch_metadata("https://example.com")
173
+ print(metadata["metadata"])
174
+
175
+ asyncio.run(main())
176
+ ```
177
+
178
+ #### Fetch Animated GIF
179
+
180
+ ```python
181
+ async def main():
182
+ gif_data = await client.fetch_animated("https://example.com")
183
+ with open("animation.gif", "wb") as f:
184
+ f.write(gif_data)
185
+
186
+ asyncio.run(main())
187
+ ```
188
+
189
+ ## Configuration Options
190
+
191
+ ### Constructor Options
192
+
193
+ - `useEdge` (bool): Use edge.capture.page instead of cdn.capture.page for faster response times
194
+
195
+ ## API Endpoints
196
+
197
+ The SDK supports two base URLs:
198
+
199
+ - **CDN**: `https://cdn.capture.page` (default)
200
+ - **Edge**: `https://edge.capture.page` (when the client is created with `{"useEdge": True}`)
201
+
202
+ ## Type Hints
203
+
204
+ The SDK includes full type hint support:
205
+
206
+ ```python
207
+ from capture import Capture, RequestOptions
208
+
209
+ options: RequestOptions = {
210
+ "full": True,
211
+ "delay": 2,
212
+ "width": 1920
213
+ }
214
+
215
+ client = Capture("key", "secret")
216
+ url: str = client.build_image_url("https://example.com", options)
217
+ ```
218
+
219
+ ## License
220
+
221
+ MIT
222
+
223
+ ## Links
224
+
225
+ - [Website](https://capture.page)
226
+ - [Documentation](https://docs.capture.page)
227
+ - [API Reference](https://docs.capture.page/api)
228
+ - [GitHub](https://github.com/techulus/capture-py)
229
+
230
+ ## Support
231
+
232
+ For support, please visit [capture.page](https://capture.page) or open an issue on GitHub.
capture_sdk-1.0.0.dist-info/RECORD ADDED
@@ -0,0 +1,6 @@
1
+ capture/__init__.py,sha256=3RpAQG7CO8xHURLGCra60xZer5NAW7tXfQr-MEAZWJs,135
2
+ capture/client.py,sha256=0rP8PSPp8fGFk6zWgTq5Z6HoaQoHpUcsfQZOp7CFeTc,4614
3
+ capture_sdk-1.0.0.dist-info/METADATA,sha256=At4FktGn1V0Nik0_y1DbPIvE-QIJA5PM9HyOkYCwTUM,5296
4
+ capture_sdk-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
5
+ capture_sdk-1.0.0.dist-info/licenses/LICENSE,sha256=qz-tHQmZDgzuwpEY4svMP2ZdakvId2QPv-VYoU-zybA,1069
6
+ capture_sdk-1.0.0.dist-info/RECORD,,
capture_sdk-1.0.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
capture_sdk-1.0.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-2025 Capture
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.