capture-sdk 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+
5
+ *.so
6
+
7
+ .Python
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+ MANIFEST
25
+
26
+ .pytest_cache/
27
+ .coverage
28
+ .coverage.*
29
+ htmlcov/
30
+ .tox/
31
+ .nox/
32
+ .hypothesis/
33
+
34
+ .env
35
+ .venv
36
+ env/
37
+ venv/
38
+ ENV/
39
+ env.bak/
40
+ venv.bak/
41
+
42
+ .mypy_cache/
43
+ .dmypy.json
44
+ dmypy.json
45
+
46
+ .ruff_cache
47
+
48
+ .vscode/
49
+ .idea/
50
+ *.swp
51
+ *.swo
52
+ *~
53
+
54
+ .DS_Store
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-2025 Capture
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.4
2
+ Name: capture-sdk
3
+ Version: 1.0.0
4
+ Summary: Python SDK for Capture - Screenshot and content extraction API
5
+ Project-URL: Homepage, https://capture.page
6
+ Project-URL: Documentation, https://docs.capture.page
7
+ Project-URL: Repository, https://github.com/techulus/capture-py
8
+ Author-email: Capture Team <support@capture.page>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: capture,content extraction,pdf,screenshot,web scraping
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Requires-Python: >=3.8
23
+ Requires-Dist: aiohttp>=3.8.0
24
+ Requires-Dist: yarl>=1.9.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: black>=23.0.0; extra == 'dev'
27
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
28
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
29
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # Capture Python SDK
34
+
35
+ Official Python SDK for [Capture](https://capture.page) - Screenshot and content extraction API.
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install capture-sdk
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ```python
46
+ from capture import Capture
47
+
48
+ client = Capture("your-api-key", "your-api-secret")
49
+
50
+ image_url = client.build_image_url("https://example.com")
51
+ print(image_url)
52
+ ```
53
+
54
+ ## Features
55
+
56
+ - **Screenshot Capture**: Capture full-page or viewport screenshots as PNG/JPG
57
+ - **PDF Generation**: Convert web pages to PDF documents
58
+ - **Content Extraction**: Extract HTML and text content from web pages
59
+ - **Metadata Extraction**: Get page metadata (title, description, og tags, etc.)
60
+ - **Animated GIFs**: Create animated GIFs of page interactions
61
+ - **Async Support**: Built-in async/await support for all fetch methods
62
+ - **Type Hints**: Full type hint support for better IDE integration
63
+
64
+ ## Usage
65
+
66
+ ### Initialize the Client
67
+
68
+ ```python
69
+ from capture import Capture
70
+
71
+ client = Capture("your-api-key", "your-api-secret")
72
+
73
+ client_with_edge = Capture("your-api-key", "your-api-secret", {"useEdge": True})
74
+ ```
75
+
76
+ ### Building URLs
77
+
78
+ The SDK provides URL builders for each capture type:
79
+
80
+ #### Image Capture
81
+
82
+ ```python
83
+ image_url = client.build_image_url("https://example.com")
84
+
85
+ image_url_with_options = client.build_image_url(
86
+ "https://example.com",
87
+ {
88
+ "full": True,
89
+ "delay": 2,
90
+ "width": 1920,
91
+ "height": 1080,
92
+ "quality": 90
93
+ }
94
+ )
95
+ ```
96
+
97
+ #### PDF Capture
98
+
99
+ ```python
100
+ pdf_url = client.build_pdf_url("https://example.com")
101
+
102
+ pdf_url_with_options = client.build_pdf_url(
103
+ "https://example.com",
104
+ {
105
+ "full": True,
106
+ "delay": 1
107
+ }
108
+ )
109
+ ```
110
+
111
+ #### Content Extraction
112
+
113
+ ```python
114
+ content_url = client.build_content_url("https://example.com")
115
+ ```
116
+
117
+ #### Metadata Extraction
118
+
119
+ ```python
120
+ metadata_url = client.build_metadata_url("https://example.com")
121
+ ```
122
+
123
+ #### Animated GIF
124
+
125
+ ```python
126
+ animated_url = client.build_animated_url("https://example.com")
127
+ ```
128
+
129
+ ### Fetching Data (Async)
130
+
131
+ The SDK provides async methods to fetch data directly:
132
+
133
+ #### Fetch Image
134
+
135
+ ```python
136
+ import asyncio
137
+
138
+ async def main():
139
+ image_data = await client.fetch_image("https://example.com")
140
+ with open("screenshot.png", "wb") as f:
141
+ f.write(image_data)
142
+
143
+ asyncio.run(main())
144
+ ```
145
+
146
+ #### Fetch PDF
147
+
148
+ ```python
149
+ async def main():
150
+ pdf_data = await client.fetch_pdf("https://example.com", {"full": True})
151
+ with open("page.pdf", "wb") as f:
152
+ f.write(pdf_data)
153
+
154
+ asyncio.run(main())
155
+ ```
156
+
157
+ #### Fetch Content
158
+
159
+ ```python
160
+ async def main():
161
+ content = await client.fetch_content("https://example.com")
162
+ print(content["html"])
163
+ print(content["textContent"])
164
+
165
+ asyncio.run(main())
166
+ ```
167
+
168
+ #### Fetch Metadata
169
+
170
+ ```python
171
+ async def main():
172
+ metadata = await client.fetch_metadata("https://example.com")
173
+ print(metadata["metadata"])
174
+
175
+ asyncio.run(main())
176
+ ```
177
+
178
+ #### Fetch Animated GIF
179
+
180
+ ```python
181
+ async def main():
182
+ gif_data = await client.fetch_animated("https://example.com")
183
+ with open("animation.gif", "wb") as f:
184
+ f.write(gif_data)
185
+
186
+ asyncio.run(main())
187
+ ```
188
+
189
+ ## Configuration Options
190
+
191
+ ### Constructor Options
192
+
193
+ - `useEdge` (bool, disabled by default): Build URLs against `edge.capture.page` instead of `cdn.capture.page` for faster response times.
194
+
195
+ ## API Endpoints
196
+
197
+ The SDK supports two base URLs:
198
+
199
+ - **CDN**: `https://cdn.capture.page` (default)
200
+ - **Edge**: `https://edge.capture.page` (when the client is constructed with `{"useEdge": True}`)
201
+
202
+ ## Type Hints
203
+
204
+ The SDK includes full type hint support:
205
+
206
+ ```python
207
+ from capture import Capture, RequestOptions
208
+
209
+ options: RequestOptions = {
210
+ "full": True,
211
+ "delay": 2,
212
+ "width": 1920
213
+ }
214
+
215
+ client = Capture("key", "secret")
216
+ url: str = client.build_image_url("https://example.com", options)
217
+ ```
218
+
219
+ ## License
220
+
221
+ MIT
222
+
223
+ ## Links
224
+
225
+ - [Website](https://capture.page)
226
+ - [Documentation](https://docs.capture.page)
227
+ - [API Reference](https://docs.capture.page/api)
228
+ - [GitHub](https://github.com/techulus/capture-py)
229
+
230
+ ## Support
231
+
232
+ For support, please visit [capture.page](https://capture.page) or open an issue on GitHub.
@@ -0,0 +1,200 @@
1
+ # Capture Python SDK
2
+
3
+ Official Python SDK for [Capture](https://capture.page) - Screenshot and content extraction API.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install capture-sdk
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```python
14
+ from capture import Capture
15
+
16
+ client = Capture("your-api-key", "your-api-secret")
17
+
18
+ image_url = client.build_image_url("https://example.com")
19
+ print(image_url)
20
+ ```
21
+
22
+ ## Features
23
+
24
+ - **Screenshot Capture**: Capture full-page or viewport screenshots as PNG/JPG
25
+ - **PDF Generation**: Convert web pages to PDF documents
26
+ - **Content Extraction**: Extract HTML and text content from web pages
27
+ - **Metadata Extraction**: Get page metadata (title, description, og tags, etc.)
28
+ - **Animated GIFs**: Create animated GIFs of page interactions
29
+ - **Async Support**: Built-in async/await support for all fetch methods
30
+ - **Type Hints**: Full type hint support for better IDE integration
31
+
32
+ ## Usage
33
+
34
+ ### Initialize the Client
35
+
36
+ ```python
37
+ from capture import Capture
38
+
39
+ client = Capture("your-api-key", "your-api-secret")
40
+
41
+ client_with_edge = Capture("your-api-key", "your-api-secret", {"useEdge": True})
42
+ ```
43
+
44
+ ### Building URLs
45
+
46
+ The SDK provides URL builders for each capture type:
47
+
48
+ #### Image Capture
49
+
50
+ ```python
51
+ image_url = client.build_image_url("https://example.com")
52
+
53
+ image_url_with_options = client.build_image_url(
54
+ "https://example.com",
55
+ {
56
+ "full": True,
57
+ "delay": 2,
58
+ "width": 1920,
59
+ "height": 1080,
60
+ "quality": 90
61
+ }
62
+ )
63
+ ```
64
+
65
+ #### PDF Capture
66
+
67
+ ```python
68
+ pdf_url = client.build_pdf_url("https://example.com")
69
+
70
+ pdf_url_with_options = client.build_pdf_url(
71
+ "https://example.com",
72
+ {
73
+ "full": True,
74
+ "delay": 1
75
+ }
76
+ )
77
+ ```
78
+
79
+ #### Content Extraction
80
+
81
+ ```python
82
+ content_url = client.build_content_url("https://example.com")
83
+ ```
84
+
85
+ #### Metadata Extraction
86
+
87
+ ```python
88
+ metadata_url = client.build_metadata_url("https://example.com")
89
+ ```
90
+
91
+ #### Animated GIF
92
+
93
+ ```python
94
+ animated_url = client.build_animated_url("https://example.com")
95
+ ```
96
+
97
+ ### Fetching Data (Async)
98
+
99
+ The SDK provides async methods to fetch data directly:
100
+
101
+ #### Fetch Image
102
+
103
+ ```python
104
+ import asyncio
105
+
106
+ async def main():
107
+ image_data = await client.fetch_image("https://example.com")
108
+ with open("screenshot.png", "wb") as f:
109
+ f.write(image_data)
110
+
111
+ asyncio.run(main())
112
+ ```
113
+
114
+ #### Fetch PDF
115
+
116
+ ```python
117
+ async def main():
118
+ pdf_data = await client.fetch_pdf("https://example.com", {"full": True})
119
+ with open("page.pdf", "wb") as f:
120
+ f.write(pdf_data)
121
+
122
+ asyncio.run(main())
123
+ ```
124
+
125
+ #### Fetch Content
126
+
127
+ ```python
128
+ async def main():
129
+ content = await client.fetch_content("https://example.com")
130
+ print(content["html"])
131
+ print(content["textContent"])
132
+
133
+ asyncio.run(main())
134
+ ```
135
+
136
+ #### Fetch Metadata
137
+
138
+ ```python
139
+ async def main():
140
+ metadata = await client.fetch_metadata("https://example.com")
141
+ print(metadata["metadata"])
142
+
143
+ asyncio.run(main())
144
+ ```
145
+
146
+ #### Fetch Animated GIF
147
+
148
+ ```python
149
+ async def main():
150
+ gif_data = await client.fetch_animated("https://example.com")
151
+ with open("animation.gif", "wb") as f:
152
+ f.write(gif_data)
153
+
154
+ asyncio.run(main())
155
+ ```
156
+
157
+ ## Configuration Options
158
+
159
+ ### Constructor Options
160
+
161
+ - `useEdge` (bool, disabled by default): Build URLs against `edge.capture.page` instead of `cdn.capture.page` for faster response times.
162
+
163
+ ## API Endpoints
164
+
165
+ The SDK supports two base URLs:
166
+
167
+ - **CDN**: `https://cdn.capture.page` (default)
168
+ - **Edge**: `https://edge.capture.page` (when the client is constructed with `{"useEdge": True}`)
169
+
170
+ ## Type Hints
171
+
172
+ The SDK includes full type hint support:
173
+
174
+ ```python
175
+ from capture import Capture, RequestOptions
176
+
177
+ options: RequestOptions = {
178
+ "full": True,
179
+ "delay": 2,
180
+ "width": 1920
181
+ }
182
+
183
+ client = Capture("key", "secret")
184
+ url: str = client.build_image_url("https://example.com", options)
185
+ ```
186
+
187
+ ## License
188
+
189
+ MIT
190
+
191
+ ## Links
192
+
193
+ - [Website](https://capture.page)
194
+ - [Documentation](https://docs.capture.page)
195
+ - [API Reference](https://docs.capture.page/api)
196
+ - [GitHub](https://github.com/techulus/capture-py)
197
+
198
+ ## Support
199
+
200
+ For support, please visit [capture.page](https://capture.page) or open an issue on GitHub.
@@ -0,0 +1,66 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "capture-sdk"
7
+ version = "1.0.0"
8
+ description = "Python SDK for Capture - Screenshot and content extraction API"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Capture Team", email = "support@capture.page"}
14
+ ]
15
+ keywords = ["capture", "screenshot", "pdf", "web scraping", "content extraction"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
27
+ ]
28
+ dependencies = [
29
+ "aiohttp>=3.8.0",
30
+ "yarl>=1.9.0",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ dev = [
35
+ "pytest>=7.0.0",
36
+ "pytest-asyncio>=0.21.0",
37
+ "black>=23.0.0",
38
+ "mypy>=1.0.0",
39
+ "ruff>=0.1.0",
40
+ ]
41
+
42
+ [project.urls]
43
+ Homepage = "https://capture.page"
44
+ Documentation = "https://docs.capture.page"
45
+ Repository = "https://github.com/techulus/capture-py"
46
+
47
+ [tool.hatch.build.targets.wheel]
48
+ packages = ["src/capture"]
49
+
50
+ [tool.black]
51
+ line-length = 88
52
+ target-version = ["py38"]
53
+
54
+ [tool.ruff]
55
+ line-length = 88
56
+ target-version = "py38"
57
+
58
+ [tool.mypy]
59
+ python_version = "3.8"
60
+ warn_return_any = true
61
+ warn_unused_configs = true
62
+ disallow_untyped_defs = true
63
+
64
+ [tool.pytest.ini_options]
65
+ asyncio_mode = "auto"
66
+ testpaths = ["tests"]
@@ -0,0 +1,6 @@
1
+ [pytest]
2
+ asyncio_mode = auto
3
+ testpaths = tests
4
+ python_files = test_*.py
5
+ python_classes = Test*
6
+ python_functions = test_*
@@ -0,0 +1,4 @@
1
+ from .client import Capture, RequestOptions, RequestType
2
+
3
+ __version__ = "1.0.0"
4
+ __all__ = ["Capture", "RequestOptions", "RequestType"]
@@ -0,0 +1,112 @@
1
+ import hashlib
2
+ from typing import Any, Dict, Literal, Optional, Union
3
+ from urllib.parse import urlencode
4
+
5
+ import aiohttp
6
+ from yarl import URL
7
+
8
# The kinds of capture the API exposes; used as the final path segment of a
# request URL.
RequestType = Literal["image", "pdf", "content", "metadata", "animated"]

# Per-request options, sent to the API as query-string parameters.
RequestOptions = Dict[str, Union[str, int, bool]]


class Capture:
    """Client that builds signed Capture URLs and optionally fetches them.

    A request URL has the form
    ``{base}/{key}/{token}/{request_type}?{query_string}`` where ``token`` is
    the MD5 hex digest of the secret concatenated with the encoded query
    string.
    """

    API_URL = "https://cdn.capture.page"
    EDGE_URL = "https://edge.capture.page"

    def __init__(
        self, key: str, secret: str, options: Optional[Dict[str, bool]] = None
    ) -> None:
        """Store the credentials and client-level options.

        Args:
            key: API key; becomes part of the URL path.
            secret: API secret; only used to compute the request token.
            options: Client options. ``{"useEdge": True}`` selects
                ``EDGE_URL`` instead of ``API_URL`` as the base URL.
        """
        self.key = key
        self.secret = secret
        self.options = options or {}

    def _generate_token(self, secret: str, url: str) -> str:
        """Return the MD5 hex digest of ``secret`` followed by ``url``."""
        token_string = f"{secret}{url}"
        return hashlib.md5(token_string.encode()).hexdigest()

    def _encode_query_string(self, params: Dict[str, Any]) -> str:
        """URL-encode ``params`` into a query string.

        Entries whose value is ``None`` are dropped, and booleans are written
        as lowercase ``"true"`` / ``"false"``. Insertion order is preserved.
        """
        filtered_params = {
            name: (str(value).lower() if isinstance(value, bool) else value)
            for name, value in params.items()
            if value is not None
        }
        return urlencode(filtered_params, safe="")

    def _build_url(
        self,
        url: str,
        request_type: RequestType,
        options: Optional[RequestOptions] = None,
    ) -> str:
        """Build the signed request URL for ``url`` and ``request_type``.

        Args:
            url: Target page to capture.
            request_type: Which capture endpoint to address.
            options: Extra query parameters. The mapping is copied, so the
                caller's dict is never modified.

        Returns:
            The complete request URL, including token and query string.

        Raises:
            ValueError: If the key or secret is empty, or ``url`` is ``None``.
            TypeError: If ``url`` is not a string.
        """
        if not self.key or not self.secret:
            raise ValueError("Key and Secret is required")

        if url is None:
            raise ValueError("url is required")

        if not isinstance(url, str):
            raise TypeError(
                "url should be of type string (something like www.google.com)"
            )

        # "url" is added last so it always follows the caller's options in the
        # query string that gets signed.
        params: Dict[str, Any] = options.copy() if options else {}
        params["url"] = url

        query_string = self._encode_query_string(params)
        token = self._generate_token(self.secret, query_string)

        base_url = self.EDGE_URL if self.options.get("useEdge") else self.API_URL

        return f"{base_url}/{self.key}/{token}/{request_type}?{query_string}"

    def build_image_url(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> str:
        """Return the signed URL of the ``image`` endpoint for ``url``."""
        return self._build_url(url, "image", options)

    def build_pdf_url(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> str:
        """Return the signed URL of the ``pdf`` endpoint for ``url``."""
        return self._build_url(url, "pdf", options)

    def build_content_url(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> str:
        """Return the signed URL of the ``content`` endpoint for ``url``."""
        return self._build_url(url, "content", options)

    def build_metadata_url(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> str:
        """Return the signed URL of the ``metadata`` endpoint for ``url``."""
        return self._build_url(url, "metadata", options)

    def build_animated_url(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> str:
        """Return the signed URL of the ``animated`` endpoint for ``url``."""
        return self._build_url(url, "animated", options)

    async def _fetch_bytes(self, fetch_url: str) -> bytes:
        """GET ``fetch_url`` and return the raw response body.

        ``fetch_url`` is wrapped in ``URL(..., encoded=True)`` before the
        request, the same way for every fetch method, so the already-encoded
        query string is handed to aiohttp as built.
        ``response.raise_for_status()`` is called before the body is read.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(URL(fetch_url, encoded=True)) as response:
                response.raise_for_status()
                return await response.read()

    async def _fetch_json(self, fetch_url: str) -> Any:
        """GET ``fetch_url`` and return the decoded JSON response body.

        Uses the same request handling as ``_fetch_bytes``; only the way the
        body is decoded differs.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(URL(fetch_url, encoded=True)) as response:
                response.raise_for_status()
                return await response.json()

    async def fetch_image(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> bytes:
        """Request the ``image`` endpoint for ``url`` and return the body."""
        return await self._fetch_bytes(self.build_image_url(url, options))

    async def fetch_pdf(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> bytes:
        """Request the ``pdf`` endpoint for ``url`` and return the body."""
        return await self._fetch_bytes(self.build_pdf_url(url, options))

    async def fetch_content(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> Dict[str, Union[bool, str]]:
        """Request the ``content`` endpoint and return the parsed JSON."""
        return await self._fetch_json(self.build_content_url(url, options))

    async def fetch_metadata(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> Dict[str, Union[bool, Dict[str, Union[str, int]]]]:
        """Request the ``metadata`` endpoint and return the parsed JSON."""
        return await self._fetch_json(self.build_metadata_url(url, options))

    async def fetch_animated(
        self, url: str, options: Optional[RequestOptions] = None
    ) -> bytes:
        """Request the ``animated`` endpoint for ``url`` and return the body."""
        return await self._fetch_bytes(self.build_animated_url(url, options))
File without changes