contentapi-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
contentapi-0.1.0/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 ContentAPI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
contentapi-0.1.0/PKG-INFO
@@ -0,0 +1,294 @@
+ Metadata-Version: 2.4
+ Name: contentapi
+ Version: 0.1.0
+ Summary: Official Python SDK for ContentAPI — extract content from any URL
+ Project-URL: Homepage, https://getcontentapi.com
+ Project-URL: Documentation, https://docs.getcontentapi.com
+ Project-URL: Repository, https://github.com/contentapi/contentapi-python
+ Project-URL: Issues, https://github.com/contentapi/contentapi-python/issues
+ Author-email: ContentAPI <support@getcontentapi.com>
+ License: MIT
+ License-File: LICENSE
+ Keywords: api-client,content-extraction,contentapi,web-scraping,youtube-transcript
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Internet :: WWW/HTTP
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.9
+ Requires-Dist: httpx<1.0.0,>=0.25.0
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
+ Provides-Extra: dev
+ Requires-Dist: mypy>=1.0; extra == 'dev'
+ Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
+ Requires-Dist: pytest>=7.0; extra == 'dev'
+ Requires-Dist: respx>=0.21; extra == 'dev'
+ Requires-Dist: ruff>=0.1; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # ContentAPI Python SDK
+
+ Official Python SDK for [ContentAPI](https://getcontentapi.com) — extract structured content from any URL.
+
+ [![PyPI version](https://img.shields.io/pypi/v/contentapi.svg)](https://pypi.org/project/contentapi/)
+ [![Python](https://img.shields.io/pypi/pyversions/contentapi.svg)](https://pypi.org/project/contentapi/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+
+ ## Features
+
+ - 🌐 **Web extraction** — Get clean markdown/text from any webpage
+ - 🎬 **YouTube** — Transcripts, metadata, and summaries
+ - 🐦 **Twitter/X** — Thread and tweet extraction
+ - 🤖 **Reddit** — Post extraction
+ - 🔍 **Web search** — Search the web programmatically
+ - 📦 **Batch** — Extract multiple URLs in a single request
+ - ⚡ **Async support** — Full async/await with `httpx`
+ - 🔄 **Auto-retry** — Exponential backoff on rate limits and server errors
+ - 📐 **Type-safe** — Pydantic v2 models with full type hints
+
+ ## Installation
+
+ ```bash
+ pip install contentapi
+ ```
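+
+ The test and lint toolchain ships as a `dev` extra (declared in the package metadata above); if you want it, install with the extra:
+
+ ```bash
+ # Optional: pull in pytest, pytest-asyncio, respx, ruff and mypy for development
+ pip install "contentapi[dev]"
+ ```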
+
+ ## Quick Start
+
+ ```python
+ from contentapi import ContentAPI
+
+ client = ContentAPI(api_key="sk_live_...")
+
+ # Extract web content
+ result = client.web.extract("https://example.com")
+ print(result.title)       # "Example Domain"
+ print(result.content)     # Extracted content as markdown
+ print(result.word_count)  # 17
+ ```
+
+ ## Usage
+
+ ### Web Extraction
+
+ ```python
+ # Default extraction
+ result = client.web.extract("https://example.com")
+
+ # Specify output format
+ result = client.web.extract("https://example.com", format="markdown")
+ result = client.web.extract("https://example.com", format="text")
+
+ # Access structured data
+ print(result.title)
+ print(result.content)
+ print(result.word_count)
+ print(result.metadata.language)     # "en"
+ print(result.metadata.description)  # Meta description
+
+ # Page structure
+ for item in result.structure or []:
+     print(item.tag, item.text)
+ ```
+
+ ### YouTube
+
+ ```python
+ # Get transcript with segments
+ transcript = client.youtube.transcript("https://youtube.com/watch?v=dQw4w9WgXcQ")
+ print(transcript.title)      # Video title
+ print(transcript.channel)    # Channel name
+ print(transcript.full_text)  # All segments joined
+ print(transcript.word_count)
+
+ for segment in transcript.segments:
+     print(f"[{segment.start:.1f}s] {segment.text}")
+
+ # Get video metadata
+ metadata = client.youtube.metadata("https://youtube.com/watch?v=dQw4w9WgXcQ")
+ print(metadata.title)
+ print(metadata.description)
+ print(metadata.view_count)
+ print(metadata.duration)  # seconds
+ print(metadata.published_at)
+ print(metadata.tags)
+ ```
+
+ ### Twitter / X
+
+ ```python
+ thread = client.twitter.thread("https://x.com/user/status/123456789")
+ print(thread.author)   # "@user"
+ print(thread.content)  # Thread text
+
+ for tweet in thread.tweets or []:
+     print(tweet.text, tweet.likes)
+ ```
+
+ ### Reddit
+
+ ```python
+ post = client.reddit.post("https://reddit.com/r/Python/comments/abc123/my_post/")
+ print(post.title)
+ print(post.subreddit)  # "r/Python"
+ print(post.author)
+ print(post.score)
+ print(post.content)
+ ```
+
+ ### Web Search
+
+ ```python
+ results = client.search("python RAG tutorial", count=5)
+ print(f"Found {results.total_results} results")
+
+ for item in results.results:
+     print(f"{item.title}: {item.url}")
+     print(f" {item.snippet}")
+ ```
+
+ ### Batch Extraction
+
+ ```python
+ batch = client.batch([
+     "https://example.com",
+     "https://youtube.com/watch?v=dQw4w9WgXcQ",
+     "https://x.com/user/status/123",
+ ])
+
+ print(f"{batch.summary.succeeded}/{batch.summary.total} succeeded")
+
+ for item in batch.results:
+     if item.success:
+         print(f"✅ {item.url}: {item.data}")
+     else:
+         print(f"❌ {item.url}: {item.error}")
+ ```
+
+ ### Async Usage
+
+ ```python
+ import asyncio
+ from contentapi import ContentAPI
+
+ async def main():
+     async with ContentAPI(api_key="sk_live_...", async_mode=True) as client:
+         # All methods return coroutines in async mode
+         result = await client.web.extract("https://example.com")
+         print(result.title)
+
+         # Parallel requests
+         web, yt = await asyncio.gather(
+             client.web.extract("https://example.com"),
+             client.youtube.transcript("https://youtube.com/watch?v=dQw4w9WgXcQ"),
+         )
+
+ asyncio.run(main())
+ ```
+
+ You can also use the async methods explicitly:
+
+ ```python
+ result = await client.web.aextract("https://example.com")
+ transcript = await client.youtube.atranscript("https://youtube.com/watch?v=...")
+ ```
+
+ ## Error Handling
+
+ ```python
+ from contentapi import (
+     ContentAPI,
+     ContentAPIError,
+     AuthenticationError,
+     RateLimitError,
+     QuotaExceededError,
+     ExtractionError,
+     NotFoundError,
+ )
+
+ client = ContentAPI(api_key="sk_live_...")
+
+ try:
+     result = client.web.extract("https://example.com")
+ except AuthenticationError:
+     print("Invalid API key!")
+ except RateLimitError as e:
+     print(f"Rate limited! Retry after {e.retry_after}s")
+ except QuotaExceededError:
+     print("Out of credits!")
+ except ExtractionError as e:
+     print(f"Extraction failed: {e.message}")
+ except NotFoundError:
+     print("Endpoint not found")
+ except ContentAPIError as e:
+     print(f"API error [{e.status_code}]: {e.message}")
+ ```
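+
+ If a request is still rate-limited after the SDK's built-in retries (see below), `RateLimitError` reaches your code with a `retry_after` hint. A minimal sketch of honoring it manually, assuming you want to keep waiting (the helper name and loop are ours, not part of the SDK):
+
+ ```python
+ import time
+
+ def extract_patiently(client, url, attempts=3):
+     # Last-resort loop for rate limits that outlive the SDK's own retries.
+     for _ in range(attempts - 1):
+         try:
+             return client.web.extract(url)
+         except RateLimitError as e:
+             time.sleep(e.retry_after or 1)
+     return client.web.extract(url)  # final try; any error now propagates
+ ```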
+
+ ### Automatic Retries
+
+ The SDK automatically retries on:
+ - **429** — Rate limit exceeded (with exponential backoff)
+ - **503** — Service unavailable
+ - **Timeouts** — Network timeouts
+
+ Default: 3 retries with exponential backoff (1s → 2s → 4s).
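+
+ Assuming that documented schedule, the wait before retry *n* is simply `2**(n - 1)` seconds:
+
+ ```python
+ # Default schedule as documented: 1s, 2s, 4s
+ delays = [2 ** (n - 1) for n in range(1, 4)]  # [1, 2, 4]
+ ```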
+
+ ```python
+ # Customize retry behavior
+ client = ContentAPI(
+     api_key="sk_live_...",
+     max_retries=5,
+     timeout=30.0,
+ )
+ ```
+
+ ## Configuration
+
+ ```python
+ client = ContentAPI(
+     api_key="sk_live_...",               # Required
+     base_url="https://api.example.com",  # Custom base URL
+     timeout=60.0,                        # Request timeout in seconds
+     max_retries=3,                       # Max retry attempts
+ )
+ ```
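+
+ The docs only show the key passed directly; whether the client also reads an environment variable is not stated, so wire it up yourself if needed (`CONTENTAPI_API_KEY` is just a name we picked):
+
+ ```python
+ import os
+
+ # Hypothetical convention: keep the key out of source and load it at startup
+ client = ContentAPI(api_key=os.environ["CONTENTAPI_API_KEY"])
+ ```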
+
+ ## Credits Tracking
+
+ Every response includes credit usage:
+
+ ```python
+ result = client.web.extract("https://example.com")
+ print(result.credits_used)       # 1
+ print(result.credits_remaining)  # 99
+ ```
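+
+ Because these fields ride along on every response, a low-balance check can piggyback on calls you already make; a sketch with an arbitrary threshold:
+
+ ```python
+ result = client.web.extract("https://example.com")
+ if result.credits_remaining < 10:
+     # Act before further requests start raising QuotaExceededError
+     print("ContentAPI credits running low:", result.credits_remaining)
+ ```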
+
+ ## Context Manager
+
+ ```python
+ # Sync
+ with ContentAPI(api_key="sk_live_...") as client:
+     result = client.web.extract("https://example.com")
+
+ # Async
+ async with ContentAPI(api_key="sk_live_...", async_mode=True) as client:
+     result = await client.web.extract("https://example.com")
+ ```
+
+ ## Requirements
+
+ - Python ≥ 3.9
+ - `httpx` ≥ 0.25
+ - `pydantic` ≥ 2.0
+
+ ## License
+
+ MIT — see [LICENSE](LICENSE).
contentapi-0.1.0/README.md
@@ -0,0 +1,258 @@
+ (Identical to the README embedded in PKG-INFO above.)
contentapi-0.1.0/pyproject.toml
@@ -0,0 +1,69 @@
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "contentapi"
+ version = "0.1.0"
+ description = "Official Python SDK for ContentAPI — extract content from any URL"
+ readme = "README.md"
+ license = {text = "MIT"}
+ requires-python = ">=3.9"
+ authors = [
+     { name = "ContentAPI", email = "support@getcontentapi.com" },
+ ]
+ keywords = [
+     "contentapi",
+     "web-scraping",
+     "content-extraction",
+     "youtube-transcript",
+     "api-client",
+ ]
+ classifiers = [
+     "Development Status :: 4 - Beta",
+     "Intended Audience :: Developers",
+     "License :: OSI Approved :: MIT License",
+     "Operating System :: OS Independent",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.9",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+     "Programming Language :: Python :: 3.13",
+     "Typing :: Typed",
+     "Topic :: Software Development :: Libraries :: Python Modules",
+     "Topic :: Internet :: WWW/HTTP",
+ ]
+ dependencies = [
+     "httpx>=0.25.0,<1.0.0",
+     "pydantic>=2.0.0,<3.0.0",
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=7.0",
+     "pytest-asyncio>=0.21",
+     "respx>=0.21",
+     "ruff>=0.1",
+     "mypy>=1.0",
+ ]
+
+ [project.urls]
+ Homepage = "https://getcontentapi.com"
+ Documentation = "https://docs.getcontentapi.com"
+ Repository = "https://github.com/contentapi/contentapi-python"
+ Issues = "https://github.com/contentapi/contentapi-python/issues"
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["src/contentapi"]
+
+ [tool.ruff]
+ target-version = "py39"
+ line-length = 100
+
+ [tool.mypy]
+ python_version = "3.9"
+ strict = true
+
+ [tool.pytest.ini_options]
+ asyncio_mode = "auto"