contentapi-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
contentapi-0.1.0/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 ContentAPI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
contentapi-0.1.0/PKG-INFO
@@ -0,0 +1,294 @@
+ Metadata-Version: 2.4
+ Name: contentapi
+ Version: 0.1.0
+ Summary: Official Python SDK for ContentAPI — extract content from any URL
+ Project-URL: Homepage, https://getcontentapi.com
+ Project-URL: Documentation, https://docs.getcontentapi.com
+ Project-URL: Repository, https://github.com/contentapi/contentapi-python
+ Project-URL: Issues, https://github.com/contentapi/contentapi-python/issues
+ Author-email: ContentAPI <support@getcontentapi.com>
+ License: MIT
+ License-File: LICENSE
+ Keywords: api-client,content-extraction,contentapi,web-scraping,youtube-transcript
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Internet :: WWW/HTTP
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.9
+ Requires-Dist: httpx<1.0.0,>=0.25.0
+ Requires-Dist: pydantic<3.0.0,>=2.0.0
+ Provides-Extra: dev
+ Requires-Dist: mypy>=1.0; extra == 'dev'
+ Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
+ Requires-Dist: pytest>=7.0; extra == 'dev'
+ Requires-Dist: respx>=0.21; extra == 'dev'
+ Requires-Dist: ruff>=0.1; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # ContentAPI Python SDK
+
+ Official Python SDK for [ContentAPI](https://getcontentapi.com) — extract structured content from any URL.
+
+ [![PyPI version](https://img.shields.io/pypi/v/contentapi.svg)](https://pypi.org/project/contentapi/)
+ [![Python](https://img.shields.io/pypi/pyversions/contentapi.svg)](https://pypi.org/project/contentapi/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+
+ ## Features
+
+ - 🌐 **Web extraction** — Get clean markdown/text from any webpage
+ - 🎬 **YouTube** — Transcripts, metadata, and summaries
+ - 🐦 **Twitter/X** — Thread and tweet extraction
+ - 🤖 **Reddit** — Post extraction
+ - 🔍 **Web search** — Search the web programmatically
+ - 📦 **Batch** — Extract multiple URLs in a single request
+ - ⚡ **Async support** — Full async/await with `httpx`
+ - 🔄 **Auto-retry** — Exponential backoff on rate limits and server errors
+ - 📐 **Type-safe** — Pydantic v2 models with full type hints
+
+ ## Installation
+
+ ```bash
+ pip install contentapi
+ ```
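+
+ The test and lint toolchain ships as a `dev` extra (declared in the package metadata above); if you want it, install with the extra:
+
+ ```bash
+ # Optional: pull in pytest, pytest-asyncio, respx, ruff and mypy for development
+ pip install "contentapi[dev]"
+ ```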
+
+ ## Quick Start
+
+ ```python
+ from contentapi import ContentAPI
+
+ client = ContentAPI(api_key="sk_live_...")
+
+ # Extract web content
+ result = client.web.extract("https://example.com")
+ print(result.title)       # "Example Domain"
+ print(result.content)     # Extracted content as markdown
+ print(result.word_count)  # 17
+ ```
+
+ ## Usage
+
+ ### Web Extraction
+
+ ```python
+ # Default extraction
+ result = client.web.extract("https://example.com")
+
+ # Specify output format
+ result = client.web.extract("https://example.com", format="markdown")
+ result = client.web.extract("https://example.com", format="text")
+
+ # Access structured data
+ print(result.title)
+ print(result.content)
+ print(result.word_count)
+ print(result.metadata.language)     # "en"
+ print(result.metadata.description)  # Meta description
+
+ # Page structure
+ for item in result.structure or []:
+     print(item.tag, item.text)
+ ```
+
+ ### YouTube
+
+ ```python
+ # Get transcript with segments
+ transcript = client.youtube.transcript("https://youtube.com/watch?v=dQw4w9WgXcQ")
+ print(transcript.title)      # Video title
+ print(transcript.channel)    # Channel name
+ print(transcript.full_text)  # All segments joined
+ print(transcript.word_count)
+
+ for segment in transcript.segments:
+     print(f"[{segment.start:.1f}s] {segment.text}")
+
+ # Get video metadata
+ metadata = client.youtube.metadata("https://youtube.com/watch?v=dQw4w9WgXcQ")
+ print(metadata.title)
+ print(metadata.description)
+ print(metadata.view_count)
+ print(metadata.duration)  # seconds
+ print(metadata.published_at)
+ print(metadata.tags)
+ ```
+
+ ### Twitter / X
+
+ ```python
+ thread = client.twitter.thread("https://x.com/user/status/123456789")
+ print(thread.author)   # "@user"
+ print(thread.content)  # Thread text
+
+ for tweet in thread.tweets or []:
+     print(tweet.text, tweet.likes)
+ ```
+
+ ### Reddit
+
+ ```python
+ post = client.reddit.post("https://reddit.com/r/Python/comments/abc123/my_post/")
+ print(post.title)
+ print(post.subreddit)  # "r/Python"
+ print(post.author)
+ print(post.score)
+ print(post.content)
+ ```
+
+ ### Web Search
+
+ ```python
+ results = client.search("python RAG tutorial", count=5)
+ print(f"Found {results.total_results} results")
+
+ for item in results.results:
+     print(f"{item.title}: {item.url}")
+     print(f" {item.snippet}")
+ ```
+
+ ### Batch Extraction
+
+ ```python
+ batch = client.batch([
+     "https://example.com",
+     "https://youtube.com/watch?v=dQw4w9WgXcQ",
+     "https://x.com/user/status/123",
+ ])
+
+ print(f"{batch.summary.succeeded}/{batch.summary.total} succeeded")
+
+ for item in batch.results:
+     if item.success:
+         print(f"✅ {item.url}: {item.data}")
+     else:
+         print(f"❌ {item.url}: {item.error}")
+ ```
+
+ ### Async Usage
+
+ ```python
+ import asyncio
+ from contentapi import ContentAPI
+
+ async def main():
+     async with ContentAPI(api_key="sk_live_...", async_mode=True) as client:
+         # All methods return coroutines in async mode
+         result = await client.web.extract("https://example.com")
+         print(result.title)
+
+         # Parallel requests
+         web, yt = await asyncio.gather(
+             client.web.extract("https://example.com"),
+             client.youtube.transcript("https://youtube.com/watch?v=dQw4w9WgXcQ"),
+         )
+
+ asyncio.run(main())
+ ```
+
+ You can also use the async methods explicitly:
+
+ ```python
+ result = await client.web.aextract("https://example.com")
+ transcript = await client.youtube.atranscript("https://youtube.com/watch?v=...")
+ ```
+
+ ## Error Handling
+
+ ```python
+ from contentapi import (
+     ContentAPI,
+     ContentAPIError,
+     AuthenticationError,
+     RateLimitError,
+     QuotaExceededError,
+     ExtractionError,
+     NotFoundError,
+ )
+
+ client = ContentAPI(api_key="sk_live_...")
+
+ try:
+     result = client.web.extract("https://example.com")
+ except AuthenticationError:
+     print("Invalid API key!")
+ except RateLimitError as e:
+     print(f"Rate limited! Retry after {e.retry_after}s")
+ except QuotaExceededError:
+     print("Out of credits!")
+ except ExtractionError as e:
+     print(f"Extraction failed: {e.message}")
+ except NotFoundError:
+     print("Endpoint not found")
+ except ContentAPIError as e:
+     print(f"API error [{e.status_code}]: {e.message}")
+ ```
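+
+ If a request is still rate-limited after the SDK's built-in retries (see below), `RateLimitError` reaches your code with a `retry_after` hint. A minimal sketch of honoring it manually, assuming you want to keep waiting (the helper name and loop are ours, not part of the SDK):
+
+ ```python
+ import time
+
+ def extract_patiently(client, url, attempts=3):
+     # Last-resort loop for rate limits that outlive the SDK's own retries.
+     for _ in range(attempts - 1):
+         try:
+             return client.web.extract(url)
+         except RateLimitError as e:
+             time.sleep(e.retry_after or 1)
+     return client.web.extract(url)  # final try; any error now propagates
+ ```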
+
+ ### Automatic Retries
+
+ The SDK automatically retries on:
+ - **429** — Rate limit exceeded (with exponential backoff)
+ - **503** — Service unavailable
+ - **Timeouts** — Network timeouts
+
+ Default: 3 retries with exponential backoff (1s → 2s → 4s).
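+
+ Assuming that documented schedule, the wait before retry *n* is simply `2**(n - 1)` seconds:
+
+ ```python
+ # Default schedule as documented: 1s, 2s, 4s
+ delays = [2 ** (n - 1) for n in range(1, 4)]  # [1, 2, 4]
+ ```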
+
+ ```python
+ # Customize retry behavior
+ client = ContentAPI(
+     api_key="sk_live_...",
+     max_retries=5,
+     timeout=30.0,
+ )
+ ```
+
+ ## Configuration
+
+ ```python
+ client = ContentAPI(
+     api_key="sk_live_...",               # Required
+     base_url="https://api.example.com",  # Custom base URL
+     timeout=60.0,                        # Request timeout in seconds
+     max_retries=3,                       # Max retry attempts
+ )
+ ```
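+
+ The docs only show the key passed directly; whether the client also reads an environment variable is not stated, so wire it up yourself if needed (`CONTENTAPI_API_KEY` is just a name we picked):
+
+ ```python
+ import os
+
+ # Hypothetical convention: keep the key out of source and load it at startup
+ client = ContentAPI(api_key=os.environ["CONTENTAPI_API_KEY"])
+ ```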
+
+ ## Credits Tracking
+
+ Every response includes credit usage:
+
+ ```python
+ result = client.web.extract("https://example.com")
+ print(result.credits_used)       # 1
+ print(result.credits_remaining)  # 99
+ ```
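+
+ Because these fields ride along on every response, a low-balance check can piggyback on calls you already make; a sketch with an arbitrary threshold:
+
+ ```python
+ result = client.web.extract("https://example.com")
+ if result.credits_remaining < 10:
+     # Act before further requests start raising QuotaExceededError
+     print("ContentAPI credits running low:", result.credits_remaining)
+ ```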
+
+ ## Context Manager
+
+ ```python
+ # Sync
+ with ContentAPI(api_key="sk_live_...") as client:
+     result = client.web.extract("https://example.com")
+
+ # Async
+ async with ContentAPI(api_key="sk_live_...", async_mode=True) as client:
+     result = await client.web.extract("https://example.com")
+ ```
+
+ ## Requirements
+
+ - Python ≥ 3.9
+ - `httpx` ≥ 0.25
+ - `pydantic` ≥ 2.0
+
+ ## License
+
+ MIT — see [LICENSE](LICENSE).
contentapi-0.1.0/README.md
@@ -0,0 +1,258 @@
+ (Identical to the README embedded in PKG-INFO above.)
contentapi-0.1.0/pyproject.toml
@@ -0,0 +1,69 @@
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "contentapi"
+ version = "0.1.0"
+ description = "Official Python SDK for ContentAPI — extract content from any URL"
+ readme = "README.md"
+ license = {text = "MIT"}
+ requires-python = ">=3.9"
+ authors = [
+     { name = "ContentAPI", email = "support@getcontentapi.com" },
+ ]
+ keywords = [
+     "contentapi",
+     "web-scraping",
+     "content-extraction",
+     "youtube-transcript",
+     "api-client",
+ ]
+ classifiers = [
+     "Development Status :: 4 - Beta",
+     "Intended Audience :: Developers",
+     "License :: OSI Approved :: MIT License",
+     "Operating System :: OS Independent",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.9",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+     "Programming Language :: Python :: 3.13",
+     "Typing :: Typed",
+     "Topic :: Software Development :: Libraries :: Python Modules",
+     "Topic :: Internet :: WWW/HTTP",
+ ]
+ dependencies = [
+     "httpx>=0.25.0,<1.0.0",
+     "pydantic>=2.0.0,<3.0.0",
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=7.0",
+     "pytest-asyncio>=0.21",
+     "respx>=0.21",
+     "ruff>=0.1",
+     "mypy>=1.0",
+ ]
+
+ [project.urls]
+ Homepage = "https://getcontentapi.com"
+ Documentation = "https://docs.getcontentapi.com"
+ Repository = "https://github.com/contentapi/contentapi-python"
+ Issues = "https://github.com/contentapi/contentapi-python/issues"
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["src/contentapi"]
+
+ [tool.ruff]
+ target-version = "py39"
+ line-length = 100
+
+ [tool.mypy]
+ python_version = "3.9"
+ strict = true
+
+ [tool.pytest.ini_options]
+ asyncio_mode = "auto"