cast2md-2025.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cast2md-2025.1/LICENSE +21 -0
- cast2md-2025.1/PKG-INFO +208 -0
- cast2md-2025.1/README.md +158 -0
- cast2md-2025.1/pyproject.toml +92 -0
- cast2md-2025.1/setup.cfg +4 -0
- cast2md-2025.1/src/cast2md/__init__.py +5 -0
- cast2md-2025.1/src/cast2md/__main__.py +6 -0
- cast2md-2025.1/src/cast2md/api/__init__.py +1 -0
- cast2md-2025.1/src/cast2md/api/episodes.py +462 -0
- cast2md-2025.1/src/cast2md/api/feeds.py +347 -0
- cast2md-2025.1/src/cast2md/api/itunes.py +53 -0
- cast2md-2025.1/src/cast2md/api/nodes.py +971 -0
- cast2md-2025.1/src/cast2md/api/queue.py +1374 -0
- cast2md-2025.1/src/cast2md/api/runpod.py +459 -0
- cast2md-2025.1/src/cast2md/api/search.py +414 -0
- cast2md-2025.1/src/cast2md/api/settings.py +416 -0
- cast2md-2025.1/src/cast2md/api/system.py +139 -0
- cast2md-2025.1/src/cast2md/cli.py +982 -0
- cast2md-2025.1/src/cast2md/clients/__init__.py +12 -0
- cast2md-2025.1/src/cast2md/clients/itunes.py +146 -0
- cast2md-2025.1/src/cast2md/clients/pocketcasts.py +239 -0
- cast2md-2025.1/src/cast2md/config/__init__.py +5 -0
- cast2md-2025.1/src/cast2md/config/settings.py +259 -0
- cast2md-2025.1/src/cast2md/db/__init__.py +19 -0
- cast2md-2025.1/src/cast2md/db/config.py +157 -0
- cast2md-2025.1/src/cast2md/db/connection.py +210 -0
- cast2md-2025.1/src/cast2md/db/migrations.py +198 -0
- cast2md-2025.1/src/cast2md/db/models.py +261 -0
- cast2md-2025.1/src/cast2md/db/repository.py +2837 -0
- cast2md-2025.1/src/cast2md/db/schema.py +191 -0
- cast2md-2025.1/src/cast2md/db/sql.py +190 -0
- cast2md-2025.1/src/cast2md/distributed/__init__.py +8 -0
- cast2md-2025.1/src/cast2md/distributed/coordinator.py +312 -0
- cast2md-2025.1/src/cast2md/download/__init__.py +5 -0
- cast2md-2025.1/src/cast2md/download/downloader.py +277 -0
- cast2md-2025.1/src/cast2md/export/__init__.py +1 -0
- cast2md-2025.1/src/cast2md/export/formats.py +245 -0
- cast2md-2025.1/src/cast2md/feed/__init__.py +6 -0
- cast2md-2025.1/src/cast2md/feed/discovery.py +382 -0
- cast2md-2025.1/src/cast2md/feed/itunes.py +83 -0
- cast2md-2025.1/src/cast2md/feed/parser.py +317 -0
- cast2md-2025.1/src/cast2md/main.py +221 -0
- cast2md-2025.1/src/cast2md/mcp/__init__.py +5 -0
- cast2md-2025.1/src/cast2md/mcp/__main__.py +49 -0
- cast2md-2025.1/src/cast2md/mcp/client.py +268 -0
- cast2md-2025.1/src/cast2md/mcp/resources.py +364 -0
- cast2md-2025.1/src/cast2md/mcp/server.py +58 -0
- cast2md-2025.1/src/cast2md/mcp/tools.py +941 -0
- cast2md-2025.1/src/cast2md/node/__init__.py +6 -0
- cast2md-2025.1/src/cast2md/node/__main__.py +51 -0
- cast2md-2025.1/src/cast2md/node/config.py +76 -0
- cast2md-2025.1/src/cast2md/node/server.py +243 -0
- cast2md-2025.1/src/cast2md/node/worker.py +974 -0
- cast2md-2025.1/src/cast2md/notifications/__init__.py +1 -0
- cast2md-2025.1/src/cast2md/notifications/ntfy.py +125 -0
- cast2md-2025.1/src/cast2md/scheduler.py +213 -0
- cast2md-2025.1/src/cast2md/search/__init__.py +10 -0
- cast2md-2025.1/src/cast2md/search/embeddings.py +140 -0
- cast2md-2025.1/src/cast2md/search/parser.py +199 -0
- cast2md-2025.1/src/cast2md/search/repository.py +968 -0
- cast2md-2025.1/src/cast2md/services/__init__.py +1 -0
- cast2md-2025.1/src/cast2md/services/pod_setup.py +171 -0
- cast2md-2025.1/src/cast2md/services/runpod_service.py +1444 -0
- cast2md-2025.1/src/cast2md/storage/__init__.py +19 -0
- cast2md-2025.1/src/cast2md/storage/filesystem.py +342 -0
- cast2md-2025.1/src/cast2md/transcription/__init__.py +5 -0
- cast2md-2025.1/src/cast2md/transcription/formats.py +484 -0
- cast2md-2025.1/src/cast2md/transcription/preprocessing.py +217 -0
- cast2md-2025.1/src/cast2md/transcription/providers/__init__.py +92 -0
- cast2md-2025.1/src/cast2md/transcription/providers/base.py +85 -0
- cast2md-2025.1/src/cast2md/transcription/providers/pocketcasts.py +265 -0
- cast2md-2025.1/src/cast2md/transcription/providers/podcast20.py +124 -0
- cast2md-2025.1/src/cast2md/transcription/service.py +899 -0
- cast2md-2025.1/src/cast2md/web/__init__.py +1 -0
- cast2md-2025.1/src/cast2md/web/views.py +994 -0
- cast2md-2025.1/src/cast2md/worker/__init__.py +5 -0
- cast2md-2025.1/src/cast2md/worker/manager.py +849 -0
- cast2md-2025.1/src/cast2md.egg-info/PKG-INFO +208 -0
- cast2md-2025.1/src/cast2md.egg-info/SOURCES.txt +87 -0
- cast2md-2025.1/src/cast2md.egg-info/dependency_links.txt +1 -0
- cast2md-2025.1/src/cast2md.egg-info/entry_points.txt +2 -0
- cast2md-2025.1/src/cast2md.egg-info/requires.txt +37 -0
- cast2md-2025.1/src/cast2md.egg-info/top_level.txt +1 -0
- cast2md-2025.1/tests/test_coordinator.py +146 -0
- cast2md-2025.1/tests/test_export_formats.py +79 -0
- cast2md-2025.1/tests/test_job_repository.py +658 -0
- cast2md-2025.1/tests/test_search.py +297 -0
- cast2md-2025.1/tests/test_semantic_search.py +310 -0
- cast2md-2025.1/tests/test_settings.py +81 -0
cast2md-2025.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Linus Dennert
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
cast2md-2025.1/PKG-INFO
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cast2md
|
|
3
|
+
Version: 2025.1
|
|
4
|
+
Summary: Podcast transcription service - download episodes via RSS and transcribe with Whisper
|
|
5
|
+
Author: cast2md
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: feedparser>=6.0.0
|
|
17
|
+
Requires-Dist: httpx[socks]>=0.26.0
|
|
18
|
+
Requires-Dist: faster-whisper>=1.0.0
|
|
19
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
20
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
21
|
+
Requires-Dist: click>=8.1.0
|
|
22
|
+
Requires-Dist: fastapi>=0.109.0
|
|
23
|
+
Requires-Dist: uvicorn[standard]>=0.27.0
|
|
24
|
+
Requires-Dist: jinja2>=3.1.0
|
|
25
|
+
Requires-Dist: apscheduler>=3.10.0
|
|
26
|
+
Requires-Dist: bleach>=6.0
|
|
27
|
+
Requires-Dist: mcp[cli]>=1.2.0
|
|
28
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
29
|
+
Requires-Dist: psycopg2-binary>=2.9.0
|
|
30
|
+
Requires-Dist: pgvector>=0.2.0
|
|
31
|
+
Requires-Dist: runpod>=1.6.0
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
36
|
+
Provides-Extra: mlx
|
|
37
|
+
Requires-Dist: mlx-whisper>=0.4.0; extra == "mlx"
|
|
38
|
+
Provides-Extra: node
|
|
39
|
+
Requires-Dist: httpx[socks]>=0.26.0; extra == "node"
|
|
40
|
+
Requires-Dist: faster-whisper>=1.0.0; extra == "node"
|
|
41
|
+
Requires-Dist: pydantic-settings>=2.0.0; extra == "node"
|
|
42
|
+
Requires-Dist: python-dotenv>=1.0.0; extra == "node"
|
|
43
|
+
Requires-Dist: click>=8.1.0; extra == "node"
|
|
44
|
+
Requires-Dist: fastapi>=0.109.0; extra == "node"
|
|
45
|
+
Requires-Dist: uvicorn[standard]>=0.27.0; extra == "node"
|
|
46
|
+
Requires-Dist: jinja2>=3.1.0; extra == "node"
|
|
47
|
+
Provides-Extra: node-mlx
|
|
48
|
+
Requires-Dist: mlx-whisper>=0.4.0; extra == "node-mlx"
|
|
49
|
+
Dynamic: license-file
|
|
50
|
+
|
|
51
|
+
# cast2md
|
|
52
|
+
|
|
53
|
+
Podcast transcription service - download episodes via RSS and transcribe with Whisper. Automatically downloads publisher-provided transcripts when available (Podcasting 2.0) or fetches auto-generated transcripts from Pocket Casts.
|
|
54
|
+
|
|
55
|
+
> **Note**: This is a personal project under active development. I'm sharing it in case others find it useful, but I'm not currently providing support or reviewing pull requests.
|
|
56
|
+
|
|
57
|
+
## Features
|
|
58
|
+
|
|
59
|
+
- **iTunes URL Support**: Add podcasts via Apple Podcasts URLs (automatically resolves to RSS)
|
|
60
|
+
- **RSS Feed Management**: Add podcast feeds and automatically discover new episodes
|
|
61
|
+
- **External Transcript Downloads**: Fetches transcripts from Podcasting 2.0 tags and Pocket Casts before falling back to Whisper
|
|
62
|
+
- **Whisper Transcription**: Transcribe audio using faster-whisper or mlx-whisper
|
|
63
|
+
- **Distributed Transcription**: Use remote machines (M4 Macs, GPU PCs) to transcribe in parallel
|
|
64
|
+
- **Full-Text Search**: Search across episode metadata and transcripts
|
|
65
|
+
- **Web Interface**: Manage feeds, view episodes, and monitor progress
|
|
66
|
+
- **REST API**: Full API for integration with other tools
|
|
67
|
+
- **MCP Server**: Claude integration via Model Context Protocol
|
|
68
|
+
|
|
69
|
+
## Installation
|
|
70
|
+
|
|
71
|
+
### Docker
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
git clone https://github.com/meltforce/cast2md.git
|
|
75
|
+
cd cast2md
|
|
76
|
+
docker compose up -d
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Manual Installation
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# Install uv
|
|
83
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
84
|
+
|
|
85
|
+
# Clone and install
|
|
86
|
+
git clone https://github.com/meltforce/cast2md.git
|
|
87
|
+
cd cast2md
|
|
88
|
+
uv sync --frozen
|
|
89
|
+
|
|
90
|
+
# Configure
|
|
91
|
+
cp .env.example .env
|
|
92
|
+
# Edit .env with your settings
|
|
93
|
+
|
|
94
|
+
# Initialize and run
|
|
95
|
+
uv run cast2md init-db
|
|
96
|
+
uv run cast2md serve
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Configuration
|
|
100
|
+
|
|
101
|
+
Create a `.env` file:
|
|
102
|
+
|
|
103
|
+
```env
|
|
104
|
+
DATABASE_PATH=./data/cast2md.db
|
|
105
|
+
STORAGE_PATH=./data/media
|
|
106
|
+
TEMP_DOWNLOAD_PATH=./data/temp
|
|
107
|
+
|
|
108
|
+
# Whisper settings
|
|
109
|
+
WHISPER_MODEL=medium # tiny, base, small, medium, large-v3
|
|
110
|
+
WHISPER_DEVICE=cpu # cpu or cuda
|
|
111
|
+
WHISPER_COMPUTE_TYPE=int8 # int8, float16, float32
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Whisper Models
|
|
115
|
+
|
|
116
|
+
| Model | Quality | Speed (CPU) | RAM |
|
|
117
|
+
|-------|---------|-------------|-----|
|
|
118
|
+
| tiny | Basic | ~10x realtime | 1 GB |
|
|
119
|
+
| base | Good | ~5x realtime | 2 GB |
|
|
120
|
+
| small | Very good | ~2x realtime | 3 GB |
|
|
121
|
+
| medium | Excellent | ~1x realtime | 6 GB |
|
|
122
|
+
| large-v3 | Best | ~0.3x realtime | 12 GB |
|
|
123
|
+
|
|
124
|
+
## Usage
|
|
125
|
+
|
|
126
|
+
### Web Interface
|
|
127
|
+
|
|
128
|
+
Access at `http://localhost:8000`
|
|
129
|
+
|
|
130
|
+
- **Feeds**: Add and manage podcast RSS feeds
|
|
131
|
+
- **Episodes**: View episodes and transcription status
|
|
132
|
+
- **Search**: Search across titles, descriptions, and transcripts
|
|
133
|
+
- **Admin**: Monitor system health and processing queue
|
|
134
|
+
|
|
135
|
+
### CLI
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
# Add a podcast (RSS or Apple Podcasts URL)
|
|
139
|
+
cast2md add-feed "https://example.com/feed.xml"
|
|
140
|
+
cast2md add-feed "https://podcasts.apple.com/us/podcast/example/id123456"
|
|
141
|
+
|
|
142
|
+
# List and manage
|
|
143
|
+
cast2md list-feeds
|
|
144
|
+
cast2md list-episodes <feed_id>
|
|
145
|
+
|
|
146
|
+
# Process episodes
|
|
147
|
+
cast2md download <episode_id>
|
|
148
|
+
cast2md transcribe <episode_id>
|
|
149
|
+
cast2md process <episode_id> # download + transcribe
|
|
150
|
+
|
|
151
|
+
# Server
|
|
152
|
+
cast2md serve --host 0.0.0.0 --port 8000
|
|
153
|
+
|
|
154
|
+
# Backup/restore
|
|
155
|
+
cast2md backup -o backup.sql
|
|
156
|
+
cast2md restore backup.sql
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### MCP Server (Claude Integration)
|
|
160
|
+
|
|
161
|
+
```json
|
|
162
|
+
{
|
|
163
|
+
"mcpServers": {
|
|
164
|
+
"podcasts": {
|
|
165
|
+
"command": "/path/to/cast2md",
|
|
166
|
+
"args": ["mcp"]
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Available tools: `search_transcripts`, `search_episodes`, `queue_episode`, `add_feed`, `refresh_feed`
|
|
173
|
+
|
|
174
|
+
### Distributed Transcription
|
|
175
|
+
|
|
176
|
+
Run transcription workers on remote machines:
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
# On remote machine (Mac with MLX, GPU PC)
|
|
180
|
+
cast2md node register --server http://server:8000 --name "Worker Name"
|
|
181
|
+
cast2md node start
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
See [Distributed Transcription Setup](docs/distributed-transcription-setup.md) for details.
|
|
185
|
+
|
|
186
|
+
## API
|
|
187
|
+
|
|
188
|
+
| Endpoint | Description |
|
|
189
|
+
|----------|-------------|
|
|
190
|
+
| `GET /api/feeds` | List feeds |
|
|
191
|
+
| `POST /api/feeds` | Add feed |
|
|
192
|
+
| `GET /api/episodes/{id}` | Episode details |
|
|
193
|
+
| `GET /api/episodes/{id}/transcript` | Download transcript |
|
|
194
|
+
| `POST /api/queue/episodes/{id}/process` | Queue download |
|
|
195
|
+
| `POST /api/queue/episodes/{id}/transcribe` | Queue transcription |
|
|
196
|
+
| `GET /api/queue/status` | Queue status |
|
|
197
|
+
|
|
198
|
+
## Development
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
uv sync
|
|
202
|
+
uv run cast2md serve --reload
|
|
203
|
+
uv run pytest
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT
|
cast2md-2025.1/README.md
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# cast2md
|
|
2
|
+
|
|
3
|
+
Podcast transcription service - download episodes via RSS and transcribe with Whisper. Automatically downloads publisher-provided transcripts when available (Podcasting 2.0) or fetches auto-generated transcripts from Pocket Casts.
|
|
4
|
+
|
|
5
|
+
> **Note**: This is a personal project under active development. I'm sharing it in case others find it useful, but I'm not currently providing support or reviewing pull requests.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **iTunes URL Support**: Add podcasts via Apple Podcasts URLs (automatically resolves to RSS)
|
|
10
|
+
- **RSS Feed Management**: Add podcast feeds and automatically discover new episodes
|
|
11
|
+
- **External Transcript Downloads**: Fetches transcripts from Podcasting 2.0 tags and Pocket Casts before falling back to Whisper
|
|
12
|
+
- **Whisper Transcription**: Transcribe audio using faster-whisper or mlx-whisper
|
|
13
|
+
- **Distributed Transcription**: Use remote machines (M4 Macs, GPU PCs) to transcribe in parallel
|
|
14
|
+
- **Full-Text Search**: Search across episode metadata and transcripts
|
|
15
|
+
- **Web Interface**: Manage feeds, view episodes, and monitor progress
|
|
16
|
+
- **REST API**: Full API for integration with other tools
|
|
17
|
+
- **MCP Server**: Claude integration via Model Context Protocol
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
### Docker
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
git clone https://github.com/meltforce/cast2md.git
|
|
25
|
+
cd cast2md
|
|
26
|
+
docker compose up -d
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Manual Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Install uv
|
|
33
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
34
|
+
|
|
35
|
+
# Clone and install
|
|
36
|
+
git clone https://github.com/meltforce/cast2md.git
|
|
37
|
+
cd cast2md
|
|
38
|
+
uv sync --frozen
|
|
39
|
+
|
|
40
|
+
# Configure
|
|
41
|
+
cp .env.example .env
|
|
42
|
+
# Edit .env with your settings
|
|
43
|
+
|
|
44
|
+
# Initialize and run
|
|
45
|
+
uv run cast2md init-db
|
|
46
|
+
uv run cast2md serve
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Configuration
|
|
50
|
+
|
|
51
|
+
Create a `.env` file:
|
|
52
|
+
|
|
53
|
+
```env
|
|
54
|
+
DATABASE_PATH=./data/cast2md.db
|
|
55
|
+
STORAGE_PATH=./data/media
|
|
56
|
+
TEMP_DOWNLOAD_PATH=./data/temp
|
|
57
|
+
|
|
58
|
+
# Whisper settings
|
|
59
|
+
WHISPER_MODEL=medium # tiny, base, small, medium, large-v3
|
|
60
|
+
WHISPER_DEVICE=cpu # cpu or cuda
|
|
61
|
+
WHISPER_COMPUTE_TYPE=int8 # int8, float16, float32
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Whisper Models
|
|
65
|
+
|
|
66
|
+
| Model | Quality | Speed (CPU) | RAM |
|
|
67
|
+
|-------|---------|-------------|-----|
|
|
68
|
+
| tiny | Basic | ~10x realtime | 1 GB |
|
|
69
|
+
| base | Good | ~5x realtime | 2 GB |
|
|
70
|
+
| small | Very good | ~2x realtime | 3 GB |
|
|
71
|
+
| medium | Excellent | ~1x realtime | 6 GB |
|
|
72
|
+
| large-v3 | Best | ~0.3x realtime | 12 GB |
|
|
73
|
+
|
|
74
|
+
## Usage
|
|
75
|
+
|
|
76
|
+
### Web Interface
|
|
77
|
+
|
|
78
|
+
Access at `http://localhost:8000`
|
|
79
|
+
|
|
80
|
+
- **Feeds**: Add and manage podcast RSS feeds
|
|
81
|
+
- **Episodes**: View episodes and transcription status
|
|
82
|
+
- **Search**: Search across titles, descriptions, and transcripts
|
|
83
|
+
- **Admin**: Monitor system health and processing queue
|
|
84
|
+
|
|
85
|
+
### CLI
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
# Add a podcast (RSS or Apple Podcasts URL)
|
|
89
|
+
cast2md add-feed "https://example.com/feed.xml"
|
|
90
|
+
cast2md add-feed "https://podcasts.apple.com/us/podcast/example/id123456"
|
|
91
|
+
|
|
92
|
+
# List and manage
|
|
93
|
+
cast2md list-feeds
|
|
94
|
+
cast2md list-episodes <feed_id>
|
|
95
|
+
|
|
96
|
+
# Process episodes
|
|
97
|
+
cast2md download <episode_id>
|
|
98
|
+
cast2md transcribe <episode_id>
|
|
99
|
+
cast2md process <episode_id> # download + transcribe
|
|
100
|
+
|
|
101
|
+
# Server
|
|
102
|
+
cast2md serve --host 0.0.0.0 --port 8000
|
|
103
|
+
|
|
104
|
+
# Backup/restore
|
|
105
|
+
cast2md backup -o backup.sql
|
|
106
|
+
cast2md restore backup.sql
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### MCP Server (Claude Integration)
|
|
110
|
+
|
|
111
|
+
```json
|
|
112
|
+
{
|
|
113
|
+
"mcpServers": {
|
|
114
|
+
"podcasts": {
|
|
115
|
+
"command": "/path/to/cast2md",
|
|
116
|
+
"args": ["mcp"]
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Available tools: `search_transcripts`, `search_episodes`, `queue_episode`, `add_feed`, `refresh_feed`
|
|
123
|
+
|
|
124
|
+
### Distributed Transcription
|
|
125
|
+
|
|
126
|
+
Run transcription workers on remote machines:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# On remote machine (Mac with MLX, GPU PC)
|
|
130
|
+
cast2md node register --server http://server:8000 --name "Worker Name"
|
|
131
|
+
cast2md node start
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
See [Distributed Transcription Setup](docs/distributed-transcription-setup.md) for details.
|
|
135
|
+
|
|
136
|
+
## API
|
|
137
|
+
|
|
138
|
+
| Endpoint | Description |
|
|
139
|
+
|----------|-------------|
|
|
140
|
+
| `GET /api/feeds` | List feeds |
|
|
141
|
+
| `POST /api/feeds` | Add feed |
|
|
142
|
+
| `GET /api/episodes/{id}` | Episode details |
|
|
143
|
+
| `GET /api/episodes/{id}/transcript` | Download transcript |
|
|
144
|
+
| `POST /api/queue/episodes/{id}/process` | Queue download |
|
|
145
|
+
| `POST /api/queue/episodes/{id}/transcribe` | Queue transcription |
|
|
146
|
+
| `GET /api/queue/status` | Queue status |
|
|
147
|
+
|
|
148
|
+
## Development
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
uv sync
|
|
152
|
+
uv run cast2md serve --reload
|
|
153
|
+
uv run pytest
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
MIT
|
cast2md-2025.1/pyproject.toml
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cast2md"
|
|
7
|
+
version = "2025.01"
|
|
8
|
+
description = "Podcast transcription service - download episodes via RSS and transcribe with Whisper"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "cast2md"}
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: End Users/Desktop",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"feedparser>=6.0.0",
|
|
25
|
+
"httpx[socks]>=0.26.0",
|
|
26
|
+
"faster-whisper>=1.0.0",
|
|
27
|
+
"pydantic-settings>=2.0.0",
|
|
28
|
+
"python-dotenv>=1.0.0",
|
|
29
|
+
"click>=8.1.0",
|
|
30
|
+
"fastapi>=0.109.0",
|
|
31
|
+
"uvicorn[standard]>=0.27.0",
|
|
32
|
+
"jinja2>=3.1.0",
|
|
33
|
+
"apscheduler>=3.10.0",
|
|
34
|
+
"bleach>=6.0",
|
|
35
|
+
"mcp[cli]>=1.2.0",
|
|
36
|
+
"sentence-transformers>=2.2.0",
|
|
37
|
+
"psycopg2-binary>=2.9.0",
|
|
38
|
+
"pgvector>=0.2.0",
|
|
39
|
+
"runpod>=1.6.0",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
dev = [
|
|
44
|
+
"pytest>=7.0.0",
|
|
45
|
+
"pytest-asyncio>=0.23.0",
|
|
46
|
+
"ruff>=0.1.0",
|
|
47
|
+
]
|
|
48
|
+
mlx = [
|
|
49
|
+
"mlx-whisper>=0.4.0",
|
|
50
|
+
]
|
|
51
|
+
# Minimal node-only installation (~80-120 MB vs ~600 MB full install)
|
|
52
|
+
# Install with: pip install --no-deps cast2md && pip install "cast2md[node]"
|
|
53
|
+
node = [
|
|
54
|
+
"httpx[socks]>=0.26.0",
|
|
55
|
+
"faster-whisper>=1.0.0",
|
|
56
|
+
"pydantic-settings>=2.0.0",
|
|
57
|
+
"python-dotenv>=1.0.0",
|
|
58
|
+
"click>=8.1.0",
|
|
59
|
+
"fastapi>=0.109.0",
|
|
60
|
+
"uvicorn[standard]>=0.27.0",
|
|
61
|
+
"jinja2>=3.1.0",
|
|
62
|
+
]
|
|
63
|
+
# Add MLX support for Apple Silicon nodes
|
|
64
|
+
node-mlx = [
|
|
65
|
+
"mlx-whisper>=0.4.0",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
[project.scripts]
|
|
69
|
+
cast2md = "cast2md.cli:cli"
|
|
70
|
+
|
|
71
|
+
[tool.setuptools.packages.find]
|
|
72
|
+
where = ["src"]
|
|
73
|
+
|
|
74
|
+
[tool.ruff]
|
|
75
|
+
line-length = 100
|
|
76
|
+
target-version = "py311"
|
|
77
|
+
|
|
78
|
+
[tool.ruff.lint]
|
|
79
|
+
select = ["E", "F", "I", "N", "W", "UP"]
|
|
80
|
+
|
|
81
|
+
[tool.pytest.ini_options]
|
|
82
|
+
testpaths = ["tests"]
|
|
83
|
+
|
|
84
|
+
# Use CPU-only PyTorch (avoid downloading 2GB+ of CUDA dependencies)
|
|
85
|
+
[[tool.uv.index]]
|
|
86
|
+
name = "pytorch-cpu"
|
|
87
|
+
url = "https://download.pytorch.org/whl/cpu"
|
|
88
|
+
explicit = true
|
|
89
|
+
|
|
90
|
+
[tool.uv.sources]
|
|
91
|
+
torch = { index = "pytorch-cpu" }
|
|
92
|
+
triton = { index = "pytorch-cpu" }
|
cast2md-2025.1/setup.cfg
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""API module."""
|