hearsay 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hearsay-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mudassar Awan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
hearsay-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,242 @@
1
+ Metadata-Version: 2.4
2
+ Name: hearsay
3
+ Version: 0.1.0
4
+ Summary: crawl4ai for video & audio — turn any YouTube video, podcast episode, or local recording into clean, timestamped, LLM-ready markdown
5
+ Author: Mudassar Awan
6
+ Author-email: Mudassar Awan <mudassar.awan@snapdev.ai>
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Requires-Dist: faster-whisper>=1.2.1
10
+ Requires-Dist: feedparser>=6.0.12
11
+ Requires-Dist: pydantic>=2.13.4
12
+ Requires-Dist: typer>=0.26.7
13
+ Requires-Dist: youtube-transcript-api>=1.2.4
14
+ Requires-Dist: yt-dlp>=2026.6.9
15
+ Requires-Dist: mcp>=1.2 ; extra == 'mcp'
16
+ Requires-Python: >=3.11
17
+ Provides-Extra: mcp
18
+ Description-Content-Type: text/markdown
19
+
20
+ # hearsay
21
+
22
+ > **crawl4ai for video & audio.** One command turns any YouTube video, podcast
23
+ > episode, or local recording into clean, timestamped, chunked, LLM-ready
24
+ > markdown — for RAG pipelines and AI agents.
25
+
26
+ [![CI](https://github.com/mudassar531/hearsay/actions/workflows/ci.yml/badge.svg)](https://github.com/mudassar531/hearsay/actions/workflows/ci.yml)
27
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org/downloads/)
28
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE)
29
+
30
+ ![hearsay in action](demo/demo.gif)
31
+
32
+ ## Why
33
+
34
+ Getting a transcript into your RAG pipeline usually means gluing together
35
+ `yt-dlp`, Whisper, and a pile of timestamp-wrangling scripts — and you still end
36
+ up with one line per caption fragment or an undifferentiated wall of text.
37
+ hearsay does the whole thing in one command and gives you back markdown a human
38
+ *and* a model can read: readable paragraphs, real timestamps, chapter headings,
39
+ and an optional JSON sidecar with a stable schema.
40
+
41
+ ## Install
42
+
43
+ ```bash
44
+ uv tool install hearsay # recommended
45
+ # or
46
+ pipx install hearsay
47
+ # with the optional MCP server extra (transcription is already in the base install):
48
+ uv tool install "hearsay[mcp]"
49
+ ```
50
+
51
+ > **From source:** to run the latest unreleased code instead of a published
52
+ > release, install from a checkout:
53
+ >
54
+ > ```bash
55
+ > git clone https://github.com/mudassar531/hearsay
56
+ > cd hearsay
57
+ > uv tool install . # puts `hearsay` on your PATH
58
+ > # or, for development: uv sync && uv run hearsay --help
59
+ > ```
60
+
61
+ **System requirement:** [ffmpeg](#requirements) on your PATH.
62
+
63
+ ## 30-second quickstart
64
+
65
+ ```bash
66
+ # YouTube → markdown via captions (fast — no download)
67
+ hearsay "https://www.youtube.com/watch?v=VIDEO_ID"
68
+
69
+ # Local audio/video → markdown via local Whisper (runs on CPU)
70
+ hearsay talk.mp3
71
+
72
+ # Force Whisper on a YouTube URL, pick a model, also emit JSON
73
+ hearsay "https://youtu.be/VIDEO_ID" --transcribe --model small --json
74
+
75
+ # Music/song? Add --no-vad so the lyrics aren't filtered out as "non-speech"
76
+ hearsay "https://youtu.be/SONG_ID" --no-vad
77
+
78
+ # A podcast feed or YouTube playlist: list, then ingest a selection
79
+ hearsay "https://example.com/feed.xml"
80
+ hearsay "https://example.com/feed.xml" --all --limit 3 --output-dir ./out
81
+ ```
82
+
83
+ No captions on a video? hearsay falls back to local Whisper automatically.
84
+
85
+ ## What you get
86
+
87
+ ```markdown
88
+ ---
89
+ title: "You Would Be a Terrible Leader"
90
+ source: "https://www.youtube.com/watch?v=rStL7niR7gs"
91
+ channel: "CGP Grey"
92
+ duration: "00:18:13"
93
+ ingested: "2026-06-13T10:00:00Z"
94
+ method: "captions"
95
+ language: "en"
96
+ ---
97
+
98
+ # You Would Be a Terrible Leader
99
+
100
+ ## [00:00:00 – 00:05:21]
101
+
102
+ **[00:00:00]** Do you want to rule? Do you see the problems in your country and
103
+ know how to fix them? If only you had the power to do so. Well. You've come to
104
+ the right place. But, before we begin this lesson in political power, ask
105
+ yourself, why don't rulers see as clearly as you...
106
+ ```
107
+
108
+ Pass `--json` for a sidecar matching the [`Transcript` schema](docs/schema.json):
109
+ metadata plus `chunks[]`, each with `start_s`, `end_s`, `section`, and `text` —
110
+ ready to embed.
111
+
112
+ ## How it compares
113
+
114
+ | | **hearsay** | DIY `yt-dlp` + Whisper | markitdown / docling |
115
+ | --- | --- | --- | --- |
116
+ | Input | video & **audio** | video & audio (you wire it) | documents (pdf/docx/pptx) |
117
+ | One command | ✅ | ✗ multi-step plumbing | ✅ (for docs) |
118
+ | Captions-first (no download) | ✅ | ✗ usually re-transcribes | n/a |
119
+ | Timestamps + paragraph grouping | ✅ readable | ✗ raw segments | n/a |
120
+ | Chapters → sections | ✅ | ✗ manual | n/a |
121
+ | Podcasts · playlists · batch | ✅ | ✗ manual | ✗ |
122
+ | JSON sidecar for RAG | ✅ stable schema | ✗ manual | varies |
123
+ | MCP server for agents | ✅ | ✗ | varies |
124
+
125
+ hearsay does **media**; document tools like
126
+ [markitdown](https://github.com/microsoft/markitdown) and
127
+ [docling](https://github.com/docling-project/docling) do **documents**. Use both.
128
+
129
+ ## Give your agent ears
130
+
131
+ hearsay ships an [MCP](https://modelcontextprotocol.io) server so AI agents can
132
+ ingest media themselves. It exposes two tools — `ingest_url(url, transcribe?, lang?)`
133
+ and `ingest_file(path)` — that each return clean, timestamped markdown.
134
+
135
+ ```bash
136
+ uv tool install "hearsay[mcp]"
137
+ hearsay mcp # stdio MCP server (Ctrl-C to stop)
138
+ ```
139
+
140
+ **Claude Code:**
141
+
142
+ ```bash
143
+ claude mcp add hearsay -- hearsay mcp
144
+ ```
145
+
146
+ or add to `.mcp.json` (project) / `~/.claude.json` (user):
147
+
148
+ ```json
149
+ {
150
+ "mcpServers": {
151
+ "hearsay": {
152
+ "type": "stdio",
153
+ "command": "hearsay",
154
+ "args": ["mcp"]
155
+ }
156
+ }
157
+ }
158
+ ```
159
+
160
+ **Claude Desktop** — add to `claude_desktop_config.json` (Settings → Developer →
161
+ Edit Config; macOS: `~/Library/Application Support/Claude/`, Windows:
162
+ `%APPDATA%\Claude\`):
163
+
164
+ ```json
165
+ {
166
+ "mcpServers": {
167
+ "hearsay": {
168
+ "type": "stdio",
169
+ "command": "hearsay",
170
+ "args": ["mcp"],
171
+ "env": {
172
+ "HEARSAY_MODEL": "small"
173
+ }
174
+ }
175
+ }
176
+ }
177
+ ```
178
+
179
+ If `hearsay` is not on the host's PATH, use the absolute path (`which hearsay`; `where hearsay` on Windows),
180
+ or `"command": "python"`, `"args": ["-m", "hearsay", "mcp"]`.
181
+
182
+ Server configuration (env vars, since MCP tool signatures are fixed):
183
+
184
+ | Variable | Default | Effect |
185
+ | --- | --- | --- |
186
+ | `HEARSAY_MODEL` | `small` | Whisper model size (`tiny`…`large-v3`) |
187
+ | `HEARSAY_LANG` | _(unset)_ | Language code to use; when unset, captions default to English and Whisper auto-detects |
188
+ | `HEARSAY_VAD` | `1` | Voice-activity filter; set `0` for music/songs |
189
+
190
+ > **Speech vs. music:** hearsay is tuned for spoken audio (podcasts, talks,
191
+ > interviews, meetings), where transcription is accurate. For music, pass
192
+ > `--no-vad` so the vocals aren't discarded — but expect a rough, approximate
193
+ > lyric transcript, since Whisper is a speech model, not a lyrics transcriber.
194
+
195
+ ## CLI reference
196
+
197
+ ```text
198
+ hearsay <SOURCE> [options] SOURCE = YouTube video/playlist URL, podcast RSS, or local file
199
+
200
+ -o, --output PATH Output file for a single source (default ./<id>.md)
201
+ --output-dir PATH Output directory for batch (playlist/feed) ingestion (default ./hearsay-out)
202
+ --lang CODE Language: captions default to English; transcription auto-detects
203
+ --transcribe Force local Whisper even when captions exist
204
+ --model SIZE Whisper model: tiny | base | small | medium | large-v3 (default small)
205
+ --no-vad Disable voice-activity filtering (use for music/songs)
206
+ --json Also write a .json sidecar (Transcript schema)
207
+ --latest Batch: ingest only the most recent item
208
+ --episode N Batch: ingest only item N (1-indexed)
209
+ --all [--limit N] Batch: ingest all items (optionally capped)
210
+ --version Show version
211
+
212
+ hearsay mcp Run the MCP stdio server
213
+ ```
214
+
215
+ ## Requirements
216
+
217
+ - **Python 3.11+**
218
+ - **ffmpeg** on your PATH. hearsay decodes most audio/video directly
219
+ (faster-whisper bundles its own decoder), but ffmpeg is the safe baseline and
220
+ is used for some yt-dlp format merges.
221
+
222
+ | OS | Install ffmpeg |
223
+ | --- | --- |
224
+ | macOS (Homebrew) | `brew install ffmpeg` |
225
+ | Debian / Ubuntu | `sudo apt install ffmpeg` |
226
+ | Fedora | `sudo dnf install ffmpeg` |
227
+ | Arch | `sudo pacman -S ffmpeg` |
228
+ | Windows (winget) | `winget install Gyan.FFmpeg` |
229
+ | Windows (Chocolatey) | `choco install ffmpeg` |
230
+
231
+ The first transcription downloads the chosen Whisper model once (tens of MB to
232
+ ~3 GB for `large-v3`), then caches it for offline use.
233
+
234
+ ## Contributing
235
+
236
+ See [CONTRIBUTING.md](CONTRIBUTING.md) and the
237
+ [good first issues](docs/good-first-issues.md). hearsay does one thing well —
238
+ media → great markdown — and aims to keep doing exactly that.
239
+
240
+ ## License
241
+
242
+ [MIT](LICENSE)
@@ -0,0 +1,223 @@
1
+ # hearsay
2
+
3
+ > **crawl4ai for video & audio.** One command turns any YouTube video, podcast
4
+ > episode, or local recording into clean, timestamped, chunked, LLM-ready
5
+ > markdown — for RAG pipelines and AI agents.
6
+
7
+ [![CI](https://github.com/mudassar531/hearsay/actions/workflows/ci.yml/badge.svg)](https://github.com/mudassar531/hearsay/actions/workflows/ci.yml)
8
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue)](https://www.python.org/downloads/)
9
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green)](LICENSE)
10
+
11
+ ![hearsay in action](demo/demo.gif)
12
+
13
+ ## Why
14
+
15
+ Getting a transcript into your RAG pipeline usually means gluing together
16
+ `yt-dlp`, Whisper, and a pile of timestamp-wrangling scripts — and you still end
17
+ up with one line per caption fragment or an undifferentiated wall of text.
18
+ hearsay does the whole thing in one command and gives you back markdown a human
19
+ *and* a model can read: readable paragraphs, real timestamps, chapter headings,
20
+ and an optional JSON sidecar with a stable schema.
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ uv tool install hearsay # recommended
26
+ # or
27
+ pipx install hearsay
28
+ # with the optional MCP server extra (transcription is already in the base install):
29
+ uv tool install "hearsay[mcp]"
30
+ ```
31
+
32
+ > **From source:** to run the latest unreleased code instead of a published
33
+ > release, install from a checkout:
34
+ >
35
+ > ```bash
36
+ > git clone https://github.com/mudassar531/hearsay
37
+ > cd hearsay
38
+ > uv tool install . # puts `hearsay` on your PATH
39
+ > # or, for development: uv sync && uv run hearsay --help
40
+ > ```
41
+
42
+ **System requirement:** [ffmpeg](#requirements) on your PATH.
43
+
44
+ ## 30-second quickstart
45
+
46
+ ```bash
47
+ # YouTube → markdown via captions (fast — no download)
48
+ hearsay "https://www.youtube.com/watch?v=VIDEO_ID"
49
+
50
+ # Local audio/video → markdown via local Whisper (runs on CPU)
51
+ hearsay talk.mp3
52
+
53
+ # Force Whisper on a YouTube URL, pick a model, also emit JSON
54
+ hearsay "https://youtu.be/VIDEO_ID" --transcribe --model small --json
55
+
56
+ # Music/song? Add --no-vad so the lyrics aren't filtered out as "non-speech"
57
+ hearsay "https://youtu.be/SONG_ID" --no-vad
58
+
59
+ # A podcast feed or YouTube playlist: list, then ingest a selection
60
+ hearsay "https://example.com/feed.xml"
61
+ hearsay "https://example.com/feed.xml" --all --limit 3 --output-dir ./out
62
+ ```
63
+
64
+ No captions on a video? hearsay falls back to local Whisper automatically.
65
+
66
+ ## What you get
67
+
68
+ ```markdown
69
+ ---
70
+ title: "You Would Be a Terrible Leader"
71
+ source: "https://www.youtube.com/watch?v=rStL7niR7gs"
72
+ channel: "CGP Grey"
73
+ duration: "00:18:13"
74
+ ingested: "2026-06-13T10:00:00Z"
75
+ method: "captions"
76
+ language: "en"
77
+ ---
78
+
79
+ # You Would Be a Terrible Leader
80
+
81
+ ## [00:00:00 – 00:05:21]
82
+
83
+ **[00:00:00]** Do you want to rule? Do you see the problems in your country and
84
+ know how to fix them? If only you had the power to do so. Well. You've come to
85
+ the right place. But, before we begin this lesson in political power, ask
86
+ yourself, why don't rulers see as clearly as you...
87
+ ```
88
+
89
+ Pass `--json` for a sidecar matching the [`Transcript` schema](docs/schema.json):
90
+ metadata plus `chunks[]`, each with `start_s`, `end_s`, `section`, and `text` —
91
+ ready to embed.
92
+
93
+ ## How it compares
94
+
95
+ | | **hearsay** | DIY `yt-dlp` + Whisper | markitdown / docling |
96
+ | --- | --- | --- | --- |
97
+ | Input | video & **audio** | video & audio (you wire it) | documents (pdf/docx/pptx) |
98
+ | One command | ✅ | ✗ multi-step plumbing | ✅ (for docs) |
99
+ | Captions-first (no download) | ✅ | ✗ usually re-transcribes | n/a |
100
+ | Timestamps + paragraph grouping | ✅ readable | ✗ raw segments | n/a |
101
+ | Chapters → sections | ✅ | ✗ manual | n/a |
102
+ | Podcasts · playlists · batch | ✅ | ✗ manual | ✗ |
103
+ | JSON sidecar for RAG | ✅ stable schema | ✗ manual | varies |
104
+ | MCP server for agents | ✅ | ✗ | varies |
105
+
106
+ hearsay does **media**; document tools like
107
+ [markitdown](https://github.com/microsoft/markitdown) and
108
+ [docling](https://github.com/docling-project/docling) do **documents**. Use both.
109
+
110
+ ## Give your agent ears
111
+
112
+ hearsay ships an [MCP](https://modelcontextprotocol.io) server so AI agents can
113
+ ingest media themselves. It exposes two tools — `ingest_url(url, transcribe?, lang?)`
114
+ and `ingest_file(path)` — that each return clean, timestamped markdown.
115
+
116
+ ```bash
117
+ uv tool install "hearsay[mcp]"
118
+ hearsay mcp # stdio MCP server (Ctrl-C to stop)
119
+ ```
120
+
121
+ **Claude Code:**
122
+
123
+ ```bash
124
+ claude mcp add hearsay -- hearsay mcp
125
+ ```
126
+
127
+ or add to `.mcp.json` (project) / `~/.claude.json` (user):
128
+
129
+ ```json
130
+ {
131
+ "mcpServers": {
132
+ "hearsay": {
133
+ "type": "stdio",
134
+ "command": "hearsay",
135
+ "args": ["mcp"]
136
+ }
137
+ }
138
+ }
139
+ ```
140
+
141
+ **Claude Desktop** — add to `claude_desktop_config.json` (Settings → Developer →
142
+ Edit Config; macOS: `~/Library/Application Support/Claude/`, Windows:
143
+ `%APPDATA%\Claude\`):
144
+
145
+ ```json
146
+ {
147
+ "mcpServers": {
148
+ "hearsay": {
149
+ "type": "stdio",
150
+ "command": "hearsay",
151
+ "args": ["mcp"],
152
+ "env": {
153
+ "HEARSAY_MODEL": "small"
154
+ }
155
+ }
156
+ }
157
+ }
158
+ ```
159
+
160
+ If `hearsay` is not on the host's PATH, use the absolute path (`which hearsay`; `where hearsay` on Windows),
161
+ or `"command": "python"`, `"args": ["-m", "hearsay", "mcp"]`.
162
+
163
+ Server configuration (env vars, since MCP tool signatures are fixed):
164
+
165
+ | Variable | Default | Effect |
166
+ | --- | --- | --- |
167
+ | `HEARSAY_MODEL` | `small` | Whisper model size (`tiny`…`large-v3`) |
168
+ | `HEARSAY_LANG` | _(unset)_ | Language code to use; when unset, captions default to English and Whisper auto-detects |
169
+ | `HEARSAY_VAD` | `1` | Voice-activity filter; set `0` for music/songs |
170
+
171
+ > **Speech vs. music:** hearsay is tuned for spoken audio (podcasts, talks,
172
+ > interviews, meetings), where transcription is accurate. For music, pass
173
+ > `--no-vad` so the vocals aren't discarded — but expect a rough, approximate
174
+ > lyric transcript, since Whisper is a speech model, not a lyrics transcriber.
175
+
176
+ ## CLI reference
177
+
178
+ ```text
179
+ hearsay <SOURCE> [options] SOURCE = YouTube video/playlist URL, podcast RSS, or local file
180
+
181
+ -o, --output PATH Output file for a single source (default ./<id>.md)
182
+ --output-dir PATH Output directory for batch (playlist/feed) ingestion (default ./hearsay-out)
183
+ --lang CODE Language: captions default to English; transcription auto-detects
184
+ --transcribe Force local Whisper even when captions exist
185
+ --model SIZE Whisper model: tiny | base | small | medium | large-v3 (default small)
186
+ --no-vad Disable voice-activity filtering (use for music/songs)
187
+ --json Also write a .json sidecar (Transcript schema)
188
+ --latest Batch: ingest only the most recent item
189
+ --episode N Batch: ingest only item N (1-indexed)
190
+ --all [--limit N] Batch: ingest all items (optionally capped)
191
+ --version Show version
192
+
193
+ hearsay mcp Run the MCP stdio server
194
+ ```
195
+
196
+ ## Requirements
197
+
198
+ - **Python 3.11+**
199
+ - **ffmpeg** on your PATH. hearsay decodes most audio/video directly
200
+ (faster-whisper bundles its own decoder), but ffmpeg is the safe baseline and
201
+ is used for some yt-dlp format merges.
202
+
203
+ | OS | Install ffmpeg |
204
+ | --- | --- |
205
+ | macOS (Homebrew) | `brew install ffmpeg` |
206
+ | Debian / Ubuntu | `sudo apt install ffmpeg` |
207
+ | Fedora | `sudo dnf install ffmpeg` |
208
+ | Arch | `sudo pacman -S ffmpeg` |
209
+ | Windows (winget) | `winget install Gyan.FFmpeg` |
210
+ | Windows (Chocolatey) | `choco install ffmpeg` |
211
+
212
+ The first transcription downloads the chosen Whisper model once (tens of MB to
213
+ ~3 GB for `large-v3`), then caches it for offline use.
214
+
215
+ ## Contributing
216
+
217
+ See [CONTRIBUTING.md](CONTRIBUTING.md) and the
218
+ [good first issues](docs/good-first-issues.md). hearsay does one thing well —
219
+ media → great markdown — and aims to keep doing exactly that.
220
+
221
+ ## License
222
+
223
+ [MIT](LICENSE)
@@ -0,0 +1,78 @@
1
+ [project]
2
+ name = "hearsay"
3
+ version = "0.1.0"
4
+ description = "crawl4ai for video & audio — turn any YouTube video, podcast episode, or local recording into clean, timestamped, LLM-ready markdown"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Mudassar Awan", email = "mudassar.awan@snapdev.ai" }
8
+ ]
9
+ license = "MIT"
10
+ license-files = ["LICENSE"]
11
+ requires-python = ">=3.11"
12
+ dependencies = [
13
+ "faster-whisper>=1.2.1",
14
+ "feedparser>=6.0.12",
15
+ "pydantic>=2.13.4",
16
+ "typer>=0.26.7",
17
+ "youtube-transcript-api>=1.2.4",
18
+ "yt-dlp>=2026.6.9",
19
+ ]
20
+
21
+ [project.scripts]
22
+ hearsay = "hearsay.cli:app"
23
+
24
+ [project.optional-dependencies]
25
+ mcp = [
26
+ "mcp>=1.2",
27
+ ]
28
+
29
+ [build-system]
30
+ requires = ["uv_build>=0.11.17,<0.12.0"]
31
+ build-backend = "uv_build"
32
+
33
+ [dependency-groups]
34
+ dev = [
35
+ "mcp>=1.2",
36
+ "mypy>=2.1.0",
37
+ "pytest>=9.0.3",
38
+ "ruff>=0.15.17",
39
+ ]
40
+
41
+ [tool.ruff]
42
+ line-length = 100
43
+ src = ["src", "tests"]
44
+
45
+ [tool.ruff.lint]
46
+ # These characters are intentional typography: the en dash appears in
47
+ # time-span section titles, and the curly quotes are matched literally in
48
+ # the grouping module's punctuation regexes (transcripts contain them).
49
+ allowed-confusables = ["–", "“", "”", "‘", "’"]
50
+ select = [
51
+ "E", # pycodestyle errors
52
+ "W", # pycodestyle warnings
53
+ "F", # pyflakes
54
+ "I", # isort
55
+ "UP", # pyupgrade
56
+ "B", # bugbear
57
+ "C4", # comprehensions
58
+ "SIM", # simplify
59
+ "RUF", # ruff-specific
60
+ ]
61
+
62
+ [tool.mypy]
63
+ files = ["src", "tests"]
64
+ python_version = "3.11"
65
+ check_untyped_defs = true
66
+ warn_unused_ignores = true
67
+ # Lenient policy: no strict mode; untyped third-party deps get scoped
68
+ # [[tool.mypy.overrides]] entries instead of a global ignore_missing_imports,
69
+ # so first-party import typos still fail.
70
+
71
+ [[tool.mypy.overrides]]
72
+ # These third-party deps ship no type stubs or py.typed marker.
73
+ module = ["faster_whisper.*", "feedparser.*"]
74
+ ignore_missing_imports = true
75
+
76
+ [tool.pytest.ini_options]
77
+ testpaths = ["tests"]
78
+ addopts = "-q"
@@ -0,0 +1,12 @@
1
+ """hearsay — crawl4ai for video & audio.
2
+
3
+ Turn any YouTube video, podcast episode, or local recording into clean,
4
+ timestamped, LLM-ready markdown.
5
+ """
6
+
7
+ from importlib.metadata import PackageNotFoundError, version
8
+
9
+ try:
10
+ __version__ = version("hearsay")
11
+ except PackageNotFoundError: # pragma: no cover - source tree without an install
12
+ __version__ = "0.0.0.dev0"
@@ -0,0 +1,6 @@
1
+ """Enable ``python -m hearsay`` (used by MCP server configs and tests)."""
2
+
3
+ from hearsay.cli import app
4
+
5
+ if __name__ == "__main__":
6
+ app()