anyscribecli 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscribecli-0.3.1/.claude/settings.local.json +13 -0
- anyscribecli-0.3.1/.gitignore +35 -0
- anyscribecli-0.3.1/AGENTS.md +82 -0
- anyscribecli-0.3.1/BACKLOG.md +178 -0
- anyscribecli-0.3.1/CLAUDE.md +129 -0
- anyscribecli-0.3.1/LICENSE +21 -0
- anyscribecli-0.3.1/PKG-INFO +269 -0
- anyscribecli-0.3.1/README.md +229 -0
- anyscribecli-0.3.1/docs/building/COMMIT_CHECKLIST.md +115 -0
- anyscribecli-0.3.1/docs/building/_index.md +11 -0
- anyscribecli-0.3.1/docs/building/architecture.md +70 -0
- anyscribecli-0.3.1/docs/building/downloaders.md +68 -0
- anyscribecli-0.3.1/docs/building/journal/2026-03-26-initial-architecture.md +67 -0
- anyscribecli-0.3.1/docs/building/journal/2026-03-26-v020-full-feature-build.md +49 -0
- anyscribecli-0.3.1/docs/building/journal/2026-03-27-v030-download-media-ux.md +56 -0
- anyscribecli-0.3.1/docs/building/journal/2026-03-29-v031-documentation-accuracy-audit.md +43 -0
- anyscribecli-0.3.1/docs/building/ops/pypi-guide.md +180 -0
- anyscribecli-0.3.1/docs/building/ops/release-checklist.md +206 -0
- anyscribecli-0.3.1/docs/building/ops/whats-automated.md +156 -0
- anyscribecli-0.3.1/docs/building/plans/v0.1.0-original-plan.md +165 -0
- anyscribecli-0.3.1/docs/building/providers.md +79 -0
- anyscribecli-0.3.1/docs/user/commands.md +406 -0
- anyscribecli-0.3.1/docs/user/configuration.md +187 -0
- anyscribecli-0.3.1/docs/user/getting-started.md +190 -0
- anyscribecli-0.3.1/docs/user/providers.md +162 -0
- anyscribecli-0.3.1/install.sh +323 -0
- anyscribecli-0.3.1/pyproject.toml +68 -0
- anyscribecli-0.3.1/src/anyscribecli/__init__.py +3 -0
- anyscribecli-0.3.1/src/anyscribecli/cli/__init__.py +0 -0
- anyscribecli-0.3.1/src/anyscribecli/cli/batch.py +160 -0
- anyscribecli-0.3.1/src/anyscribecli/cli/config_cmd.py +187 -0
- anyscribecli-0.3.1/src/anyscribecli/cli/download.py +160 -0
- anyscribecli-0.3.1/src/anyscribecli/cli/main.py +122 -0
- anyscribecli-0.3.1/src/anyscribecli/cli/onboard.py +289 -0
- anyscribecli-0.3.1/src/anyscribecli/cli/transcribe.py +200 -0
- anyscribecli-0.3.1/src/anyscribecli/config/__init__.py +0 -0
- anyscribecli-0.3.1/src/anyscribecli/config/paths.py +27 -0
- anyscribecli-0.3.1/src/anyscribecli/config/settings.py +86 -0
- anyscribecli-0.3.1/src/anyscribecli/core/__init__.py +0 -0
- anyscribecli-0.3.1/src/anyscribecli/core/audio.py +69 -0
- anyscribecli-0.3.1/src/anyscribecli/core/deps.py +274 -0
- anyscribecli-0.3.1/src/anyscribecli/core/orchestrator.py +98 -0
- anyscribecli-0.3.1/src/anyscribecli/core/updater.py +274 -0
- anyscribecli-0.3.1/src/anyscribecli/downloaders/__init__.py +0 -0
- anyscribecli-0.3.1/src/anyscribecli/downloaders/base.py +32 -0
- anyscribecli-0.3.1/src/anyscribecli/downloaders/instagram.py +169 -0
- anyscribecli-0.3.1/src/anyscribecli/downloaders/registry.py +35 -0
- anyscribecli-0.3.1/src/anyscribecli/downloaders/youtube.py +79 -0
- anyscribecli-0.3.1/src/anyscribecli/providers/__init__.py +36 -0
- anyscribecli-0.3.1/src/anyscribecli/providers/base.py +45 -0
- anyscribecli-0.3.1/src/anyscribecli/providers/elevenlabs.py +146 -0
- anyscribecli-0.3.1/src/anyscribecli/providers/local.py +102 -0
- anyscribecli-0.3.1/src/anyscribecli/providers/openai.py +133 -0
- anyscribecli-0.3.1/src/anyscribecli/providers/openrouter.py +127 -0
- anyscribecli-0.3.1/src/anyscribecli/providers/sargam.py +146 -0
- anyscribecli-0.3.1/src/anyscribecli/vault/__init__.py +0 -0
- anyscribecli-0.3.1/src/anyscribecli/vault/index.py +106 -0
- anyscribecli-0.3.1/src/anyscribecli/vault/scaffold.py +69 -0
- anyscribecli-0.3.1/src/anyscribecli/vault/writer.py +125 -0
- anyscribecli-0.3.1/tests/__init__.py +0 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Read(//Users/rish/thefoundry/**)",
|
|
5
|
+
"Read(//Users/rish/thefoundry/secondbrain/dev/journal/2026-03-29/**)"
|
|
6
|
+
],
|
|
7
|
+
"additionalDirectories": [
|
|
8
|
+
"/Users/rish/thefoundry/secondbrain/dev/journal",
|
|
9
|
+
"/Users/rish/thefoundry/secondbrain",
|
|
10
|
+
"/Users/rish/thefoundry/secondbrain/dev/journal/2026-03-29"
|
|
11
|
+
]
|
|
12
|
+
}
|
|
13
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
.venv/
|
|
9
|
+
*.egg
|
|
10
|
+
|
|
11
|
+
# Environment and secrets
|
|
12
|
+
.env
|
|
13
|
+
*.env.local
|
|
14
|
+
|
|
15
|
+
# Media files (downloaded content)
|
|
16
|
+
*.mp3
|
|
17
|
+
*.mp4
|
|
18
|
+
*.m4a
|
|
19
|
+
*.wav
|
|
20
|
+
*.ogg
|
|
21
|
+
*.flac
|
|
22
|
+
*.webm
|
|
23
|
+
|
|
24
|
+
# IDE
|
|
25
|
+
.vscode/
|
|
26
|
+
.idea/
|
|
27
|
+
|
|
28
|
+
# OS
|
|
29
|
+
.DS_Store
|
|
30
|
+
Thumbs.db
|
|
31
|
+
|
|
32
|
+
# App runtime (these live in ~/.anyscribecli/)
|
|
33
|
+
sessions/
|
|
34
|
+
tmp/
|
|
35
|
+
logs/
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# anyscribecli — Agent Directives
|
|
2
|
+
|
|
3
|
+
## Before Starting Work
|
|
4
|
+
|
|
5
|
+
1. Read `CLAUDE.md` for architecture and patterns
|
|
6
|
+
2. Read `docs/building/_index.md` for recent project history
|
|
7
|
+
3. If your task touches providers or downloaders, read the relevant living doc
|
|
8
|
+
4. Check `docs/building/journal/` for any recent entries related to your task area
|
|
9
|
+
|
|
10
|
+
## Memory Layer
|
|
11
|
+
|
|
12
|
+
`docs/building/journal/` is the project memory. It exists so that agents in future sessions have context about past decisions, bugs, and research.
|
|
13
|
+
|
|
14
|
+
### Reading memory (before work)
|
|
15
|
+
|
|
16
|
+
- Scan `docs/building/_index.md` — it's a newest-first table of all entries
|
|
17
|
+
- Read any entries tagged with your task area
|
|
18
|
+
- This prevents re-investigating solved problems or re-debating settled decisions
|
|
19
|
+
|
|
20
|
+
### Writing memory (after work)
|
|
21
|
+
|
|
22
|
+
After completing significant work, create a journal entry:
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
docs/building/journal/YYYY-MM-DD-<descriptive-slug>.md
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
With frontmatter:
|
|
29
|
+
```yaml
|
|
30
|
+
---
|
|
31
|
+
type: decision|research|troubleshooting|learning
|
|
32
|
+
tags: [relevant, tags]
|
|
33
|
+
tldr: "One-line summary"
|
|
34
|
+
---
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Then prepend a row to `docs/building/_index.md`.
|
|
38
|
+
|
|
39
|
+
## Post-Commit Checklist
|
|
40
|
+
|
|
41
|
+
**After every significant commit**, follow `docs/building/COMMIT_CHECKLIST.md`. It has per-scenario checklists (new command, new provider, new downloader, version bump) and grep commands to catch stale references. This is mandatory.
|
|
42
|
+
|
|
43
|
+
## Documentation Requirements
|
|
44
|
+
|
|
45
|
+
### Developer docs (`docs/building/`)
|
|
46
|
+
- Every PR or significant change MUST include a building doc update
|
|
47
|
+
- If you changed architecture: update `docs/building/architecture.md`
|
|
48
|
+
- If you added/changed a provider: update `docs/building/providers.md`
|
|
49
|
+
- If you added/changed a downloader: update `docs/building/downloaders.md`
|
|
50
|
+
- If you made a non-trivial decision: write a journal entry explaining why
|
|
51
|
+
|
|
52
|
+
### User docs (`docs/user/`)
|
|
53
|
+
- If you added/changed a user-facing command or flag: update `docs/user/commands.md`
|
|
54
|
+
- If you changed config options: update `docs/user/configuration.md`
|
|
55
|
+
- If you changed the onboarding flow: update `docs/user/getting-started.md`
|
|
56
|
+
- User docs target semi-technical users new to CLI — explain jargon, show examples, include troubleshooting
|
|
57
|
+
- Every user doc has frontmatter: `summary`, `read_when`, `title`
|
|
58
|
+
|
|
59
|
+
## Quick Context
|
|
60
|
+
|
|
61
|
+
- CLI entry point: `src/anyscribecli/cli/main.py`
|
|
62
|
+
- Onboarding + provider info: `src/anyscribecli/cli/onboard.py`
|
|
63
|
+
- Config/providers commands: `src/anyscribecli/cli/config_cmd.py`
|
|
64
|
+
- Download command: `src/anyscribecli/cli/download.py`
|
|
65
|
+
- Batch processing: `src/anyscribecli/cli/batch.py`
|
|
66
|
+
- Config loading: `src/anyscribecli/config/settings.py`
|
|
67
|
+
- Path constants: `src/anyscribecli/config/paths.py`
|
|
68
|
+
- Provider ABC + registry: `src/anyscribecli/providers/base.py`, `providers/__init__.py`
|
|
69
|
+
- Downloader ABC + registry: `src/anyscribecli/downloaders/base.py`, `downloaders/registry.py`
|
|
70
|
+
- Core flow: `src/anyscribecli/core/orchestrator.py`
|
|
71
|
+
- Dependency checker: `src/anyscribecli/core/deps.py`
|
|
72
|
+
- Update system: `src/anyscribecli/core/updater.py`
|
|
73
|
+
|
|
74
|
+
## Key Dependencies
|
|
75
|
+
|
|
76
|
+
- `beaupy` — arrow-key selectors in onboarding wizard
|
|
77
|
+
- `instaloader` — Instagram auth + post metadata (main dep, not optional)
|
|
78
|
+
- `faster-whisper` — local transcription (optional, only for `local` provider)
|
|
79
|
+
|
|
80
|
+
## Cross-Platform
|
|
81
|
+
|
|
82
|
+
All code must work on macOS and Linux. Use `pathlib.Path` everywhere. No platform-specific assumptions.
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Backlog
|
|
2
|
+
|
|
3
|
+
What's built, what's next, and what's on the horizon.
|
|
4
|
+
|
|
5
|
+
## Versioning
|
|
6
|
+
|
|
7
|
+
This project uses **Semantic Versioning** (SemVer): `MAJOR.MINOR.PATCH`
|
|
8
|
+
|
|
9
|
+
- **PATCH** (0.1.0 → 0.1.1): bug fixes, typos, small tweaks
|
|
10
|
+
- **MINOR** (0.1.0 → 0.2.0): new features, new provider, new command — backwards compatible
|
|
11
|
+
- **MAJOR** (0.x → 1.0.0): breaking changes (config format, removed commands, renamed flags)
|
|
12
|
+
|
|
13
|
+
The `0.x` prefix means pre-stable — breaking changes are allowed between minor versions. `1.0.0` signals stability.
|
|
14
|
+
|
|
15
|
+
### Version → Release mapping
|
|
16
|
+
|
|
17
|
+
| Version | Milestone | Status |
|
|
18
|
+
|---------|-----------|--------|
|
|
19
|
+
| 0.1.0 | YouTube + OpenAI MVP | Released 2026-03-26 |
|
|
20
|
+
| 0.2.0 | Full feature build (Instagram, all providers, batch, config, onboarding) | Released 2026-03-26 |
|
|
21
|
+
| 0.3.0 | Download command, media restructure, post-transcription prompts, UX polish | Released 2026-03-27 |
|
|
22
|
+
| 0.3.1 | Documentation accuracy audit — 16 issues fixed across all docs | **Current** |
|
|
23
|
+
| 0.4.0 | Cache/dedup, test suite, error handling | Next |
|
|
24
|
+
| 1.0.0 | Stable: published on PyPI, full test coverage | Future |
|
|
25
|
+
|
|
26
|
+
### How to bump versions
|
|
27
|
+
|
|
28
|
+
Version lives in TWO places (must match):
|
|
29
|
+
- `src/anyscribecli/__init__.py`
|
|
30
|
+
- `pyproject.toml`
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# After changing both files:
|
|
34
|
+
git add -A && git commit -m "Bump version to X.Y.Z"
|
|
35
|
+
git tag vX.Y.Z
|
|
36
|
+
git push && git push --tags
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## v0.1.0 — YouTube + OpenAI MVP ✅
|
|
42
|
+
|
|
43
|
+
**Released:** 2026-03-26
|
|
44
|
+
|
|
45
|
+
Everything needed to transcribe a YouTube video to markdown:
|
|
46
|
+
|
|
47
|
+
- [x] YouTube download via yt-dlp (optimized audio: 16kHz, mono, 64kbps)
|
|
48
|
+
- [x] OpenAI Whisper transcription (verbose_json, segment timestamps)
|
|
49
|
+
- [x] Audio chunking for files >25MB (18-min chunks)
|
|
50
|
+
- [x] Obsidian vault output with YAML frontmatter
|
|
51
|
+
- [x] Master index (_index.md) + daily processing logs
|
|
52
|
+
- [x] `ascli onboard` — interactive wizard with dependency checking + auto-install
|
|
53
|
+
- [x] `ascli transcribe <url>` — with --provider, --language, --json, --keep-media, --quiet
|
|
54
|
+
- [x] `ascli update` — dual-path updater (git + pip)
|
|
55
|
+
- [x] `ascli doctor` — system health checks
|
|
56
|
+
- [x] `install.sh` — zero-friction installer script
|
|
57
|
+
- [x] CLAUDE.md + AGENTS.md — AI developer instructions
|
|
58
|
+
- [x] Developer memory layer (docs/building/)
|
|
59
|
+
- [x] User documentation (docs/user/) — semi-technical audience
|
|
60
|
+
- [x] MIT license, PyPI-ready metadata
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## v0.2.0 — Instagram + Config + Providers + Batch + Local ✅
|
|
65
|
+
|
|
66
|
+
**Released:** 2026-03-26
|
|
67
|
+
|
|
68
|
+
All features originally planned for v0.2.0–v0.5.0, built in one session:
|
|
69
|
+
|
|
70
|
+
- [x] Instagram downloader (`downloaders/instagram.py`)
|
|
71
|
+
- instaloader Python API with session caching (Dropzone bundle pattern)
|
|
72
|
+
- Auth credentials from config.yaml
|
|
73
|
+
- Reels and posts supported
|
|
74
|
+
- [x] `ascli config show` — display current settings (supports --json)
|
|
75
|
+
- [x] `ascli config set <key> <value>` — dot-notation for nested keys
|
|
76
|
+
- [x] `ascli config path` — print config file location
|
|
77
|
+
- [x] `ascli providers list` — show available providers with active indicator
|
|
78
|
+
- [x] `ascli providers test <name>` — test a provider's API key
|
|
79
|
+
- [x] OpenRouter provider — audio-via-chat using GPT-4o-audio-preview
|
|
80
|
+
- [x] ElevenLabs provider — Scribe v1 STT API, word-level timestamps
|
|
81
|
+
- [x] Sargam/Sarvam provider — Indic languages, auto-chunks to 30s for REST API limit
|
|
82
|
+
- [x] Local provider — faster-whisper, CPU/GPU, no API key, offline
|
|
83
|
+
- [x] `ascli batch <file>` — batch transcribe URLs from a file
|
|
84
|
+
- [x] Updated user docs for all new commands
|
|
85
|
+
- [x] Per-provider API key management in onboarding wizard
|
|
86
|
+
- [x] Instagram credentials in onboarding wizard
|
|
87
|
+
- [x] `output_format: timestamped` — transcript with `[mm:ss]` timestamps per segment
|
|
88
|
+
- [x] Rich progress bar for batch jobs
|
|
89
|
+
- [x] Batch summary in daily log (each item indexed via orchestrator)
|
|
90
|
+
- [x] Provider comparison docs (`docs/user/providers.md`)
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## v0.3.0 — Download, Media Restructure, UX Polish ✅
|
|
95
|
+
|
|
96
|
+
**Released:** 2026-03-27
|
|
97
|
+
|
|
98
|
+
- [x] `ascli download <url>` — download video or audio only, no transcription
|
|
99
|
+
- `--video` (default) and `--audio-only` flags
|
|
100
|
+
- Saves to `~/.anyscribecli/media/video/` or `media/audio/`
|
|
101
|
+
- Supports --clipboard, --json, interactive prompt
|
|
102
|
+
- [x] Media restructured: moved outside workspace, split into audio/ and video/ by platform/date
|
|
103
|
+
- [x] Instagram password moved from config.yaml to .env (security fix)
|
|
104
|
+
- [x] `prompt_download` config: never/ask/always — post-transcription download offer
|
|
105
|
+
- [x] Onboarding wizard: arrow-key selectors (beaupy), post-transcription download step
|
|
106
|
+
- [x] URL validation: catches zsh glob mangling, interactive prompt fallback, --clipboard
|
|
107
|
+
- [x] instaloader promoted to main dependency (was optional)
|
|
108
|
+
- [x] Post-commit checklist (`docs/building/COMMIT_CHECKLIST.md`)
|
|
109
|
+
- [x] `build-with-rish.md` — reusable build reference for future projects
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## v0.4.0 — Cache, Dedup, Quality
|
|
114
|
+
|
|
115
|
+
- [ ] **Duplicate / cache checking** (inspired by AnyScribe web app's FindStamp pattern):
|
|
116
|
+
- Before transcribing: check if URL was already transcribed (lookup by source URL in _index.md or a cache file)
|
|
117
|
+
- Before downloading: check if video/audio already exists in media/
|
|
118
|
+
- If cached, show the existing transcript and ask to re-transcribe or skip
|
|
119
|
+
- `--force` flag to bypass cache and re-transcribe
|
|
120
|
+
- Track cache hits/misses for cost awareness
|
|
121
|
+
- [ ] Full test suite (pytest — unit tests for providers, downloaders, vault, config)
|
|
122
|
+
- [ ] Comprehensive error handling and retry logic (network failures, API rate limits)
|
|
123
|
+
- [ ] Suppress instaloader's noisy retry output (redirect to log file)
|
|
124
|
+
- [ ] `ascli logs` command to view recent log files
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## v1.0.0 — Stable Release
|
|
129
|
+
|
|
130
|
+
- [ ] PyPI published (`pip install anyscribecli`) — see "Publishing to PyPI" below
|
|
131
|
+
- [ ] GitHub Releases with release notes for each tag
|
|
132
|
+
- [ ] Full test coverage
|
|
133
|
+
- [ ] Stable config format (breaking changes require v2.0.0)
|
|
134
|
+
- [ ] CI/CD pipeline (GitHub Actions: lint, test, build, publish)
|
|
135
|
+
|
|
136
|
+
### Publishing to PyPI (when ready)
|
|
137
|
+
|
|
138
|
+
PyPI is the Python package registry — makes `pip install anyscribecli` work globally.
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
# One-time: create account at pypi.org, generate API token
|
|
142
|
+
pip install build twine
|
|
143
|
+
python -m build # creates dist/anyscribecli-X.Y.Z.tar.gz + .whl
|
|
144
|
+
twine upload dist/* # uploads to PyPI (prompts for token)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
After publishing, update `install.sh` to use `pip install anyscribecli` instead of `git+https://...`.
|
|
148
|
+
|
|
149
|
+
### GitHub Releases (when ready)
|
|
150
|
+
|
|
151
|
+
A GitHub Release attaches release notes and downloadable assets to a git tag.
|
|
152
|
+
It's how users on GitHub discover new versions.
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# After committing and tagging:
|
|
156
|
+
gh release create v0.3.0 --title "v0.3.0 — Download, Media Restructure, UX Polish" --notes-file RELEASE_NOTES.md
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Or create via github.com/rishmadaan/anyscribecli/releases/new.
|
|
160
|
+
|
|
161
|
+
For now, distribution is via git only:
|
|
162
|
+
```bash
|
|
163
|
+
pip install git+https://github.com/rishmadaan/anyscribecli.git
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Icebox (ideas for later, no timeline)
|
|
169
|
+
|
|
170
|
+
- GUI (web UI via FastAPI + React, or TUI via Textual)
|
|
171
|
+
- Speaker diarization (who said what)
|
|
172
|
+
- AI-generated summaries (TL;DR via LLM after transcription)
|
|
173
|
+
- Chapter/section detection
|
|
174
|
+
- Search across all transcripts (`ascli search <query>`)
|
|
175
|
+
- Export formats beyond markdown (PDF, DOCX, SRT subtitles)
|
|
176
|
+
- Podcast RSS feed ingestion
|
|
177
|
+
- Topic file generation (Foundry-style, when 3+ transcripts share a topic)
|
|
178
|
+
- Cost tracking (Whisper API usage per month)
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# anyscribecli — AI Developer Instructions
|
|
2
|
+
|
|
3
|
+
## What This Is
|
|
4
|
+
|
|
5
|
+
A Python CLI tool (`ascli`) that downloads video/audio from YouTube/Instagram, transcribes it via API, and outputs structured markdown into an Obsidian vault at `~/.anyscribecli/workspace/`.
|
|
6
|
+
|
|
7
|
+
## Architecture
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
src/anyscribecli/
|
|
11
|
+
├── cli/ # Typer commands (main.py, onboard.py, transcribe.py, download.py, batch.py, config_cmd.py)
|
|
12
|
+
├── config/ # Paths + settings (paths.py, settings.py)
|
|
13
|
+
├── downloaders/ # Platform downloaders (base.py, youtube.py, instagram.py, registry.py)
|
|
14
|
+
├── providers/ # Transcription APIs (base.py, openai.py, openrouter.py, elevenlabs.py, sargam.py, local.py)
|
|
15
|
+
├── vault/ # Obsidian vault management (scaffold.py, writer.py, index.py)
|
|
16
|
+
└── core/ # Orchestration + audio + deps + updater (orchestrator.py, audio.py, deps.py, updater.py)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Flow: `CLI command -> orchestrator -> downloader + provider -> vault writer -> index update`
|
|
20
|
+
|
|
21
|
+
## Key Patterns
|
|
22
|
+
|
|
23
|
+
- **Providers** implement `TranscriptionProvider` ABC from `providers/base.py` (5 active: openai, elevenlabs, openrouter, sargam, local)
|
|
24
|
+
- **Downloaders** implement `AbstractDownloader` ABC from `downloaders/base.py` (youtube, instagram)
|
|
25
|
+
- **Config** at `~/.anyscribecli/config.yaml` — secrets in `.env` (API keys, Instagram password)
|
|
26
|
+
- **All paths** use `pathlib.Path` via `config/paths.py` — no hardcoded separators
|
|
27
|
+
- **CLI output** human-readable by default, `--json` flag for machine/agent consumption
|
|
28
|
+
- **Interactive prompts** use `beaupy` (arrow-key selectors) for onboarding, `typer.prompt` for text input
|
|
29
|
+
- **URL input** three methods: quoted argument (primary), interactive prompt (fallback), clipboard
|
|
30
|
+
- **Media outside vault** — audio in `~/.anyscribecli/media/audio/`, video in `media/video/`, workspace is pure markdown
|
|
31
|
+
- **Audio params** optimized for Whisper: 16kHz, mono, 64kbps mp3
|
|
32
|
+
- **Chunking** — 18-min segments for Whisper (25MB limit), 30s segments for Sarvam (REST API limit)
|
|
33
|
+
|
|
34
|
+
## Documentation Ethic
|
|
35
|
+
|
|
36
|
+
This project maintains TWO documentation layers. Both are mandatory, not optional.
|
|
37
|
+
|
|
38
|
+
### 1. Developer memory layer (`docs/building/`)
|
|
39
|
+
|
|
40
|
+
For developers and AI agents working on the codebase.
|
|
41
|
+
|
|
42
|
+
**When to write a building doc entry:**
|
|
43
|
+
- After completing a significant feature or change
|
|
44
|
+
- After making an architecture decision
|
|
45
|
+
- After debugging a non-trivial issue
|
|
46
|
+
- After researching alternatives and choosing one
|
|
47
|
+
|
|
48
|
+
**How to write one:**
|
|
49
|
+
1. Create `docs/building/journal/YYYY-MM-DD-<slug>.md` with frontmatter (type, tags, tldr)
|
|
50
|
+
2. Update `docs/building/_index.md` with a new row (newest first)
|
|
51
|
+
3. Update relevant living docs (`architecture.md`, `providers.md`, `downloaders.md`) if the change affects them
|
|
52
|
+
|
|
53
|
+
**Living docs vs journal entries:**
|
|
54
|
+
- **Living docs** (`architecture.md`, `providers.md`, `downloaders.md`) reflect current state — update in place
|
|
55
|
+
- **Journal entries** preserve historical decisions — append only, never edit old entries
|
|
56
|
+
|
|
57
|
+
### 2. User documentation (`docs/user/`)
|
|
58
|
+
|
|
59
|
+
For end users — assume a **semi-technical audience who may be new to CLI tools**. This is critical.
|
|
60
|
+
|
|
61
|
+
**Files:**
|
|
62
|
+
- `getting-started.md` — 5-minute install-to-first-transcription guide
|
|
63
|
+
- `commands.md` — complete command reference with examples
|
|
64
|
+
- `configuration.md` — all settings explained with context
|
|
65
|
+
- `providers.md` — provider comparison: features, pricing, languages, when to use each
|
|
66
|
+
|
|
67
|
+
**User doc standards:**
|
|
68
|
+
- Every doc has YAML frontmatter: `summary`, `read_when` (list of when to read this), `title`
|
|
69
|
+
- Lead with the command, explain after — show what to type before explaining why
|
|
70
|
+
- Use `>` blockquotes for tips, warnings, and "new to this?" asides
|
|
71
|
+
- Include copy-paste-ready examples for every command and flag
|
|
72
|
+
- Explain jargon when first used (e.g., "slug", "frontmatter", "editable install")
|
|
73
|
+
- Troubleshooting section with common errors and plain-English fixes
|
|
74
|
+
- Write for someone who can follow instructions but doesn't know CLI conventions
|
|
75
|
+
|
|
76
|
+
**When to update user docs:**
|
|
77
|
+
- Adding a new command → update `commands.md`, add to overview table
|
|
78
|
+
- Adding a new flag → update the flags table in `commands.md`
|
|
79
|
+
- Changing config options → update `configuration.md`
|
|
80
|
+
- Changing onboarding flow → update `getting-started.md`
|
|
81
|
+
- Adding a new platform/provider → update `providers.md` and relevant sections in other docs
|
|
82
|
+
|
|
83
|
+
**Never skip user docs.** If you add a feature users interact with, the user docs must be updated in the same commit.
|
|
84
|
+
|
|
85
|
+
## Adding a New Provider
|
|
86
|
+
|
|
87
|
+
1. Create `src/anyscribecli/providers/<name>.py` implementing `TranscriptionProvider`
|
|
88
|
+
2. Register it in `providers/__init__.py` PROVIDER_REGISTRY
|
|
89
|
+
3. Add any new env vars to the onboarding wizard
|
|
90
|
+
4. Update `docs/building/providers.md`
|
|
91
|
+
5. Write a journal entry explaining the decision
|
|
92
|
+
|
|
93
|
+
## Adding a New Downloader
|
|
94
|
+
|
|
95
|
+
1. Create `src/anyscribecli/downloaders/<name>.py` implementing `AbstractDownloader`
|
|
96
|
+
2. Register it in `downloaders/registry.py` DOWNLOADERS list
|
|
97
|
+
3. Update URL detection regex in `registry.py`
|
|
98
|
+
4. Update `docs/building/downloaders.md`
|
|
99
|
+
|
|
100
|
+
## Post-Commit Checklist
|
|
101
|
+
|
|
102
|
+
**After every significant commit**, follow `docs/building/COMMIT_CHECKLIST.md`. It ensures README, user docs, building docs, and version references stay in sync with code. This is mandatory — stale docs are bugs.
|
|
103
|
+
|
|
104
|
+
## Versioning
|
|
105
|
+
|
|
106
|
+
SemVer: `MAJOR.MINOR.PATCH`. See `BACKLOG.md` for the full version roadmap.
|
|
107
|
+
|
|
108
|
+
Version lives in **two places** that must match: `src/anyscribecli/__init__.py` and the `version` field in `pyproject.toml` — update both when bumping.
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# After changing version in __init__.py AND pyproject.toml:
|
|
112
|
+
git add -A && git commit -m "Bump version to X.Y.Z"
|
|
113
|
+
git tag vX.Y.Z
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Testing
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
pytest # run all tests
|
|
120
|
+
ruff check src/ # lint
|
|
121
|
+
ruff format src/ # format
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Do Not
|
|
125
|
+
|
|
126
|
+
- Import from project root — use `from anyscribecli.x.y import z`
|
|
127
|
+
- Hardcode paths — use `config/paths.py`
|
|
128
|
+
- Skip documentation — every significant change gets a building doc entry
|
|
129
|
+
- Add features beyond what was asked — lean first, expand later
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Rishabh Madaan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|