crispasr-agent-transcriber 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. crispasr_agent_transcriber-0.3.2/.codex-plugin/plugin.json +46 -0
  2. crispasr_agent_transcriber-0.3.2/.github/workflows/ci.yml +29 -0
  3. crispasr_agent_transcriber-0.3.2/.github/workflows/publish-npm.yml +48 -0
  4. crispasr_agent_transcriber-0.3.2/.github/workflows/publish-pypi.yml +27 -0
  5. crispasr_agent_transcriber-0.3.2/.github/workflows/release.yml +35 -0
  6. crispasr_agent_transcriber-0.3.2/.gitignore +36 -0
  7. crispasr_agent_transcriber-0.3.2/.mcp.json +10 -0
  8. crispasr_agent_transcriber-0.3.2/AGENTS.md +37 -0
  9. crispasr_agent_transcriber-0.3.2/LICENSE +21 -0
  10. crispasr_agent_transcriber-0.3.2/PKG-INFO +366 -0
  11. crispasr_agent_transcriber-0.3.2/README.md +349 -0
  12. crispasr_agent_transcriber-0.3.2/assets/README.md +113 -0
  13. crispasr_agent_transcriber-0.3.2/assets/crispasr-icon.svg +9 -0
  14. crispasr_agent_transcriber-0.3.2/assets/crispasr-small.svg +6 -0
  15. crispasr_agent_transcriber-0.3.2/docs/agent_integrations.md +81 -0
  16. crispasr_agent_transcriber-0.3.2/docs/codex_usage.md +26 -0
  17. crispasr_agent_transcriber-0.3.2/docs/crispasr_setup.md +35 -0
  18. crispasr_agent_transcriber-0.3.2/docs/mcp_usage.md +27 -0
  19. crispasr_agent_transcriber-0.3.2/docs/plugin_install.md +156 -0
  20. crispasr_agent_transcriber-0.3.2/docs/publishing.md +391 -0
  21. crispasr_agent_transcriber-0.3.2/docs/release.md +37 -0
  22. crispasr_agent_transcriber-0.3.2/docs/security.md +17 -0
  23. crispasr_agent_transcriber-0.3.2/examples/audio_transcription.md +13 -0
  24. crispasr_agent_transcriber-0.3.2/examples/codex_prompts.md +9 -0
  25. crispasr_agent_transcriber-0.3.2/examples/video_transcription.md +19 -0
  26. crispasr_agent_transcriber-0.3.2/mcp_server/crispasr_mcp/__init__.py +1 -0
  27. crispasr_agent_transcriber-0.3.2/mcp_server/crispasr_mcp/server.py +25 -0
  28. crispasr_agent_transcriber-0.3.2/mcp_server/crispasr_mcp/tools.py +198 -0
  29. crispasr_agent_transcriber-0.3.2/npm/LICENSE +21 -0
  30. crispasr_agent_transcriber-0.3.2/npm/README.md +41 -0
  31. crispasr_agent_transcriber-0.3.2/npm/bin/crispasr-agent-transcriber.js +13 -0
  32. crispasr_agent_transcriber-0.3.2/npm/package-lock.json +31 -0
  33. crispasr_agent_transcriber-0.3.2/npm/package.json +37 -0
  34. crispasr_agent_transcriber-0.3.2/npm/src/cli.js +174 -0
  35. crispasr_agent_transcriber-0.3.2/npm/src/constants.js +65 -0
  36. crispasr_agent_transcriber-0.3.2/npm/src/errors.js +16 -0
  37. crispasr_agent_transcriber-0.3.2/npm/src/installer.js +397 -0
  38. crispasr_agent_transcriber-0.3.2/npm/src/marketplace.js +100 -0
  39. crispasr_agent_transcriber-0.3.2/npm/src/release.js +122 -0
  40. crispasr_agent_transcriber-0.3.2/npm/src/system.js +63 -0
  41. crispasr_agent_transcriber-0.3.2/npm/test/cli.test.js +38 -0
  42. crispasr_agent_transcriber-0.3.2/npm/test/installer.test.js +163 -0
  43. crispasr_agent_transcriber-0.3.2/npm/test/marketplace.test.js +42 -0
  44. crispasr_agent_transcriber-0.3.2/npm/test/release.test.js +55 -0
  45. crispasr_agent_transcriber-0.3.2/pyproject.toml +46 -0
  46. crispasr_agent_transcriber-0.3.2/scripts/build_plugin_bundle.py +98 -0
  47. crispasr_agent_transcriber-0.3.2/scripts/setup.ps1 +36 -0
  48. crispasr_agent_transcriber-0.3.2/scripts/transcribe.py +14 -0
  49. crispasr_agent_transcriber-0.3.2/server.json +23 -0
  50. crispasr_agent_transcriber-0.3.2/skills/crispasr-transcription/SKILL.md +45 -0
  51. crispasr_agent_transcriber-0.3.2/skills/crispasr-transcription/references/crispasr_server.md +27 -0
  52. crispasr_agent_transcriber-0.3.2/skills/crispasr-transcription/scripts/transcribe.py +15 -0
  53. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/__init__.py +12 -0
  54. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/cli.py +222 -0
  55. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/client.py +114 -0
  56. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/crispasr_manager.py +271 -0
  57. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/errors.py +59 -0
  58. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/language.py +127 -0
  59. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/media.py +300 -0
  60. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/profiles.py +131 -0
  61. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/schemas.py +101 -0
  62. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/server_manager.py +134 -0
  63. crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/workflow.py +237 -0
  64. crispasr_agent_transcriber-0.3.2/tests/__init__.py +1 -0
  65. crispasr_agent_transcriber-0.3.2/tests/helpers.py +17 -0
  66. crispasr_agent_transcriber-0.3.2/tests/test_cli.py +90 -0
  67. crispasr_agent_transcriber-0.3.2/tests/test_client.py +31 -0
  68. crispasr_agent_transcriber-0.3.2/tests/test_crispasr_manager.py +277 -0
  69. crispasr_agent_transcriber-0.3.2/tests/test_distribution_metadata.py +39 -0
  70. crispasr_agent_transcriber-0.3.2/tests/test_language.py +86 -0
  71. crispasr_agent_transcriber-0.3.2/tests/test_mcp_server.py +32 -0
  72. crispasr_agent_transcriber-0.3.2/tests/test_mcp_tools.py +118 -0
  73. crispasr_agent_transcriber-0.3.2/tests/test_media.py +59 -0
  74. crispasr_agent_transcriber-0.3.2/tests/test_plugin_bundle.py +39 -0
  75. crispasr_agent_transcriber-0.3.2/tests/test_profiles.py +33 -0
  76. crispasr_agent_transcriber-0.3.2/tests/test_server_manager.py +36 -0
  77. crispasr_agent_transcriber-0.3.2/uv.lock +813 -0
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "crispasr-agent-transcriber",
3
+ "version": "0.3.2",
4
+ "description": "Transcribe local audio or video files through CrispASR with local models only. Auto-detects English vs Chinese, routes to Cohere Transcribe or Qwen3-ASR, and extracts audio from video with ffmpeg. No cloud uploads.",
5
+ "author": {
6
+ "name": "crispasr-agent-transcriber contributors",
7
+ "url": "https://github.com/EmiyaKatuz/crispasr-agent-transcriber"
8
+ },
9
+ "homepage": "https://github.com/EmiyaKatuz/crispasr-agent-transcriber",
10
+ "repository": "https://github.com/EmiyaKatuz/crispasr-agent-transcriber",
11
+ "license": "MIT",
12
+ "keywords": [
13
+ "transcription",
14
+ "asr",
15
+ "crispasr",
16
+ "audio",
17
+ "video",
18
+ "subtitle",
19
+ "srt",
20
+ "vtt",
21
+ "whisper",
22
+ "local-models",
23
+ "mcp"
24
+ ],
25
+ "skills": "./skills/",
26
+ "mcpServers": "./.mcp.json",
27
+ "interface": {
28
+ "displayName": "CrispASR Transcriber",
29
+ "shortDescription": "Transcribe local audio/video with CrispASR, auto routing, and GPU acceleration",
30
+ "longDescription": "Use CrispASR Transcriber to transcribe local audio or video files entirely on your machine. Auto-detects English vs Chinese using FireRed LID, routes to Cohere Transcribe or Qwen3-ASR 1.7B, extracts audio from video with ffmpeg, and outputs text, verbose JSON, SRT, or VTT. No cloud uploads, no API keys. Supports GPU acceleration (CUDA > Vulkan > CPU).",
31
+ "developerName": "crispasr-agent-transcriber contributors",
32
+ "category": "Productivity",
33
+ "capabilities": [
34
+ "Read",
35
+ "Write"
36
+ ],
37
+ "websiteURL": "https://github.com/EmiyaKatuz/crispasr-agent-transcriber",
38
+ "defaultPrompt": [
39
+ "Transcribe this local audio or video file with CrispASR"
40
+ ],
41
+ "brandColor": "#2D7DD2",
42
+ "composerIcon": "./assets/crispasr-small.svg",
43
+ "logo": "./assets/crispasr-icon.svg",
44
+ "screenshots": []
45
+ }
46
+ }
@@ -0,0 +1,29 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ jobs:
8
+ test:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+ - uses: astral-sh/setup-uv@v5
13
+ - uses: actions/setup-python@v5
14
+ with:
15
+ python-version: "3.11"
16
+ - run: uv sync --extra dev --extra mcp
17
+ - run: uv run pytest
18
+ - run: uv run ruff check .
19
+ - uses: actions/setup-node@v4
20
+ with:
21
+ node-version: "24"
22
+ cache: npm
23
+ cache-dependency-path: npm/package-lock.json
24
+ - run: npm ci
25
+ working-directory: npm
26
+ - run: npm test
27
+ working-directory: npm
28
+ - run: npm pack --dry-run
29
+ working-directory: npm
@@ -0,0 +1,48 @@
1
+ name: Publish npm installer
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ permissions:
7
+ contents: read
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment: npm
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: actions/setup-node@v4
17
+ with:
18
+ node-version: "24"
19
+ registry-url: "https://registry.npmjs.org"
20
+ cache: npm
21
+ cache-dependency-path: npm/package-lock.json
22
+ - run: npm ci
23
+ working-directory: npm
24
+ - run: npm test
25
+ working-directory: npm
26
+ - run: npm pack --dry-run
27
+ working-directory: npm
28
+ - name: Verify the matching GitHub Release
29
+ env:
30
+ GH_TOKEN: ${{ github.token }}
31
+ run: |
32
+ VERSION=$(node -p "require('./npm/package.json').version")
33
+ gh release view "v${VERSION}" --json assets \
34
+ --jq '.assets[].name' | grep -Fx "crispasr-agent-transcriber-plugin-${VERSION}.zip"
35
+ gh release view "v${VERSION}" --json assets \
36
+ --jq '.assets[].name' | grep -Fx "SHA256SUMS"
37
+ - name: Publish public npm package
38
+ working-directory: npm
39
+ env:
40
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
41
+ run: |
42
+ NAME=$(node -p "require('./package.json').name")
43
+ VERSION=$(node -p "require('./package.json').version")
44
+ if npm view "${NAME}@${VERSION}" version >/dev/null 2>&1; then
45
+ echo "${NAME}@${VERSION} is already published."
46
+ else
47
+ npm publish --access public --provenance
48
+ fi
@@ -0,0 +1,27 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+ workflow_dispatch:
8
+
9
+ permissions:
10
+ contents: read
11
+ id-token: write
12
+
13
+ jobs:
14
+ publish:
15
+ runs-on: ubuntu-latest
16
+ environment: pypi
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+ - uses: astral-sh/setup-uv@v5
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.11"
23
+ - run: uv sync --extra dev --extra mcp
24
+ - run: uv run pytest
25
+ - run: uv run ruff check .
26
+ - run: uv build
27
+ - run: uv publish --trusted-publishing always
@@ -0,0 +1,35 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: write
10
+
11
+ jobs:
12
+ build-release:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: astral-sh/setup-uv@v5
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.11"
20
+ - run: uv sync --extra dev --extra mcp
21
+ - run: uv run pytest
22
+ - run: uv run ruff check .
23
+ - run: uv build
24
+ - run: uv run python scripts/build_plugin_bundle.py
25
+ - name: Generate release checksums
26
+ run: |
27
+ cd dist
28
+ sha256sum *.whl *.tar.gz *.zip > SHA256SUMS
29
+ - name: Create or update GitHub Release
30
+ env:
31
+ GH_TOKEN: ${{ github.token }}
32
+ run: |
33
+ gh release view "$GITHUB_REF_NAME" >/dev/null 2>&1 || \
34
+ gh release create "$GITHUB_REF_NAME" --generate-notes
35
+ gh release upload "$GITHUB_REF_NAME" dist/* --clobber
@@ -0,0 +1,36 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.py[cod]
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ node_modules/
7
+ *.tgz
8
+ .codegraph/
9
+
10
+ # Local media, models, transcripts, and temporary outputs.
11
+ models/
12
+ outputs/
13
+ transcripts/
14
+ *.wav.tmp
15
+ *.mp3
16
+ *.mp4
17
+ *.m4a
18
+ *.mov
19
+ *.webm
20
+ *.flac
21
+ *.ogg
22
+ *.opus
23
+ *.srt
24
+ *.vtt
25
+ *.transcript.json
26
+ *.metadata.json
27
+
28
+ # OS/editor noise.
29
+ .DS_Store
30
+ Thumbs.db
31
+
32
+ # CrispASR binary
33
+ /bin/
34
+
35
+ # CPU build backup
36
+ /bin-cpu-backup/
@@ -0,0 +1,10 @@
1
+ {
2
+ "mcpServers": {
3
+ "crispasr-agent-transcriber": {
4
+ "cwd": ".",
5
+ "command": "uv",
6
+ "args": ["run", "--extra", "mcp", "crispasr-agent-mcp"],
7
+ "env": {}
8
+ }
9
+ }
10
+ }
@@ -0,0 +1,37 @@
1
+ # Project Instructions
2
+
3
+ This repository builds local-only transcription helpers for Codex and MCP agents.
4
+
5
+ - Do not upload media to cloud transcription services.
6
+ - Prefer the CrispASR HTTP server and `/v1/audio/transcriptions`.
7
+ - Do not automate the CrisperWeaver GUI as the main path.
8
+ - Keep file access narrow: accept local files only, validate paths, and reject URLs.
9
+ - Use ffmpeg with argument lists and `shell=False`.
10
+ - Do not commit model files, audio/video files, transcripts, generated outputs, or temp WAVs.
11
+ - Do not trigger model downloads during verification. If a local model is missing, stop and tell the user what to download.
12
+ - Before reporting back, run the available tests and explain the outcome in plain language.
13
+
14
+ ## Plugin structure
15
+
16
+ This repository is also a Codex plugin. The plugin manifest is at
17
+ `.codex-plugin/plugin.json`, and the MCP server config is at `.mcp.json`.
18
+
19
+ - Skills live in `skills/crispasr-transcription/`.
20
+ - The MCP server lives in `mcp_server/crispasr_mcp/`.
21
+ - Plugin assets (icons, README) live in `assets/`.
22
+ - Installation instructions are in `docs/plugin_install.md`.
23
+
24
+ When modifying the plugin manifest or MCP config, keep paths relative to the
25
+ repository root so the plugin works when cloned to any location.
26
+
27
+ ## npm installer
28
+
29
+ The `npm/` directory contains the public `npx` installer.
30
+
31
+ - Keep its version synchronized with `pyproject.toml`, `plugin.json`, and
32
+ `server.json`.
33
+ - Never add model downloads to the installer.
34
+ - Verify GitHub Release SHA-256 checksums before extracting plugin files.
35
+ - Preserve `models/`, `bin/`, and `outputs/` during updates and normal removal.
36
+ - Run `npm test` and `npm pack --dry-run` from the `npm/` directory after
37
+ changes.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 crispasr-agent-transcriber contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,366 @@
1
+ Metadata-Version: 2.4
2
+ Name: crispasr-agent-transcriber
3
+ Version: 0.3.2
4
+ Summary: Local CrispASR transcription workflow for Codex and MCP agents.
5
+ Author: crispasr-agent-transcriber contributors
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: httpx>=0.27
10
+ Requires-Dist: pydantic>=2
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest>=8; extra == 'dev'
13
+ Requires-Dist: ruff>=0.8; extra == 'dev'
14
+ Provides-Extra: mcp
15
+ Requires-Dist: mcp>=1.2; extra == 'mcp'
16
+ Description-Content-Type: text/markdown
17
+
18
+ # crispasr-agent-transcriber
19
+
20
+ <!-- mcp-name: io.github.emiyakatuz/crispasr-agent-transcriber -->
21
+
22
+ Local-only transcription for Codex and MCP-based AI agents, powered by
23
+ [CrispASR](https://github.com/CrispStrobe/CrispASR). No cloud uploads,
24
+ no API keys required for transcription.
25
+
26
+ ## What it does
27
+
28
+ Give it a local audio or video file. It:
29
+
30
+ 1. Probes the spoken language (English or Chinese) using CrispASR's FireRed LID.
31
+ 2. Starts a local CrispASR server with the right backend -- Cohere Transcribe
32
+ for English, Qwen3-ASR for Chinese.
33
+ 3. Extracts audio from video with ffmpeg when needed.
34
+ 4. Calls CrispASR's `/v1/audio/transcriptions` endpoint.
35
+ 5. Writes the transcript and metadata to disk.
36
+
37
+ Everything runs on your machine. Media never leaves it.
38
+
39
+ ## Quick install for Codex
40
+
41
+ The plugin includes the Codex Skill, command-line tool, and MCP server. Media
42
+ stays on your computer. Model files are never downloaded automatically.
43
+
44
+ ### 1. Install prerequisites
45
+
46
+ Install Node.js 20 or newer, [uv](https://docs.astral.sh/uv/), and
47
+ [ffmpeg](https://ffmpeg.org/). The installer uses `uv` to provide Python.
48
+
49
+ ```powershell
50
+ node --version
51
+ uv --version
52
+ ffmpeg -version
53
+ ```
54
+
55
+ ### 2. Run the installer
56
+
57
+ ```powershell
58
+ npx @emiyakatuz/crispasr-agent-transcriber@latest install
59
+ ```
60
+
61
+ The installer:
62
+
63
+ - downloads the matching GitHub Release and verifies its SHA-256 checksum;
64
+ - installs the plugin under `~/plugins/crispasr-agent-transcriber`;
65
+ - installs the Python and MCP dependencies;
66
+ - detects CUDA, Vulkan, or CPU and installs the best CrispASR build;
67
+ - registers the plugin in the Codex Personal marketplace;
68
+ - preserves existing models, binaries, and outputs during updates.
69
+
70
+ ### 3. Add the local models
71
+
72
+ When a model is missing, the installer prints its official source and stops.
73
+ Download the three files listed under [Required models](#required-models) into:
74
+
75
+ ```text
76
+ ~/plugins/crispasr-agent-transcriber/models/
77
+ ```
78
+
79
+ Then verify the complete installation:
80
+
81
+ ```powershell
82
+ npx @emiyakatuz/crispasr-agent-transcriber@latest doctor
83
+ ```
84
+
85
+ ### 4. Enable the plugin
86
+
87
+ With a Codex build that supports plugin commands, run:
88
+
89
+ ```powershell
90
+ codex plugin add crispasr-agent-transcriber@personal
91
+ ```
92
+
93
+ If the CLI has no `codex plugin` command, open the Codex desktop Plugins view
94
+ and install **CrispASR Transcriber** from the Personal marketplace. Start a new
95
+ conversation, then ask:
96
+
97
+ ```text
98
+ Transcribe C:\path\to\sample.mp4 with CrispASR using auto language detection.
99
+ Save a verbose JSON transcript and an SRT subtitle file.
100
+ ```
101
+
102
+ ### Update or uninstall
103
+
104
+ ```powershell
105
+ npx @emiyakatuz/crispasr-agent-transcriber@latest update
106
+ npx @emiyakatuz/crispasr-agent-transcriber@latest uninstall
107
+ ```
108
+
109
+ Uninstall preserves local models, CrispASR binaries, and outputs. Use
110
+ `uninstall --purge-data` only when those files should also be deleted. See
111
+ [Plugin installation](docs/plugin_install.md) for manual installation and
112
+ troubleshooting.
113
+
114
+ ## Direct command-line use
115
+
116
+ After installation, you can run the transcription script without Codex:
117
+
118
+ ```powershell
119
+ Set-Location (Join-Path $HOME "plugins\crispasr-agent-transcriber")
120
+ uv run python scripts/transcribe.py sample.mp4 --profile auto `
121
+ --manage-server `
122
+ --lid-backend firered --lid-model models\firered-lid-q2_k.gguf `
123
+ --model models\cohere-transcribe.gguf `
124
+ --format verbose_json
125
+ ```
126
+
127
+ ## Use with other AI agents
128
+
129
+ The MCP server is the cross-agent interface. Any agent that supports MCP stdio
130
+ can run the released package directly from GitHub:
131
+
132
+ ```powershell
133
+ uvx --from "crispasr-agent-transcriber[mcp] @ git+https://github.com/EmiyaKatuz/crispasr-agent-transcriber.git@v0.3.2" crispasr-agent-mcp
134
+ ```
135
+
136
+ Use the same command and arguments in Claude Desktop, Cursor, or another MCP
137
+ client. See [AI agent integrations](docs/agent_integrations.md) for a generic
138
+ MCP configuration and Codex CLI command.
139
+
140
+ ## Maintainer publishing
141
+
142
+ End users do not need the release steps. Maintainers should follow the
143
+ [publishing guide](docs/publishing.md) for Codex Marketplace, PyPI, MCP
144
+ Registry, and cross-agent distribution.
145
+
146
+ ## Required models
147
+
148
+ This tool does **not** download models by default. Download these three
149
+ GGUF files and keep them in a local directory (the repo's `models/` folder
150
+ works well):
151
+
152
+ | Purpose | File | ~Size | Source |
153
+ |---|---|---|---|
154
+ | English ASR | `cohere-transcribe.gguf` | 3.9 GB | [Cohere on HuggingFace](https://huggingface.co/cstr) |
155
+ | Chinese ASR | `qwen3-asr-1.7b-q4_k.gguf` | 1.3 GB | [Qwen3-ASR GGUF](https://huggingface.co/cstr/qwen3-asr-1.7b-GGUF) |
156
+ | Language detection | `firered-lid-q2_k.gguf` | 350 MB | [FireRed LID GGUF](https://huggingface.co/cstr/firered-lid-GGUF) |
157
+
158
+ Pass them on every run:
159
+
160
+ ```powershell
161
+ --model models\cohere-transcribe.gguf
162
+ --lid-backend firered --lid-model models\firered-lid-q2_k.gguf
163
+ ```
164
+
165
+ ## CrispASR binary management
166
+
167
+ The tool auto-detects, installs, and updates the CrispASR binary from
168
+ [GitHub releases](https://github.com/CrispStrobe/CrispASR/releases).
169
+
170
+ | Flag | Effect |
171
+ |---|---|
172
+ | `--install-crispasr` | Download latest platform binary to `bin/` |
173
+ | `--update-crispasr` | Upgrade to newest release |
174
+ | `--crispasr-status` | Show installed version + update availability |
175
+ | `--crispasr-bin-dir PATH` | Custom directory (default `./bin`) |
176
+ | `--crispasr-bin PATH` | Exact path to the `crispasr` executable (`crispasr.exe` on Windows) |
177
+
178
+ When `--manage-server` is set and no binary is found, it auto-installs before
179
+ starting the server.
180
+
181
+ ### GPU detection
182
+
183
+ On install and update, the tool checks your hardware:
184
+
185
+ 1. **CUDA** -- `nvidia-smi` available, or `CUDA_PATH` / `CUDA_HOME` set, or
186
+ CUDA in `PATH` -> downloads `crispasr-*-cuda` variant.
187
+ 2. **Vulkan** -- `vulkaninfo` available, or `VULKAN_SDK` set (only when CUDA is absent) ->
188
+ downloads `crispasr-*-vulkan` variant.
189
+ 3. **CPU** -- fallback when no GPU toolkit is detected.
190
+
191
+ macOS always uses the universal binary.
192
+
193
+ ## Profiles
194
+
195
+ | Profile | Backend | ASR model | Language hint |
196
+ |---|---|---|---|
197
+ | `english` | `cohere` | Cohere Transcribe 03-2026 | `en` |
198
+ | `chinese` | `qwen3-1.7b` | Qwen3-ASR 1.7B | `zh` |
199
+ | `auto` | determined by LID | determined by LID | detected |
200
+
201
+ `auto` mode runs FireRed language detection on the media, then routes English
202
+ to Cohere or Chinese to Qwen3-1.7B. Mixed or uncertain content stops with a
203
+ clear error asking you to re-run with `--profile english` or `--profile chinese`.
204
+
205
+ ## Usage
206
+
207
+ ### Managed server (tool starts CrispASR for you)
208
+
209
+ ```powershell
210
+ uv run python scripts/transcribe.py sample.wav `
211
+ --profile auto `
212
+ --manage-server `
213
+ --model models\qwen3-asr-1.7b-q4_k.gguf `
214
+ --lid-backend firered --lid-model models\firered-lid-q2_k.gguf `
215
+ --format srt `
216
+ --out-dir outputs
217
+ ```
218
+
219
+ Add `--keep-server` to leave the server running after transcription.
220
+
221
+ ### Manual server (you start CrispASR)
222
+
223
+ ```powershell
224
+ # Terminal 1 -- start the server
225
+ crispasr --server --backend cohere `
226
+ -m models\cohere-transcribe.gguf `
227
+ --port 8080
228
+
229
+ # Terminal 2 -- transcribe
230
+ uv run python scripts/transcribe.py sample.mp4 `
231
+ --profile english `
232
+ --server-url http://127.0.0.1:8080 `
233
+ --format verbose_json
234
+ ```
235
+
236
+ If the running server's backend doesn't match the selected profile, the tool
237
+ prints the exact command you need to start the correct server.
238
+
239
+ ### Output formats
240
+
241
+ | `--format` | File extension | Contents |
242
+ |---|---|---|
243
+ | `text` | `.txt` | Plain transcript |
244
+ | `verbose_json` | `.json` | Full response with segments |
245
+ | `srt` | `.srt` | SubRip subtitles |
246
+ | `vtt` | `.vtt` | WebVTT subtitles |
247
+
248
+ A `.metadata.json` sidecar is always written alongside the transcript.
249
+
250
+ ### Video files
251
+
252
+ Video files are detected automatically. ffmpeg extracts the audio track to a
253
+ temporary mono 16 kHz WAV before sending it to CrispASR. The temporary file
254
+ is deleted when transcription finishes.
255
+
256
+ ### All CLI flags
257
+
258
+ ```
259
+ --profile auto|english|chinese
260
+ --format text|verbose_json|srt|vtt
261
+ --out-dir PATH
262
+ --server-url URL
263
+ --allow-remote-server
264
+ --manage-server
265
+ --keep-server
266
+ --model PATH Local GGUF model path
267
+ --allow-model-auto-download
268
+ --lid-model PATH Local LID model path
269
+ --lid-backend firered|silero|ecapa|whisper
270
+ --host HOST Managed server host (default 127.0.0.1)
271
+ --port PORT Managed server port (default 8080)
272
+ --language CODE Language hint for transcription
273
+ --prompt TEXT Initial prompt/context
274
+ --vad Enable voice activity detection
275
+ --diarize Enable speaker diarization
276
+ --diarize-method METHOD
277
+ --hotwords WORD,WORD Comma-separated hotwords
278
+ --no-timestamps
279
+ --preprocess auto|always|never
280
+ --api-key KEY If CRISPASR_API_KEYS is enabled
281
+ --crispasr-bin-dir PATH
282
+ --crispasr-bin PATH
283
+ --install-crispasr
284
+ --update-crispasr
285
+ --crispasr-status
286
+ ```
287
+
288
+ ## MCP server
289
+
290
+ ```powershell
291
+ uv sync --extra mcp
292
+ uv run --extra mcp crispasr-agent-mcp
293
+ ```
294
+
295
+ Exposed tools:
296
+
297
+ | Tool | Description |
298
+ |---|---|
299
+ | `crispasr_health` | Check CrispASR server health |
300
+ | `crispasr_backends` | List available backends |
301
+ | `crispasr_detect_language` | Run language detection on a file |
302
+ | `transcribe_audio` | Transcribe an audio file |
303
+ | `transcribe_video` | Transcribe a video file |
304
+ | `transcribe_folder` | Batch-transcribe a folder |
305
+
306
+ ## Security model
307
+
308
+ - **No cloud uploads.** Media files stay on the local filesystem.
309
+ - **No remote servers by default.** `--server-url` only accepts localhost
310
+ unless `--allow-remote-server` is explicitly passed.
311
+ - **No URL inputs.** Only local file paths are accepted. URLs, S3, and other
312
+ remote schemes are rejected.
313
+ - **No shell injection.** ffmpeg is called with argument lists and
314
+ `shell=False`. No user-controlled strings are interpolated into shell
315
+ commands.
316
+ - **No model downloads by default.** CrispASR model auto-download (`-m auto`)
317
+ requires `--allow-model-auto-download`. The same guard applies to language
318
+ detection models.
319
+ - **Temporary files are cleaned up.** Converted WAV files and LID probe
320
+ windows are deleted when transcription finishes.
321
+ - **Binary downloads are explicit.** The CrispASR binary is installed only from the
322
+ official `CrispStrobe/CrispASR` GitHub releases.
323
+ - **Verified plugin releases.** The npm installer requires the plugin ZIP to
324
+ match the SHA-256 value published in the same GitHub Release.
325
+ - **Narrow installer writes.** The installer manages only its plugin directory
326
+ and the named Personal marketplace entry. Updates preserve local models,
327
+ binaries, and outputs.
328
+
329
+ ## Verify
330
+
331
+ ```powershell
332
+ uv run pytest
333
+ uv run ruff check . # zero lint warnings
334
+ ```
335
+
336
+ ## License
337
+
338
+ This project is licensed under the [MIT License](LICENSE).
339
+
340
+ ### Third-party components and attribution
341
+
342
+ This tool orchestrates several independently-licensed projects. It does not
343
+ bundle, fork, or redistribute their code -- it downloads pre-built binaries
344
+ and calls them as subprocesses or HTTP services at runtime.
345
+
346
+ | Component | License | Role |
347
+ |---|---|---|
348
+ | [CrispASR](https://github.com/CrispStrobe/CrispASR) | MIT | ASR engine, server, language detection |
349
+ | [ffmpeg](https://ffmpeg.org/) | LGPL 2.1+ / GPL 2+ | Media decoding and audio extraction |
350
+ | [Cohere Transcribe 03-2026](https://huggingface.co/cstr) | Cohere model license | English ASR model (loaded by CrispASR) |
351
+ | [Qwen3-ASR 1.7B](https://huggingface.co/cstr/qwen3-asr-1.7b-GGUF) | Apache 2.0 | Chinese ASR model (loaded by CrispASR) |
352
+ | [FireRed LID](https://huggingface.co/cstr/firered-lid-GGUF) | Apache 2.0 | Language detection model (loaded by CrispASR) |
353
+ | [httpx](https://github.com/encode/httpx) | BSD | HTTP client for CrispASR API |
354
+ | [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk) | MIT | MCP server framework |
355
+ | [Node.js](https://nodejs.org/) | MIT | npm installer runtime |
356
+ | [adm-zip](https://github.com/cthackers/adm-zip) | MIT | Verified plugin ZIP extraction |
357
+
358
+ Model files must be downloaded separately by the user from their respective
359
+ HuggingFace repositories. See [Required models](#required-models) above.
360
+
361
+ ## Related projects
362
+
363
+ - [CrispASR](https://github.com/CrispStrobe/CrispASR) -- the ASR engine this
364
+ tool wraps
365
+ - [CrisperWeaver](https://github.com/CrispStrobe/CrisperWeaver) -- CrispASR's
366
+ desktop GUI (not used by this tool)