crispasr-agent-transcriber 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crispasr_agent_transcriber-0.3.2/.codex-plugin/plugin.json +46 -0
- crispasr_agent_transcriber-0.3.2/.github/workflows/ci.yml +29 -0
- crispasr_agent_transcriber-0.3.2/.github/workflows/publish-npm.yml +48 -0
- crispasr_agent_transcriber-0.3.2/.github/workflows/publish-pypi.yml +27 -0
- crispasr_agent_transcriber-0.3.2/.github/workflows/release.yml +35 -0
- crispasr_agent_transcriber-0.3.2/.gitignore +36 -0
- crispasr_agent_transcriber-0.3.2/.mcp.json +10 -0
- crispasr_agent_transcriber-0.3.2/AGENTS.md +37 -0
- crispasr_agent_transcriber-0.3.2/LICENSE +21 -0
- crispasr_agent_transcriber-0.3.2/PKG-INFO +366 -0
- crispasr_agent_transcriber-0.3.2/README.md +349 -0
- crispasr_agent_transcriber-0.3.2/assets/README.md +113 -0
- crispasr_agent_transcriber-0.3.2/assets/crispasr-icon.svg +9 -0
- crispasr_agent_transcriber-0.3.2/assets/crispasr-small.svg +6 -0
- crispasr_agent_transcriber-0.3.2/docs/agent_integrations.md +81 -0
- crispasr_agent_transcriber-0.3.2/docs/codex_usage.md +26 -0
- crispasr_agent_transcriber-0.3.2/docs/crispasr_setup.md +35 -0
- crispasr_agent_transcriber-0.3.2/docs/mcp_usage.md +27 -0
- crispasr_agent_transcriber-0.3.2/docs/plugin_install.md +156 -0
- crispasr_agent_transcriber-0.3.2/docs/publishing.md +391 -0
- crispasr_agent_transcriber-0.3.2/docs/release.md +37 -0
- crispasr_agent_transcriber-0.3.2/docs/security.md +17 -0
- crispasr_agent_transcriber-0.3.2/examples/audio_transcription.md +13 -0
- crispasr_agent_transcriber-0.3.2/examples/codex_prompts.md +9 -0
- crispasr_agent_transcriber-0.3.2/examples/video_transcription.md +19 -0
- crispasr_agent_transcriber-0.3.2/mcp_server/crispasr_mcp/__init__.py +1 -0
- crispasr_agent_transcriber-0.3.2/mcp_server/crispasr_mcp/server.py +25 -0
- crispasr_agent_transcriber-0.3.2/mcp_server/crispasr_mcp/tools.py +198 -0
- crispasr_agent_transcriber-0.3.2/npm/LICENSE +21 -0
- crispasr_agent_transcriber-0.3.2/npm/README.md +41 -0
- crispasr_agent_transcriber-0.3.2/npm/bin/crispasr-agent-transcriber.js +13 -0
- crispasr_agent_transcriber-0.3.2/npm/package-lock.json +31 -0
- crispasr_agent_transcriber-0.3.2/npm/package.json +37 -0
- crispasr_agent_transcriber-0.3.2/npm/src/cli.js +174 -0
- crispasr_agent_transcriber-0.3.2/npm/src/constants.js +65 -0
- crispasr_agent_transcriber-0.3.2/npm/src/errors.js +16 -0
- crispasr_agent_transcriber-0.3.2/npm/src/installer.js +397 -0
- crispasr_agent_transcriber-0.3.2/npm/src/marketplace.js +100 -0
- crispasr_agent_transcriber-0.3.2/npm/src/release.js +122 -0
- crispasr_agent_transcriber-0.3.2/npm/src/system.js +63 -0
- crispasr_agent_transcriber-0.3.2/npm/test/cli.test.js +38 -0
- crispasr_agent_transcriber-0.3.2/npm/test/installer.test.js +163 -0
- crispasr_agent_transcriber-0.3.2/npm/test/marketplace.test.js +42 -0
- crispasr_agent_transcriber-0.3.2/npm/test/release.test.js +55 -0
- crispasr_agent_transcriber-0.3.2/pyproject.toml +46 -0
- crispasr_agent_transcriber-0.3.2/scripts/build_plugin_bundle.py +98 -0
- crispasr_agent_transcriber-0.3.2/scripts/setup.ps1 +36 -0
- crispasr_agent_transcriber-0.3.2/scripts/transcribe.py +14 -0
- crispasr_agent_transcriber-0.3.2/server.json +23 -0
- crispasr_agent_transcriber-0.3.2/skills/crispasr-transcription/SKILL.md +45 -0
- crispasr_agent_transcriber-0.3.2/skills/crispasr-transcription/references/crispasr_server.md +27 -0
- crispasr_agent_transcriber-0.3.2/skills/crispasr-transcription/scripts/transcribe.py +15 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/__init__.py +12 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/cli.py +222 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/client.py +114 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/crispasr_manager.py +271 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/errors.py +59 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/language.py +127 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/media.py +300 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/profiles.py +131 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/schemas.py +101 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/server_manager.py +134 -0
- crispasr_agent_transcriber-0.3.2/src/crispasr_agent_transcriber/workflow.py +237 -0
- crispasr_agent_transcriber-0.3.2/tests/__init__.py +1 -0
- crispasr_agent_transcriber-0.3.2/tests/helpers.py +17 -0
- crispasr_agent_transcriber-0.3.2/tests/test_cli.py +90 -0
- crispasr_agent_transcriber-0.3.2/tests/test_client.py +31 -0
- crispasr_agent_transcriber-0.3.2/tests/test_crispasr_manager.py +277 -0
- crispasr_agent_transcriber-0.3.2/tests/test_distribution_metadata.py +39 -0
- crispasr_agent_transcriber-0.3.2/tests/test_language.py +86 -0
- crispasr_agent_transcriber-0.3.2/tests/test_mcp_server.py +32 -0
- crispasr_agent_transcriber-0.3.2/tests/test_mcp_tools.py +118 -0
- crispasr_agent_transcriber-0.3.2/tests/test_media.py +59 -0
- crispasr_agent_transcriber-0.3.2/tests/test_plugin_bundle.py +39 -0
- crispasr_agent_transcriber-0.3.2/tests/test_profiles.py +33 -0
- crispasr_agent_transcriber-0.3.2/tests/test_server_manager.py +36 -0
- crispasr_agent_transcriber-0.3.2/uv.lock +813 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "crispasr-agent-transcriber",
|
|
3
|
+
"version": "0.3.2",
|
|
4
|
+
"description": "Transcribe local audio or video files through CrispASR with local models only. Auto-detects English vs Chinese, routes to Cohere Transcribe or Qwen3-ASR, and extracts audio from video with ffmpeg. No cloud uploads.",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "crispasr-agent-transcriber contributors",
|
|
7
|
+
"url": "https://github.com/EmiyaKatuz/crispasr-agent-transcriber"
|
|
8
|
+
},
|
|
9
|
+
"homepage": "https://github.com/EmiyaKatuz/crispasr-agent-transcriber",
|
|
10
|
+
"repository": "https://github.com/EmiyaKatuz/crispasr-agent-transcriber",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"keywords": [
|
|
13
|
+
"transcription",
|
|
14
|
+
"asr",
|
|
15
|
+
"crispasr",
|
|
16
|
+
"audio",
|
|
17
|
+
"video",
|
|
18
|
+
"subtitle",
|
|
19
|
+
"srt",
|
|
20
|
+
"vtt",
|
|
21
|
+
"whisper",
|
|
22
|
+
"local-models",
|
|
23
|
+
"mcp"
|
|
24
|
+
],
|
|
25
|
+
"skills": "./skills/",
|
|
26
|
+
"mcpServers": "./.mcp.json",
|
|
27
|
+
"interface": {
|
|
28
|
+
"displayName": "CrispASR Transcriber",
|
|
29
|
+
"shortDescription": "Transcribe local audio/video with CrispASR, auto routing, and GPU acceleration",
|
|
30
|
+
"longDescription": "Use CrispASR Transcriber to transcribe local audio or video files entirely on your machine. Auto-detects English vs Chinese using FireRed LID, routes to Cohere Transcribe or Qwen3-ASR 1.7B, extracts audio from video with ffmpeg, and outputs text, verbose JSON, SRT, or VTT. No cloud uploads, no API keys. Supports GPU acceleration (CUDA > Vulkan > CPU).",
|
|
31
|
+
"developerName": "crispasr-agent-transcriber contributors",
|
|
32
|
+
"category": "Productivity",
|
|
33
|
+
"capabilities": [
|
|
34
|
+
"Read",
|
|
35
|
+
"Write"
|
|
36
|
+
],
|
|
37
|
+
"websiteURL": "https://github.com/EmiyaKatuz/crispasr-agent-transcriber",
|
|
38
|
+
"defaultPrompt": [
|
|
39
|
+
"Transcribe this local audio or video file with CrispASR"
|
|
40
|
+
],
|
|
41
|
+
"brandColor": "#2D7DD2",
|
|
42
|
+
"composerIcon": "./assets/crispasr-small.svg",
|
|
43
|
+
"logo": "./assets/crispasr-icon.svg",
|
|
44
|
+
"screenshots": []
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
test:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v4
|
|
12
|
+
- uses: astral-sh/setup-uv@v5
|
|
13
|
+
- uses: actions/setup-python@v5
|
|
14
|
+
with:
|
|
15
|
+
python-version: "3.11"
|
|
16
|
+
- run: uv sync --extra dev --extra mcp
|
|
17
|
+
- run: uv run pytest
|
|
18
|
+
- run: uv run ruff check .
|
|
19
|
+
- uses: actions/setup-node@v4
|
|
20
|
+
with:
|
|
21
|
+
node-version: "24"
|
|
22
|
+
cache: npm
|
|
23
|
+
cache-dependency-path: npm/package-lock.json
|
|
24
|
+
- run: npm ci
|
|
25
|
+
working-directory: npm
|
|
26
|
+
- run: npm test
|
|
27
|
+
working-directory: npm
|
|
28
|
+
- run: npm pack --dry-run
|
|
29
|
+
working-directory: npm
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
name: Publish npm installer
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
|
|
6
|
+
permissions:
|
|
7
|
+
contents: read
|
|
8
|
+
id-token: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment: npm
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
- uses: actions/setup-node@v4
|
|
17
|
+
with:
|
|
18
|
+
node-version: "24"
|
|
19
|
+
registry-url: "https://registry.npmjs.org"
|
|
20
|
+
cache: npm
|
|
21
|
+
cache-dependency-path: npm/package-lock.json
|
|
22
|
+
- run: npm ci
|
|
23
|
+
working-directory: npm
|
|
24
|
+
- run: npm test
|
|
25
|
+
working-directory: npm
|
|
26
|
+
- run: npm pack --dry-run
|
|
27
|
+
working-directory: npm
|
|
28
|
+
- name: Verify the matching GitHub Release
|
|
29
|
+
env:
|
|
30
|
+
GH_TOKEN: ${{ github.token }}
|
|
31
|
+
run: |
|
|
32
|
+
VERSION=$(node -p "require('./npm/package.json').version")
|
|
33
|
+
gh release view "v${VERSION}" --json assets \
|
|
34
|
+
--jq '.assets[].name' | grep -Fx "crispasr-agent-transcriber-plugin-${VERSION}.zip"
|
|
35
|
+
gh release view "v${VERSION}" --json assets \
|
|
36
|
+
--jq '.assets[].name' | grep -Fx "SHA256SUMS"
|
|
37
|
+
- name: Publish public npm package
|
|
38
|
+
working-directory: npm
|
|
39
|
+
env:
|
|
40
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
41
|
+
run: |
|
|
42
|
+
NAME=$(node -p "require('./package.json').name")
|
|
43
|
+
VERSION=$(node -p "require('./package.json').version")
|
|
44
|
+
if npm view "${NAME}@${VERSION}" version >/dev/null 2>&1; then
|
|
45
|
+
echo "${NAME}@${VERSION} is already published."
|
|
46
|
+
else
|
|
47
|
+
npm publish --access public --provenance
|
|
48
|
+
fi
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
id-token: write
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
publish:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
environment: pypi
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
- uses: astral-sh/setup-uv@v5
|
|
20
|
+
- uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.11"
|
|
23
|
+
- run: uv sync --extra dev --extra mcp
|
|
24
|
+
- run: uv run pytest
|
|
25
|
+
- run: uv run ruff check .
|
|
26
|
+
- run: uv build
|
|
27
|
+
- run: uv publish --trusted-publishing always
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
build-release:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
- uses: astral-sh/setup-uv@v5
|
|
17
|
+
- uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.11"
|
|
20
|
+
- run: uv sync --extra dev --extra mcp
|
|
21
|
+
- run: uv run pytest
|
|
22
|
+
- run: uv run ruff check .
|
|
23
|
+
- run: uv build
|
|
24
|
+
- run: uv run python scripts/build_plugin_bundle.py
|
|
25
|
+
- name: Generate release checksums
|
|
26
|
+
run: |
|
|
27
|
+
cd dist
|
|
28
|
+
sha256sum *.whl *.tar.gz *.zip > SHA256SUMS
|
|
29
|
+
- name: Create or update GitHub Release
|
|
30
|
+
env:
|
|
31
|
+
GH_TOKEN: ${{ github.token }}
|
|
32
|
+
run: |
|
|
33
|
+
gh release view "$GITHUB_REF_NAME" >/dev/null 2>&1 || \
|
|
34
|
+
gh release create "$GITHUB_REF_NAME" --generate-notes
|
|
35
|
+
gh release upload "$GITHUB_REF_NAME" dist/* --clobber
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
.venv/
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
.pytest_cache/
|
|
5
|
+
.ruff_cache/
|
|
6
|
+
node_modules/
|
|
7
|
+
*.tgz
|
|
8
|
+
.codegraph/
|
|
9
|
+
|
|
10
|
+
# Local media, models, transcripts, and temporary outputs.
|
|
11
|
+
models/
|
|
12
|
+
outputs/
|
|
13
|
+
transcripts/
|
|
14
|
+
*.wav.tmp
|
|
15
|
+
*.mp3
|
|
16
|
+
*.mp4
|
|
17
|
+
*.m4a
|
|
18
|
+
*.mov
|
|
19
|
+
*.webm
|
|
20
|
+
*.flac
|
|
21
|
+
*.ogg
|
|
22
|
+
*.opus
|
|
23
|
+
*.srt
|
|
24
|
+
*.vtt
|
|
25
|
+
*.transcript.json
|
|
26
|
+
*.metadata.json
|
|
27
|
+
|
|
28
|
+
# OS/editor noise.
|
|
29
|
+
.DS_Store
|
|
30
|
+
Thumbs.db
|
|
31
|
+
|
|
32
|
+
# CrispASR binary
|
|
33
|
+
/bin/
|
|
34
|
+
|
|
35
|
+
# CPU build backup
|
|
36
|
+
/bin-cpu-backup/
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Project Instructions
|
|
2
|
+
|
|
3
|
+
This repository builds local-only transcription helpers for Codex and MCP agents.
|
|
4
|
+
|
|
5
|
+
- Do not upload media to cloud transcription services.
|
|
6
|
+
- Prefer the CrispASR HTTP server and `/v1/audio/transcriptions`.
|
|
7
|
+
- Do not automate the CrisperWeaver GUI as the main path.
|
|
8
|
+
- Keep file access narrow: accept local files only, validate paths, and reject URLs.
|
|
9
|
+
- Use ffmpeg with argument lists and `shell=False`.
|
|
10
|
+
- Do not commit model files, audio/video files, transcripts, generated outputs, or temp WAVs.
|
|
11
|
+
- Do not trigger model downloads during verification. If a local model is missing, stop and tell the user what to download.
|
|
12
|
+
- Before reporting back, run the available tests and explain the outcome in plain language.
|
|
13
|
+
|
|
14
|
+
## Plugin structure
|
|
15
|
+
|
|
16
|
+
This repository is also a Codex plugin. The plugin manifest is at
|
|
17
|
+
`.codex-plugin/plugin.json`, and the MCP server config is at `.mcp.json`.
|
|
18
|
+
|
|
19
|
+
- Skills live in `skills/crispasr-transcription/`.
|
|
20
|
+
- The MCP server lives in `mcp_server/crispasr_mcp/`.
|
|
21
|
+
- Plugin assets (icons, README) live in `assets/`.
|
|
22
|
+
- Installation instructions are in `docs/plugin_install.md`.
|
|
23
|
+
|
|
24
|
+
When modifying the plugin manifest or MCP config, keep paths relative to the
|
|
25
|
+
repository root so the plugin works when cloned to any location.
|
|
26
|
+
|
|
27
|
+
## npm installer
|
|
28
|
+
|
|
29
|
+
The `npm/` directory contains the public `npx` installer.
|
|
30
|
+
|
|
31
|
+
- Keep its version synchronized with `pyproject.toml`, `plugin.json`, and
|
|
32
|
+
`server.json`.
|
|
33
|
+
- Never add model downloads to the installer.
|
|
34
|
+
- Verify GitHub Release SHA-256 checksums before extracting plugin files.
|
|
35
|
+
- Preserve `models/`, `bin/`, and `outputs/` during updates and normal removal.
|
|
36
|
+
- Run `npm test` and `npm pack --dry-run` from the `npm/` directory after
|
|
37
|
+
changes.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 crispasr-agent-transcriber contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: crispasr-agent-transcriber
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: Local CrispASR transcription workflow for Codex and MCP agents.
|
|
5
|
+
Author: crispasr-agent-transcriber contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.11
|
|
9
|
+
Requires-Dist: httpx>=0.27
|
|
10
|
+
Requires-Dist: pydantic>=2
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
13
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
14
|
+
Provides-Extra: mcp
|
|
15
|
+
Requires-Dist: mcp>=1.2; extra == 'mcp'
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# crispasr-agent-transcriber
|
|
19
|
+
|
|
20
|
+
<!-- mcp-name: io.github.emiyakatuz/crispasr-agent-transcriber -->
|
|
21
|
+
|
|
22
|
+
Local-only transcription for Codex and MCP-based AI agents, powered by
|
|
23
|
+
[CrispASR](https://github.com/CrispStrobe/CrispASR). No cloud uploads,
|
|
24
|
+
no API keys required for transcription.
|
|
25
|
+
|
|
26
|
+
## What it does
|
|
27
|
+
|
|
28
|
+
Give it a local audio or video file. It:
|
|
29
|
+
|
|
30
|
+
1. Probes the spoken language (English or Chinese) using CrispASR's FireRed LID.
|
|
31
|
+
2. Starts a local CrispASR server with the right backend -- Cohere Transcribe
|
|
32
|
+
for English, Qwen3-ASR for Chinese.
|
|
33
|
+
3. Extracts audio from video with ffmpeg when needed.
|
|
34
|
+
4. Calls CrispASR's `/v1/audio/transcriptions` endpoint.
|
|
35
|
+
5. Writes the transcript and metadata to disk.
|
|
36
|
+
|
|
37
|
+
Everything runs on your machine. Media never leaves it.
|
|
38
|
+
|
|
39
|
+
## Quick install for Codex
|
|
40
|
+
|
|
41
|
+
The plugin includes the Codex Skill, command-line tool, and MCP server. Media
|
|
42
|
+
stays on your computer. Model files are never downloaded automatically.
|
|
43
|
+
|
|
44
|
+
### 1. Install prerequisites
|
|
45
|
+
|
|
46
|
+
Install Node.js 20 or newer, [uv](https://docs.astral.sh/uv/), and
|
|
47
|
+
[ffmpeg](https://ffmpeg.org/). The installer uses `uv` to provide Python.
|
|
48
|
+
|
|
49
|
+
```powershell
|
|
50
|
+
node --version
|
|
51
|
+
uv --version
|
|
52
|
+
ffmpeg -version
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 2. Run the installer
|
|
56
|
+
|
|
57
|
+
```powershell
|
|
58
|
+
npx @emiyakatuz/crispasr-agent-transcriber@latest install
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
The installer:
|
|
62
|
+
|
|
63
|
+
- downloads the matching GitHub Release and verifies its SHA-256 checksum;
|
|
64
|
+
- installs the plugin under `~/plugins/crispasr-agent-transcriber`;
|
|
65
|
+
- installs the Python and MCP dependencies;
|
|
66
|
+
- detects CUDA, Vulkan, or CPU and installs the best CrispASR build;
|
|
67
|
+
- registers the plugin in the Codex Personal marketplace;
|
|
68
|
+
- preserves existing models, binaries, and outputs during updates.
|
|
69
|
+
|
|
70
|
+
### 3. Add the local models
|
|
71
|
+
|
|
72
|
+
When a model is missing, the installer prints its official source and stops.
|
|
73
|
+
Download the three files listed under [Required models](#required-models) into:
|
|
74
|
+
|
|
75
|
+
```text
|
|
76
|
+
~/plugins/crispasr-agent-transcriber/models/
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Then verify the complete installation:
|
|
80
|
+
|
|
81
|
+
```powershell
|
|
82
|
+
npx @emiyakatuz/crispasr-agent-transcriber@latest doctor
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### 4. Enable the plugin
|
|
86
|
+
|
|
87
|
+
With a Codex build that supports plugin commands, run:
|
|
88
|
+
|
|
89
|
+
```powershell
|
|
90
|
+
codex plugin add crispasr-agent-transcriber@personal
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
If the CLI has no `codex plugin` command, open the Codex desktop Plugins view
|
|
94
|
+
and install **CrispASR Transcriber** from the Personal marketplace. Start a new
|
|
95
|
+
conversation, then ask:
|
|
96
|
+
|
|
97
|
+
```text
|
|
98
|
+
Transcribe C:\path\to\sample.mp4 with CrispASR using auto language detection.
|
|
99
|
+
Save a verbose JSON transcript and an SRT subtitle file.
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Update or uninstall
|
|
103
|
+
|
|
104
|
+
```powershell
|
|
105
|
+
npx @emiyakatuz/crispasr-agent-transcriber@latest update
|
|
106
|
+
npx @emiyakatuz/crispasr-agent-transcriber@latest uninstall
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Uninstall preserves local models, CrispASR binaries, and outputs. Use
|
|
110
|
+
`uninstall --purge-data` only when those files should also be deleted. See
|
|
111
|
+
[Plugin installation](docs/plugin_install.md) for manual installation and
|
|
112
|
+
troubleshooting.
|
|
113
|
+
|
|
114
|
+
## Direct command-line use
|
|
115
|
+
|
|
116
|
+
After installation, you can run the transcription script without Codex:
|
|
117
|
+
|
|
118
|
+
```powershell
|
|
119
|
+
Set-Location (Join-Path $HOME "plugins\crispasr-agent-transcriber")
|
|
120
|
+
uv run python scripts/transcribe.py sample.mp4 --profile auto `
|
|
121
|
+
--manage-server `
|
|
122
|
+
--lid-backend firered --lid-model models\firered-lid-q2_k.gguf `
|
|
123
|
+
--model models\cohere-transcribe.gguf `
|
|
124
|
+
--format verbose_json
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Use with other AI agents
|
|
128
|
+
|
|
129
|
+
The MCP server is the cross-agent interface. Any agent that supports MCP stdio
|
|
130
|
+
can run the released package directly from GitHub:
|
|
131
|
+
|
|
132
|
+
```powershell
|
|
133
|
+
uvx --from "crispasr-agent-transcriber[mcp] @ git+https://github.com/EmiyaKatuz/crispasr-agent-transcriber.git@v0.3.2" crispasr-agent-mcp
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Use the same command and arguments in Claude Desktop, Cursor, or another MCP
|
|
137
|
+
client. See [AI agent integrations](docs/agent_integrations.md) for a generic
|
|
138
|
+
MCP configuration and Codex CLI command.
|
|
139
|
+
|
|
140
|
+
## Maintainer publishing
|
|
141
|
+
|
|
142
|
+
End users do not need the release steps. Maintainers should follow the
|
|
143
|
+
[publishing guide](docs/publishing.md) for Codex Marketplace, PyPI, MCP
|
|
144
|
+
Registry, and cross-agent distribution.
|
|
145
|
+
|
|
146
|
+
## Required models
|
|
147
|
+
|
|
148
|
+
This tool does **not** download models automatically. Download these three
|
|
149
|
+
GGUF files and keep them in a local directory (the repo's `models/` folder
|
|
150
|
+
works well):
|
|
151
|
+
|
|
152
|
+
| Purpose | File | ~Size | Source |
|
|
153
|
+
|---|---|---|---|
|
|
154
|
+
| English ASR | `cohere-transcribe.gguf` | 3.9 GB | [Cohere on HuggingFace](https://huggingface.co/cstr) |
|
|
155
|
+
| Chinese ASR | `qwen3-asr-1.7b-q4_k.gguf` | 1.3 GB | [Qwen3-ASR GGUF](https://huggingface.co/cstr/qwen3-asr-1.7b-GGUF) |
|
|
156
|
+
| Language detection | `firered-lid-q2_k.gguf` | 350 MB | [FireRed LID GGUF](https://huggingface.co/cstr/firered-lid-GGUF) |
|
|
157
|
+
|
|
158
|
+
Pass them on every run:
|
|
159
|
+
|
|
160
|
+
```powershell
|
|
161
|
+
--model models\cohere-transcribe.gguf
|
|
162
|
+
--lid-backend firered --lid-model models\firered-lid-q2_k.gguf
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## CrispASR binary management
|
|
166
|
+
|
|
167
|
+
The tool auto-detects, installs, and updates the CrispASR binary from
|
|
168
|
+
[GitHub releases](https://github.com/CrispStrobe/CrispASR/releases).
|
|
169
|
+
|
|
170
|
+
| Flag | Effect |
|
|
171
|
+
|---|---|
|
|
172
|
+
| `--install-crispasr` | Download latest platform binary to `bin/` |
|
|
173
|
+
| `--update-crispasr` | Upgrade to newest release |
|
|
174
|
+
| `--crispasr-status` | Show installed version + update availability |
|
|
175
|
+
| `--crispasr-bin-dir PATH` | Custom directory (default `./bin`) |
|
|
176
|
+
| `--crispasr-bin PATH` | Exact path to `crispasr.exe` |
|
|
177
|
+
|
|
178
|
+
When `--manage-server` is set and no binary is found, the tool installs it automatically before
|
|
179
|
+
starting the server.
|
|
180
|
+
|
|
181
|
+
### GPU detection
|
|
182
|
+
|
|
183
|
+
On install and update, the tool checks your hardware:
|
|
184
|
+
|
|
185
|
+
1. **CUDA** -- `nvidia-smi` available, or `CUDA_PATH` / `CUDA_HOME` set, or
|
|
186
|
+
CUDA in `PATH` -> downloads `crispasr-*-cuda` variant.
|
|
187
|
+
2. **Vulkan** -- `vulkaninfo` available or `VULKAN_SDK` set (only when CUDA is absent) ->
|
|
188
|
+
downloads `crispasr-*-vulkan` variant.
|
|
189
|
+
3. **CPU** -- fallback when no GPU toolkit is detected.
|
|
190
|
+
|
|
191
|
+
macOS always uses the universal binary.
|
|
192
|
+
|
|
193
|
+
## Profiles
|
|
194
|
+
|
|
195
|
+
| Profile | Backend | ASR model | Language hint |
|
|
196
|
+
|---|---|---|---|
|
|
197
|
+
| `english` | `cohere` | Cohere Transcribe 03-2026 | `en` |
|
|
198
|
+
| `chinese` | `qwen3-1.7b` | Qwen3-ASR 1.7B | `zh` |
|
|
199
|
+
| `auto` | determined by LID | determined by LID | detected |
|
|
200
|
+
|
|
201
|
+
`auto` mode runs FireRed language detection on the media, then routes English
|
|
202
|
+
to Cohere and Chinese to Qwen3-1.7B. Mixed or uncertain content stops with a
|
|
203
|
+
clear error asking you to re-run with `--profile english` or `--profile chinese`.
|
|
204
|
+
|
|
205
|
+
## Usage
|
|
206
|
+
|
|
207
|
+
### Managed server (tool starts CrispASR for you)
|
|
208
|
+
|
|
209
|
+
```powershell
|
|
210
|
+
uv run python scripts/transcribe.py sample.wav `
|
|
211
|
+
--profile auto `
|
|
212
|
+
--manage-server `
|
|
213
|
+
--model models\qwen3-asr-1.7b-q4_k.gguf `
|
|
214
|
+
--lid-backend firered --lid-model models\firered-lid-q2_k.gguf `
|
|
215
|
+
--format srt `
|
|
216
|
+
--out-dir outputs
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Add `--keep-server` to leave the server running after transcription.
|
|
220
|
+
|
|
221
|
+
### Manual server (you start CrispASR)
|
|
222
|
+
|
|
223
|
+
```powershell
|
|
224
|
+
# Terminal 1 -- start the server
|
|
225
|
+
crispasr --server --backend cohere `
|
|
226
|
+
-m models\cohere-transcribe.gguf `
|
|
227
|
+
--port 8080
|
|
228
|
+
|
|
229
|
+
# Terminal 2 -- transcribe
|
|
230
|
+
uv run python scripts/transcribe.py sample.mp4 `
|
|
231
|
+
--profile english `
|
|
232
|
+
--server-url http://127.0.0.1:8080 `
|
|
233
|
+
--format verbose_json
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
If the running server's backend doesn't match the selected profile, the tool
|
|
237
|
+
prints the exact command you need to start the correct server.
|
|
238
|
+
|
|
239
|
+
### Output formats
|
|
240
|
+
|
|
241
|
+
| `--format` | File extension | Contents |
|
|
242
|
+
|---|---|---|
|
|
243
|
+
| `text` | `.txt` | Plain transcript |
|
|
244
|
+
| `verbose_json` | `.json` | Full response with segments |
|
|
245
|
+
| `srt` | `.srt` | SubRip subtitles |
|
|
246
|
+
| `vtt` | `.vtt` | WebVTT subtitles |
|
|
247
|
+
|
|
248
|
+
A `.metadata.json` sidecar is always written alongside the transcript.
|
|
249
|
+
|
|
250
|
+
### Video files
|
|
251
|
+
|
|
252
|
+
Video files are detected automatically. ffmpeg extracts the audio track to a
|
|
253
|
+
temporary mono 16 kHz WAV before sending it to CrispASR. The temporary file
|
|
254
|
+
is deleted when transcription finishes.
|
|
255
|
+
|
|
256
|
+
### All CLI flags
|
|
257
|
+
|
|
258
|
+
```
|
|
259
|
+
--profile auto|english|chinese
|
|
260
|
+
--format text|verbose_json|srt|vtt
|
|
261
|
+
--out-dir PATH
|
|
262
|
+
--server-url URL
|
|
263
|
+
--allow-remote-server
|
|
264
|
+
--manage-server
|
|
265
|
+
--keep-server
|
|
266
|
+
--model PATH Local GGUF model path
|
|
267
|
+
--allow-model-auto-download
|
|
268
|
+
--lid-model PATH Local LID model path
|
|
269
|
+
--lid-backend firered|silero|ecapa|whisper
|
|
270
|
+
--host HOST Managed server host (default 127.0.0.1)
|
|
271
|
+
--port PORT Managed server port (default 8080)
|
|
272
|
+
--language CODE Language hint for transcription
|
|
273
|
+
--prompt TEXT Initial prompt/context
|
|
274
|
+
--vad Enable voice activity detection
|
|
275
|
+
--diarize Enable speaker diarization
|
|
276
|
+
--diarize-method METHOD
|
|
277
|
+
--hotwords WORD,WORD Comma-separated hotwords
|
|
278
|
+
--no-timestamps
|
|
279
|
+
--preprocess auto|always|never
|
|
280
|
+
--api-key KEY If CRISPASR_API_KEYS is enabled
|
|
281
|
+
--crispasr-bin-dir PATH
|
|
282
|
+
--crispasr-bin PATH
|
|
283
|
+
--install-crispasr
|
|
284
|
+
--update-crispasr
|
|
285
|
+
--crispasr-status
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
## MCP server
|
|
289
|
+
|
|
290
|
+
```powershell
|
|
291
|
+
uv sync --extra mcp
|
|
292
|
+
uv run --extra mcp crispasr-agent-mcp
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
Exposed tools:
|
|
296
|
+
|
|
297
|
+
| Tool | Description |
|
|
298
|
+
|---|---|
|
|
299
|
+
| `crispasr_health` | Check CrispASR server health |
|
|
300
|
+
| `crispasr_backends` | List available backends |
|
|
301
|
+
| `crispasr_detect_language` | Run language detection on a file |
|
|
302
|
+
| `transcribe_audio` | Transcribe an audio file |
|
|
303
|
+
| `transcribe_video` | Transcribe a video file |
|
|
304
|
+
| `transcribe_folder` | Batch-transcribe a folder |
|
|
305
|
+
|
|
306
|
+
## Security model
|
|
307
|
+
|
|
308
|
+
- **No cloud uploads.** Media files stay on the local filesystem.
|
|
309
|
+
- **No remote servers by default.** `--server-url` only accepts localhost
|
|
310
|
+
unless `--allow-remote-server` is explicitly passed.
|
|
311
|
+
- **No URL inputs.** Only local file paths are accepted. URLs, S3, and other
|
|
312
|
+
remote schemes are rejected.
|
|
313
|
+
- **No shell injection.** ffmpeg is called with argument lists and
|
|
314
|
+
`shell=False`. No user-controlled strings are interpolated into shell
|
|
315
|
+
commands.
|
|
316
|
+
- **No model downloads by default.** CrispASR model auto-download (`-m auto`)
|
|
317
|
+
requires `--allow-model-auto-download`. The same guard applies to language
|
|
318
|
+
detection models.
|
|
319
|
+
- **Temporary files are cleaned up.** Converted WAV files and LID probe
|
|
320
|
+
windows are deleted when transcription finishes.
|
|
321
|
+
- **Binary downloads are explicit.** The CrispASR binary is installed only from the
|
|
322
|
+
official `CrispStrobe/CrispASR` GitHub releases.
|
|
323
|
+
- **Verified plugin releases.** The npm installer requires the plugin ZIP to
|
|
324
|
+
match the SHA-256 value published in the same GitHub Release.
|
|
325
|
+
- **Narrow installer writes.** The installer manages only its plugin directory
|
|
326
|
+
and the named Personal marketplace entry. Updates preserve local models,
|
|
327
|
+
binaries, and outputs.
|
|
328
|
+
|
|
329
|
+
## Verify
|
|
330
|
+
|
|
331
|
+
```powershell
|
|
332
|
+
uv run pytest
|
|
333
|
+
uv run ruff check . # zero lint warnings
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
## License
|
|
337
|
+
|
|
338
|
+
This project is licensed under the [MIT License](LICENSE).
|
|
339
|
+
|
|
340
|
+
### Third-party components and attribution
|
|
341
|
+
|
|
342
|
+
This tool orchestrates several independently-licensed projects. It does not
|
|
343
|
+
bundle, fork, or redistribute their code -- it downloads pre-built binaries
|
|
344
|
+
and calls them as subprocesses or HTTP services at runtime.
|
|
345
|
+
|
|
346
|
+
| Component | License | Role |
|
|
347
|
+
|---|---|---|
|
|
348
|
+
| [CrispASR](https://github.com/CrispStrobe/CrispASR) | MIT | ASR engine, server, language detection |
|
|
349
|
+
| [ffmpeg](https://ffmpeg.org/) | LGPL 2.1+ / GPL 2+ | Media decoding and audio extraction |
|
|
350
|
+
| [Cohere Transcribe 03-2026](https://huggingface.co/cstr) | Cohere model license | English ASR model (loaded by CrispASR) |
|
|
351
|
+
| [Qwen3-ASR 1.7B](https://huggingface.co/cstr/qwen3-asr-1.7b-GGUF) | Apache 2.0 | Chinese ASR model (loaded by CrispASR) |
|
|
352
|
+
| [FireRed LID](https://huggingface.co/cstr/firered-lid-GGUF) | Apache 2.0 | Language detection model (loaded by CrispASR) |
|
|
353
|
+
| [httpx](https://github.com/encode/httpx) | BSD | HTTP client for CrispASR API |
|
|
354
|
+
| [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk) | MIT | MCP server framework |
|
|
355
|
+
| [Node.js](https://nodejs.org/) | MIT | npm installer runtime |
|
|
356
|
+
| [adm-zip](https://github.com/cthackers/adm-zip) | MIT | Verified plugin ZIP extraction |
|
|
357
|
+
|
|
358
|
+
Model files must be downloaded separately by the user from their respective
|
|
359
|
+
HuggingFace repositories. See [Required models](#required-models) above.
|
|
360
|
+
|
|
361
|
+
## Related projects
|
|
362
|
+
|
|
363
|
+
- [CrispASR](https://github.com/CrispStrobe/CrispASR) -- the ASR engine this
|
|
364
|
+
tool wraps
|
|
365
|
+
- [CrisperWeaver](https://github.com/CrispStrobe/CrisperWeaver) -- CrispASR's
|
|
366
|
+
desktop GUI (not used by this tool)
|