whspr 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,30 +1,35 @@
1
1
  # whspr
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/whspr.svg)](https://www.npmjs.com/package/whspr)
4
+ [![MIT License](https://img.shields.io/badge/License-MIT-green.svg)](https://choosealicense.com/licenses/mit/)
5
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](./CONTRIBUTING.md)
6
+
3
7
  A CLI tool that records audio from your microphone, transcribes it using Groq's Whisper API, and post-processes the transcription with AI to fix errors and apply custom vocabulary.
4
8
 
5
- ## Features
9
+ <p align="center">
10
+ <img src="./demo.gif" alt="whspr demo" width="600">
11
+ </p>
6
12
 
7
- - Live audio waveform visualization in the terminal
8
- - 15-minute max recording time
9
- - Transcription via Groq Whisper API
10
- - AI-powered post-processing to fix transcription errors
11
- - Custom vocabulary support via `WHSPR.md`
12
- - Automatic clipboard copy
13
+ ## Installation
13
14
 
14
- ## Requirements
15
+ ```bash
16
+ npm install -g whspr
17
+ ```
15
18
 
16
- - Node.js 18+
17
- - FFmpeg (`brew install ffmpeg` on macOS)
18
- - Groq API key
19
+ ### Optional: Alias as `whisper`
19
20
 
20
- ## Installation
21
+ If you'd like to use `whisper` instead of `whspr`, add this to your shell config (`~/.zshrc` or `~/.bashrc`):
21
22
 
22
23
  ```bash
23
- npm install
24
- npm run build
25
- npm link
24
+ alias whisper="whspr"
26
25
  ```
27
26
 
27
+ ## Requirements
28
+
29
+ - Node.js 18+
30
+ - FFmpeg (`brew install ffmpeg` on macOS)
31
+ - Groq API key
32
+
28
33
  ## Usage
29
34
 
30
35
  ```bash
@@ -40,9 +45,18 @@ whspr --verbose
40
45
 
41
46
  Press **Enter** to stop recording.
42
47
 
48
+ ## Features
49
+
50
+ - Live audio waveform visualization in the terminal
51
+ - 15-minute max recording time
52
+ - Transcription via Groq Whisper API
53
+ - AI-powered post-processing to fix transcription errors
54
+ - Custom vocabulary support via `WHSPR.md`
55
+ - Automatic clipboard copy
56
+
43
57
  ## Custom Vocabulary
44
58
 
45
- Create a `WHSPR.md` file in your current directory to provide custom vocabulary, names, or instructions for the AI post-processor:
59
+ Create a `WHSPR.md` (or `WHISPER.md`) file in your current directory to provide custom vocabulary, names, or instructions for the AI post-processor:
46
60
 
47
61
  ```markdown
48
62
  # Custom Vocabulary
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/index.js ADDED
@@ -0,0 +1,123 @@
1
+ #!/usr/bin/env node
2
+ import { record, convertToMp3 } from "./recorder.js";
3
+ import { transcribe } from "./transcribe.js";
4
+ import { postprocess } from "./postprocess.js";
5
+ import { copyToClipboard } from "./utils/clipboard.js";
6
+ import chalk from "chalk";
7
+ import fs from "fs";
8
+ import path from "path";
9
+ import os from "os";
10
+ const verbose = process.argv.includes("--verbose") || process.argv.includes("-v");
11
+ function status(message) {
12
+ process.stdout.write(`\x1b[2K\r${chalk.blue(message)}`);
13
+ }
14
+ function clearStatus() {
15
+ process.stdout.write("\x1b[2K\r");
16
+ }
17
+ function formatDuration(seconds) {
18
+ const mins = Math.floor(seconds / 60);
19
+ const secs = seconds % 60;
20
+ if (mins > 0) {
21
+ return `${mins}m ${secs}s`;
22
+ }
23
+ return `${secs}s`;
24
+ }
25
+ async function main() {
26
+ // Check for API key before recording
27
+ if (!process.env.GROQ_API_KEY) {
28
+ console.error(chalk.red("Error: GROQ_API_KEY environment variable is not set"));
29
+ console.log(chalk.gray("Get your API key at https://console.groq.com/keys"));
30
+ console.log(chalk.gray("Then run: export GROQ_API_KEY=\"your-api-key\""));
31
+ process.exit(1);
32
+ }
33
+ try {
34
+ // 1. Record audio
35
+ const recording = await record(verbose);
36
+ const processStart = Date.now();
37
+ // 2. Convert to MP3
38
+ status("Converting to MP3...");
39
+ const mp3Path = await convertToMp3(recording.path);
40
+ try {
41
+ // 3. Transcribe with Whisper
42
+ status("Transcribing...");
43
+ const rawText = await transcribe(mp3Path);
44
+ if (verbose) {
45
+ clearStatus();
46
+ console.log(chalk.gray(`Raw: ${rawText}`));
47
+ }
48
+ // 4. Read WHSPR.md or WHISPER.md if exists
49
+ const whsprMdPath = path.join(process.cwd(), "WHSPR.md");
50
+ const whisperMdPath = path.join(process.cwd(), "WHISPER.md");
51
+ let customPrompt = null;
52
+ let vocabFile = null;
53
+ if (fs.existsSync(whsprMdPath)) {
54
+ customPrompt = fs.readFileSync(whsprMdPath, "utf-8");
55
+ vocabFile = "WHSPR.md";
56
+ }
57
+ else if (fs.existsSync(whisperMdPath)) {
58
+ customPrompt = fs.readFileSync(whisperMdPath, "utf-8");
59
+ vocabFile = "WHISPER.md";
60
+ }
61
+ if (customPrompt && verbose) {
62
+ console.log(chalk.gray(`Using custom vocabulary from ${vocabFile}`));
63
+ }
64
+ // 5. Post-process
65
+ status("Post-processing...");
66
+ const fixedText = await postprocess(rawText, customPrompt);
67
+ // 6. Output and copy
68
+ clearStatus();
69
+ const processTime = ((Date.now() - processStart) / 1000).toFixed(1);
70
+ const wordCount = fixedText.trim().split(/\s+/).filter(w => w.length > 0).length;
71
+ const charCount = fixedText.length;
72
+ // Log stats
73
+ console.log(chalk.dim("Audio: ") + chalk.white(formatDuration(recording.durationSeconds)) +
74
+ chalk.dim(" • Processing: ") + chalk.white(processTime + "s"));
75
+ // Draw box
76
+ const termWidth = Math.min(process.stdout.columns || 60, 80);
77
+ const lineWidth = termWidth - 2;
78
+ const label = " TRANSCRIPT ";
79
+ console.log(chalk.dim("┌─") + chalk.cyan(label) + chalk.dim("─".repeat(lineWidth - label.length - 1) + "┐"));
80
+ const lines = fixedText.split("\n");
81
+ for (const line of lines) {
82
+ // Wrap long lines
83
+ let remaining = line;
84
+ while (remaining.length > 0) {
85
+ const chunk = remaining.slice(0, lineWidth - 2);
86
+ remaining = remaining.slice(lineWidth - 2);
87
+ console.log(chalk.dim("│ ") + chalk.white(chunk.padEnd(lineWidth - 2)) + chalk.dim(" │"));
88
+ }
89
+ if (line.length === 0) {
90
+ console.log(chalk.dim("│ " + " ".repeat(lineWidth - 2) + " │"));
91
+ }
92
+ }
93
+ const stats = ` ${wordCount} words • ${charCount} chars `;
94
+ const bottomLine = "─".repeat(lineWidth - stats.length - 1) + " ";
95
+ console.log(chalk.dim("└" + bottomLine) + chalk.dim(stats) + chalk.dim("┘"));
96
+ await copyToClipboard(fixedText);
97
+ console.log(chalk.green("✓") + chalk.gray(" Copied to clipboard"));
98
+ // 7. Clean up
99
+ fs.unlinkSync(mp3Path);
100
+ }
101
+ catch (error) {
102
+ clearStatus();
103
+ // Save recording on failure
104
+ const backupDir = path.join(os.homedir(), ".whspr", "recordings");
105
+ fs.mkdirSync(backupDir, { recursive: true });
106
+ const backupPath = path.join(backupDir, `recording-${Date.now()}.mp3`);
107
+ fs.renameSync(mp3Path, backupPath);
108
+ console.error(chalk.red(`Error: ${error}`));
109
+ console.log(chalk.yellow(`Recording saved to: ${backupPath}`));
110
+ process.exit(1);
111
+ }
112
+ }
113
+ catch (error) {
114
+ clearStatus();
115
+ // Silent exit on user cancel
116
+ if (error instanceof Error && error.message === "cancelled") {
117
+ process.exit(0);
118
+ }
119
+ console.error(chalk.red(`Recording error: ${error}`));
120
+ process.exit(1);
121
+ }
122
+ }
123
+ main();
@@ -0,0 +1 @@
1
+ export declare function postprocess(rawTranscription: string, customPrompt: string | null): Promise<string>;
@@ -0,0 +1,30 @@
1
+ import { generateObject } from "ai";
2
+ import { z } from "zod";
3
+ import { withRetry } from "./utils/retry.js";
4
+ import { groq } from "./utils/groq.js";
5
+ const MODEL = "openai/gpt-oss-120b";
6
+ const outputSchema = z.object({
7
+ fixed_transcription: z.string(),
8
+ });
9
+ export async function postprocess(rawTranscription, customPrompt) {
10
+ const result = await withRetry(async () => {
11
+ const response = await generateObject({
12
+ model: groq(MODEL),
13
+ schema: outputSchema,
14
+ messages: [
15
+ {
16
+ role: "system",
17
+ content: "Your task is to clean up/fix transcribed text generated from mic input by the user according to the user's own prompt, this prompt may contain custom vocabulary, instructions, etc. Please return the user's transcription with the fixes made (e.g. the AI might hear \"PostgreSQL\" as \"post crest QL\" you need to use your own reasoning to fix these mistakes in the transcription)"
18
+ },
19
+ {
20
+ role: "user",
21
+ content: customPrompt
22
+ ? `Here's my custom user prompt:\n\`\`\`\n${customPrompt}\n\`\`\`\n\nHere's my raw transcription output that I need you to edit:\n\`\`\`\n${rawTranscription}\n\`\`\``
23
+ : `Here's my raw transcription output that I need you to edit:\n\`\`\`\n${rawTranscription}\n\`\`\``
24
+ }
25
+ ],
26
+ });
27
+ return response.object;
28
+ }, 3, "postprocess");
29
+ return result.fixed_transcription;
30
+ }
@@ -0,0 +1,6 @@
1
+ export interface RecordingResult {
2
+ path: string;
3
+ durationSeconds: number;
4
+ }
5
+ export declare function record(verbose?: boolean): Promise<RecordingResult>;
6
+ export declare function convertToMp3(wavPath: string): Promise<string>;
@@ -0,0 +1,206 @@
1
+ import { spawn } from "child_process";
2
+ import fs from "fs";
3
+ import path from "path";
4
+ import os from "os";
5
+ import chalk from "chalk";
6
+ const MAX_DURATION_SECONDS = 900; // 15 minutes
7
+ const DEFAULT_WAVE_WIDTH = 60;
8
+ const STATUS_TEXT_WIDTH = 45; // " Recording [00:00 / 15:00] Press Enter to stop"
9
+ // Horizontal bar characters for waveform (quiet to loud)
10
+ const WAVE_CHARS = ["·", "-", "=", "≡", "■", "█"];
11
+ function formatTime(seconds) {
12
+ const mins = Math.floor(seconds / 60);
13
+ const secs = seconds % 60;
14
+ return `${mins.toString().padStart(2, "0")}:${secs.toString().padStart(2, "0")}`;
15
+ }
16
+ function dbToChar(db) {
17
+ // Adjusted range: -45 (quiet) to -18 (normal speech peaks)
18
+ const clamped = Math.max(-45, Math.min(-18, db));
19
+ const normalized = (clamped + 45) / 27;
20
+ const index = Math.min(WAVE_CHARS.length - 1, Math.floor(normalized * WAVE_CHARS.length));
21
+ return WAVE_CHARS[index];
22
+ }
23
+ function getWaveWidth() {
24
+ const termWidth = process.stdout.columns || 80;
25
+ // If terminal is wide enough for single line, use default
26
+ if (termWidth >= DEFAULT_WAVE_WIDTH + STATUS_TEXT_WIDTH) {
27
+ return DEFAULT_WAVE_WIDTH;
28
+ }
29
+ // Otherwise, use full terminal width for wave (will wrap text to next line)
30
+ return Math.max(10, termWidth - 2);
31
+ }
32
+ export async function record(verbose = false) {
33
+ const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "whspr-"));
34
+ const wavPath = path.join(tmpDir, "recording.wav");
35
+ return new Promise((resolve, reject) => {
36
+ // Initialize waveform buffer
37
+ let waveWidth = getWaveWidth();
38
+ const waveBuffer = new Array(waveWidth).fill(" ");
39
+ let currentDb = -60;
40
+ let cancelled = false;
41
+ // Spawn FFmpeg with ebur128 filter to get volume levels
42
+ const ffmpeg = spawn("ffmpeg", [
43
+ "-f",
44
+ "avfoundation",
45
+ "-i",
46
+ ":0",
47
+ "-af",
48
+ "ebur128=peak=true",
49
+ "-t",
50
+ MAX_DURATION_SECONDS.toString(),
51
+ "-y",
52
+ wavPath,
53
+ ], {
54
+ stdio: ["pipe", "pipe", "pipe"],
55
+ });
56
+ let elapsedSeconds = 0;
57
+ let stopped = false;
58
+ function renderTUI() {
59
+ const elapsed = formatTime(elapsedSeconds);
60
+ const max = formatTime(MAX_DURATION_SECONDS);
61
+ const wave = waveBuffer.join("");
62
+ const termWidth = process.stdout.columns || 80;
63
+ const singleLineWidth = waveWidth + STATUS_TEXT_WIDTH;
64
+ if (termWidth >= singleLineWidth) {
65
+ // Single line layout
66
+ process.stdout.write(`\x1b[2K\r${chalk.cyan(wave)} ${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}`);
67
+ }
68
+ else {
69
+ // Two line layout: wave on first line, status on second
70
+ process.stdout.write(`\x1b[2K\r${chalk.cyan(wave)}\n\x1b[2K${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}\x1b[A\r`);
71
+ }
72
+ }
73
+ // Update timer every second
74
+ const timer = setInterval(() => {
75
+ if (stopped)
76
+ return;
77
+ elapsedSeconds++;
78
+ renderTUI();
79
+ if (elapsedSeconds >= MAX_DURATION_SECONDS) {
80
+ clearInterval(timer);
81
+ }
82
+ }, 1000);
83
+ // Update waveform more frequently
84
+ const waveTimer = setInterval(() => {
85
+ if (stopped)
86
+ return;
87
+ // Push new character based on current dB level
88
+ waveBuffer.shift();
89
+ waveBuffer.push(dbToChar(currentDb));
90
+ renderTUI();
91
+ }, 50);
92
+ // Initial display
93
+ renderTUI();
94
+ // Parse stderr for volume levels from ebur128
95
+ ffmpeg.stderr?.on("data", (data) => {
96
+ const output = data.toString();
97
+ // Look for FTPK (frame true peak) from ebur128 output
98
+ // Format: "FTPK: -XX.X -XX.X dBFS"
99
+ const ftpkMatch = output.match(/FTPK:\s*(-?[\d.]+)\s+(-?[\d.]+)\s+dBFS/);
100
+ if (ftpkMatch) {
101
+ // Average the left and right channels
102
+ const left = parseFloat(ftpkMatch[1]);
103
+ const right = parseFloat(ftpkMatch[2]);
104
+ if (!isNaN(left) && !isNaN(right)) {
105
+ currentDb = (left + right) / 2;
106
+ }
107
+ }
108
+ });
109
+ // Listen for Enter to stop, Ctrl+C to cancel
110
+ const onKeypress = (data) => {
111
+ const key = data.toString();
112
+ const isEnter = key.includes("\n") || key.includes("\r");
113
+ const isCtrlC = key.includes("\x03");
114
+ if (isEnter || isCtrlC) {
115
+ stopped = true;
116
+ cancelled = isCtrlC;
117
+ clearInterval(timer);
118
+ clearInterval(waveTimer);
119
+ process.stdin.removeListener("data", onKeypress);
120
+ process.stdin.setRawMode(false);
121
+ process.stdin.pause();
122
+ // Send SIGINT to FFmpeg to stop gracefully
123
+ ffmpeg.kill("SIGINT");
124
+ }
125
+ };
126
+ if (process.stdin.isTTY) {
127
+ process.stdin.setRawMode(true);
128
+ process.stdin.resume();
129
+ process.stdin.on("data", onKeypress);
130
+ }
131
+ ffmpeg.on("close", (code) => {
132
+ clearInterval(timer);
133
+ clearInterval(waveTimer);
134
+ const termWidth = process.stdout.columns || 80;
135
+ const singleLineWidth = waveWidth + STATUS_TEXT_WIDTH;
136
+ if (termWidth >= singleLineWidth) {
137
+ process.stdout.write("\x1b[2K\r"); // Clear the line
138
+ }
139
+ else {
140
+ process.stdout.write("\x1b[2K\n\x1b[2K\x1b[A\r"); // Clear both lines
141
+ }
142
+ if (cancelled) {
143
+ // User pressed Ctrl+C - clean up and reject
144
+ if (fs.existsSync(wavPath)) {
145
+ fs.unlinkSync(wavPath);
146
+ }
147
+ reject(new Error("cancelled"));
148
+ }
149
+ else if (stopped || code === 0 || code === 255) {
150
+ // FFmpeg returns 255 when interrupted with SIGINT
151
+ if (fs.existsSync(wavPath)) {
152
+ if (verbose) {
153
+ console.log(chalk.green(`Recording complete (${formatTime(elapsedSeconds)})`));
154
+ }
155
+ resolve({ path: wavPath, durationSeconds: elapsedSeconds });
156
+ }
157
+ else {
158
+ reject(new Error("Recording failed: no output file created"));
159
+ }
160
+ }
161
+ else {
162
+ reject(new Error(`FFmpeg exited with code ${code}`));
163
+ }
164
+ });
165
+ ffmpeg.on("error", (err) => {
166
+ clearInterval(timer);
167
+ clearInterval(waveTimer);
168
+ stopped = true;
169
+ if (process.stdin.isTTY) {
170
+ process.stdin.setRawMode(false);
171
+ process.stdin.pause();
172
+ }
173
+ reject(new Error(`Failed to start FFmpeg: ${err.message}`));
174
+ });
175
+ });
176
+ }
177
+ export async function convertToMp3(wavPath) {
178
+ const mp3Path = wavPath.replace(/\.wav$/, ".mp3");
179
+ return new Promise((resolve, reject) => {
180
+ const ffmpeg = spawn("ffmpeg", [
181
+ "-i",
182
+ wavPath,
183
+ "-codec:a",
184
+ "libmp3lame",
185
+ "-qscale:a",
186
+ "2",
187
+ "-y",
188
+ mp3Path,
189
+ ], {
190
+ stdio: ["pipe", "pipe", "pipe"],
191
+ });
192
+ ffmpeg.on("close", (code) => {
193
+ if (code === 0) {
194
+ // Delete the WAV file after successful conversion
195
+ fs.unlinkSync(wavPath);
196
+ resolve(mp3Path);
197
+ }
198
+ else {
199
+ reject(new Error(`MP3 conversion failed with code ${code}`));
200
+ }
201
+ });
202
+ ffmpeg.on("error", (err) => {
203
+ reject(new Error(`Failed to convert to MP3: ${err.message}`));
204
+ });
205
+ });
206
+ }
@@ -0,0 +1 @@
1
+ export declare function transcribe(audioPath: string): Promise<string>;
@@ -0,0 +1,12 @@
1
+ import Groq from "groq-sdk";
2
+ import fs from "fs";
3
+ const groq = new Groq(); // Uses GROQ_API_KEY env var
4
+ export async function transcribe(audioPath) {
5
+ const transcription = await groq.audio.transcriptions.create({
6
+ file: fs.createReadStream(audioPath),
7
+ model: "whisper-large-v3-turbo",
8
+ temperature: 0,
9
+ language: "en",
10
+ });
11
+ return transcription.text;
12
+ }
@@ -0,0 +1,8 @@
1
+ export interface RecordingResult {
2
+ wavPath: string;
3
+ duration: number;
4
+ }
5
+ export interface TranscriptionResult {
6
+ text: string;
7
+ language?: string;
8
+ }
package/dist/types.js ADDED
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export declare function copyToClipboard(text: string): Promise<void>;
@@ -0,0 +1,4 @@
1
+ import clipboard from "clipboardy";
2
+ export async function copyToClipboard(text) {
3
+ await clipboard.write(text);
4
+ }
@@ -0,0 +1 @@
1
+ export declare const groq: import("@ai-sdk/groq").GroqProvider;
@@ -1,3 +1,2 @@
1
1
  import { createGroq } from "@ai-sdk/groq";
2
-
3
2
  export const groq = createGroq();
@@ -0,0 +1 @@
1
+ export declare function withRetry<T>(fn: () => Promise<T>, maxAttempts?: number, label?: string): Promise<T>;
@@ -0,0 +1,16 @@
1
+ export async function withRetry(fn, maxAttempts = 3, label = "API call") {
2
+ let lastError;
3
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
4
+ try {
5
+ return await fn();
6
+ }
7
+ catch (error) {
8
+ lastError = error instanceof Error ? error : new Error(String(error));
9
+ console.warn(`${label} attempt ${attempt}/${maxAttempts} failed:`, lastError.message);
10
+ if (attempt < maxAttempts) {
11
+ await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
12
+ }
13
+ }
14
+ }
15
+ throw lastError;
16
+ }
package/package.json CHANGED
@@ -1,21 +1,32 @@
1
1
  {
2
2
  "name": "whspr",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "CLI tool for audio transcription with Groq Whisper API",
5
5
  "type": "module",
6
6
  "bin": {
7
7
  "whspr": "./bin/whspr.js"
8
8
  },
9
- "keywords": ["whisper", "transcription", "audio", "cli", "groq"],
9
+ "keywords": [
10
+ "whisper",
11
+ "transcription",
12
+ "audio",
13
+ "cli",
14
+ "groq"
15
+ ],
10
16
  "author": "Merkie",
11
17
  "license": "MIT",
12
18
  "repository": {
13
19
  "type": "git",
14
20
  "url": "https://github.com/Merkie/whspr"
15
21
  },
22
+ "files": [
23
+ "bin",
24
+ "dist"
25
+ ],
16
26
  "scripts": {
17
27
  "build": "tsc",
18
- "dev": "tsx src/index.ts"
28
+ "dev": "tsx src/index.ts",
29
+ "prepublishOnly": "npm run build"
19
30
  },
20
31
  "dependencies": {
21
32
  "@ai-sdk/groq": "^1.x",
@@ -1,32 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Read",
5
- "Edit",
6
- "Write",
7
- "Glob",
8
- "Grep",
9
- "Bash(npm:*)",
10
- "Bash(npx:*)",
11
- "Bash(git:*)",
12
- "Bash(whspr:*)"
13
- ],
14
- "deny": [
15
- "Bash(rm -rf:*)",
16
- "Bash(sudo:*)"
17
- ]
18
- },
19
- "hooks": {
20
- "PostToolUse": [
21
- {
22
- "matcher": "Edit|Write",
23
- "hooks": [
24
- {
25
- "type": "command",
26
- "command": "npx prettier --write $CLAUDE_FILE_PATHS"
27
- }
28
- ]
29
- }
30
- ]
31
- }
32
- }
package/CLAUDE.md DELETED
@@ -1,61 +0,0 @@
1
- # whspr
2
-
3
- A CLI tool that records audio from your microphone, transcribes it using Groq's Whisper API, and post-processes with AI to fix errors.
4
-
5
- ## Stack
6
-
7
- - Language: TypeScript (ES2022, NodeNext modules)
8
- - Runtime: Node.js 18+
9
- - Package manager: npm
10
- - External: FFmpeg (required for audio recording)
11
-
12
- ## Structure
13
-
14
- - `src/` - Main source code
15
- - `index.ts` - CLI entry point and main flow
16
- - `recorder.ts` - FFmpeg audio recording with waveform TUI
17
- - `transcribe.ts` - Groq Whisper API integration
18
- - `postprocess.ts` - AI post-processing for corrections
19
- - `utils/` - Shared utilities (retry, clipboard, groq client)
20
- - `bin/whspr.js` - CLI entrypoint
21
- - `dist/` - Compiled output
22
-
23
- ## Commands
24
-
25
- ```bash
26
- # Install dependencies
27
- npm install
28
-
29
- # Build
30
- npm run build
31
-
32
- # Development (run without build)
33
- npm run dev
34
-
35
- # Link globally after build
36
- npm link
37
-
38
- # Run the CLI
39
- whspr
40
- whspr --verbose
41
- ```
42
-
43
- ## Environment
44
-
45
- Requires `GROQ_API_KEY` environment variable.
46
-
47
- ## Key Conventions
48
-
49
- - Uses Groq SDK for both Whisper transcription and AI post-processing
50
- - Recording uses FFmpeg's avfoundation (macOS) with ebur128 for volume levels
51
- - Max recording duration: 15 minutes
52
- - Failed recordings are saved to `~/.whspr/recordings/` for recovery
53
- - Custom vocabulary via `WHSPR.md` in current directory
54
-
55
- ## API Flow
56
-
57
- 1. Record audio → WAV file (FFmpeg)
58
- 2. Convert WAV → MP3
59
- 3. Transcribe MP3 → text (Groq Whisper)
60
- 4. Post-process text → fixed text (Groq AI)
61
- 5. Copy result to clipboard
package/src/index.ts DELETED
@@ -1,132 +0,0 @@
1
- #!/usr/bin/env node
2
- import { record, convertToMp3, RecordingResult } from "./recorder.js";
3
- import { transcribe } from "./transcribe.js";
4
- import { postprocess } from "./postprocess.js";
5
- import { copyToClipboard } from "./utils/clipboard.js";
6
- import chalk from "chalk";
7
- import fs from "fs";
8
- import path from "path";
9
- import os from "os";
10
-
11
- const verbose = process.argv.includes("--verbose") || process.argv.includes("-v");
12
-
13
- function status(message: string) {
14
- process.stdout.write(`\x1b[2K\r${chalk.blue(message)}`);
15
- }
16
-
17
- function clearStatus() {
18
- process.stdout.write("\x1b[2K\r");
19
- }
20
-
21
- function formatDuration(seconds: number): string {
22
- const mins = Math.floor(seconds / 60);
23
- const secs = seconds % 60;
24
- if (mins > 0) {
25
- return `${mins}m ${secs}s`;
26
- }
27
- return `${secs}s`;
28
- }
29
-
30
- async function main() {
31
- try {
32
- // 1. Record audio
33
- const recording = await record(verbose);
34
- const processStart = Date.now();
35
-
36
- // 2. Convert to MP3
37
- status("Converting to MP3...");
38
- const mp3Path = await convertToMp3(recording.path);
39
-
40
- try {
41
- // 3. Transcribe with Whisper
42
- status("Transcribing...");
43
- const rawText = await transcribe(mp3Path);
44
-
45
- if (verbose) {
46
- clearStatus();
47
- console.log(chalk.gray(`Raw: ${rawText}`));
48
- }
49
-
50
- // 4. Read WHSPR.md or WHISPER.md if exists
51
- const whsprMdPath = path.join(process.cwd(), "WHSPR.md");
52
- const whisperMdPath = path.join(process.cwd(), "WHISPER.md");
53
- let customPrompt: string | null = null;
54
- let vocabFile: string | null = null;
55
-
56
- if (fs.existsSync(whsprMdPath)) {
57
- customPrompt = fs.readFileSync(whsprMdPath, "utf-8");
58
- vocabFile = "WHSPR.md";
59
- } else if (fs.existsSync(whisperMdPath)) {
60
- customPrompt = fs.readFileSync(whisperMdPath, "utf-8");
61
- vocabFile = "WHISPER.md";
62
- }
63
-
64
- if (customPrompt && verbose) {
65
- console.log(chalk.gray(`Using custom vocabulary from ${vocabFile}`));
66
- }
67
-
68
- // 5. Post-process
69
- status("Post-processing...");
70
- const fixedText = await postprocess(rawText, customPrompt);
71
-
72
- // 6. Output and copy
73
- clearStatus();
74
- const processTime = ((Date.now() - processStart) / 1000).toFixed(1);
75
- const wordCount = fixedText.trim().split(/\s+/).filter(w => w.length > 0).length;
76
- const charCount = fixedText.length;
77
-
78
- // Log stats
79
- console.log(
80
- chalk.dim("Audio: ") + chalk.white(formatDuration(recording.durationSeconds)) +
81
- chalk.dim(" • Processing: ") + chalk.white(processTime + "s")
82
- );
83
-
84
- // Draw box
85
- const termWidth = Math.min(process.stdout.columns || 60, 80);
86
- const lineWidth = termWidth - 2;
87
- const label = " TRANSCRIPT ";
88
- console.log(chalk.dim("┌─") + chalk.cyan(label) + chalk.dim("─".repeat(lineWidth - label.length - 1) + "┐"));
89
- const lines = fixedText.split("\n");
90
- for (const line of lines) {
91
- // Wrap long lines
92
- let remaining = line;
93
- while (remaining.length > 0) {
94
- const chunk = remaining.slice(0, lineWidth - 2);
95
- remaining = remaining.slice(lineWidth - 2);
96
- console.log(chalk.dim("│ ") + chalk.white(chunk.padEnd(lineWidth - 2)) + chalk.dim(" │"));
97
- }
98
- if (line.length === 0) {
99
- console.log(chalk.dim("│ " + " ".repeat(lineWidth - 2) + " │"));
100
- }
101
- }
102
- const stats = ` ${wordCount} words • ${charCount} chars `;
103
- const bottomLine = "─".repeat(lineWidth - stats.length - 1) + " ";
104
- console.log(chalk.dim("└" + bottomLine) + chalk.dim(stats) + chalk.dim("┘"));
105
- await copyToClipboard(fixedText);
106
- console.log(chalk.green("✓") + chalk.gray(" Copied to clipboard"));
107
-
108
- // 7. Clean up
109
- fs.unlinkSync(mp3Path);
110
- } catch (error) {
111
- clearStatus();
112
- // Save recording on failure
113
- const backupDir = path.join(os.homedir(), ".whspr", "recordings");
114
- fs.mkdirSync(backupDir, { recursive: true });
115
- const backupPath = path.join(backupDir, `recording-${Date.now()}.mp3`);
116
- fs.renameSync(mp3Path, backupPath);
117
- console.error(chalk.red(`Error: ${error}`));
118
- console.log(chalk.yellow(`Recording saved to: ${backupPath}`));
119
- process.exit(1);
120
- }
121
- } catch (error) {
122
- clearStatus();
123
- // Silent exit on user cancel
124
- if (error instanceof Error && error.message === "cancelled") {
125
- process.exit(0);
126
- }
127
- console.error(chalk.red(`Recording error: ${error}`));
128
- process.exit(1);
129
- }
130
- }
131
-
132
- main();
@@ -1,37 +0,0 @@
1
- import { generateObject } from "ai";
2
- import { z } from "zod";
3
- import { withRetry } from "./utils/retry.js";
4
- import { groq } from "./utils/groq.js";
5
-
6
- const MODEL = "openai/gpt-oss-120b";
7
-
8
- const outputSchema = z.object({
9
- fixed_transcription: z.string(),
10
- });
11
-
12
- export async function postprocess(
13
- rawTranscription: string,
14
- customPrompt: string | null
15
- ): Promise<string> {
16
- const result = await withRetry(async () => {
17
- const response = await generateObject({
18
- model: groq(MODEL),
19
- schema: outputSchema,
20
- messages: [
21
- {
22
- role: "system",
23
- content: "Your task is to clean up/fix transcribed text generated from mic input by the user according to the user's own prompt, this prompt may contain custom vocabulary, instructions, etc. Please return the user's transcription with the fixes made (e.g. the AI might hear \"PostgreSQL\" as \"post crest QL\" you need to use your own reasoning to fix these mistakes in the transcription)"
24
- },
25
- {
26
- role: "user",
27
- content: customPrompt
28
- ? `Here's my custom user prompt:\n\`\`\`\n${customPrompt}\n\`\`\`\n\nHere's my raw transcription output that I need you to edit:\n\`\`\`\n${rawTranscription}\n\`\`\``
29
- : `Here's my raw transcription output that I need you to edit:\n\`\`\`\n${rawTranscription}\n\`\`\``
30
- }
31
- ],
32
- });
33
- return response.object;
34
- }, 3, "postprocess");
35
-
36
- return result.fixed_transcription;
37
- }
package/src/recorder.ts DELETED
@@ -1,248 +0,0 @@
1
- import { spawn, ChildProcess } from "child_process";
2
- import fs from "fs";
3
- import path from "path";
4
- import os from "os";
5
- import chalk from "chalk";
6
-
7
- const MAX_DURATION_SECONDS = 900; // 15 minutes
8
- const DEFAULT_WAVE_WIDTH = 60;
9
- const STATUS_TEXT_WIDTH = 45; // " Recording [00:00 / 15:00] Press Enter to stop"
10
-
11
- // Horizontal bar characters for waveform (quiet to loud)
12
- const WAVE_CHARS = ["·", "-", "=", "≡", "■", "█"];
13
-
14
- function formatTime(seconds: number): string {
15
- const mins = Math.floor(seconds / 60);
16
- const secs = seconds % 60;
17
- return `${mins.toString().padStart(2, "0")}:${secs.toString().padStart(2, "0")}`;
18
- }
19
-
20
- function dbToChar(db: number): string {
21
- // Adjusted range: -45 (quiet) to -18 (normal speech peaks)
22
- const clamped = Math.max(-45, Math.min(-18, db));
23
- const normalized = (clamped + 45) / 27;
24
- const index = Math.min(
25
- WAVE_CHARS.length - 1,
26
- Math.floor(normalized * WAVE_CHARS.length),
27
- );
28
- return WAVE_CHARS[index];
29
- }
30
-
31
- function getWaveWidth(): number {
32
- const termWidth = process.stdout.columns || 80;
33
- // If terminal is wide enough for single line, use default
34
- if (termWidth >= DEFAULT_WAVE_WIDTH + STATUS_TEXT_WIDTH) {
35
- return DEFAULT_WAVE_WIDTH;
36
- }
37
- // Otherwise, use full terminal width for wave (will wrap text to next line)
38
- return Math.max(10, termWidth - 2);
39
- }
40
-
41
- export interface RecordingResult {
42
- path: string;
43
- durationSeconds: number;
44
- }
45
-
46
- export async function record(verbose = false): Promise<RecordingResult> {
47
- const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "whspr-"));
48
- const wavPath = path.join(tmpDir, "recording.wav");
49
-
50
- return new Promise((resolve, reject) => {
51
- // Initialize waveform buffer
52
- let waveWidth = getWaveWidth();
53
- const waveBuffer: string[] = new Array(waveWidth).fill(" ");
54
- let currentDb = -60;
55
- let cancelled = false;
56
-
57
- // Spawn FFmpeg with ebur128 filter to get volume levels
58
- const ffmpeg: ChildProcess = spawn(
59
- "ffmpeg",
60
- [
61
- "-f",
62
- "avfoundation",
63
- "-i",
64
- ":0",
65
- "-af",
66
- "ebur128=peak=true",
67
- "-t",
68
- MAX_DURATION_SECONDS.toString(),
69
- "-y",
70
- wavPath,
71
- ],
72
- {
73
- stdio: ["pipe", "pipe", "pipe"],
74
- },
75
- );
76
-
77
- let elapsedSeconds = 0;
78
- let stopped = false;
79
-
80
- function renderTUI() {
81
- const elapsed = formatTime(elapsedSeconds);
82
- const max = formatTime(MAX_DURATION_SECONDS);
83
- const wave = waveBuffer.join("");
84
- const termWidth = process.stdout.columns || 80;
85
- const singleLineWidth = waveWidth + STATUS_TEXT_WIDTH;
86
-
87
- if (termWidth >= singleLineWidth) {
88
- // Single line layout
89
- process.stdout.write(
90
- `\x1b[2K\r${chalk.cyan(wave)} ${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}`,
91
- );
92
- } else {
93
- // Two line layout: wave on first line, status on second
94
- process.stdout.write(
95
- `\x1b[2K\r${chalk.cyan(wave)}\n\x1b[2K${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}\x1b[A\r`,
96
- );
97
- }
98
- }
99
-
100
- // Update timer every second
101
- const timer = setInterval(() => {
102
- if (stopped) return;
103
- elapsedSeconds++;
104
- renderTUI();
105
-
106
- if (elapsedSeconds >= MAX_DURATION_SECONDS) {
107
- clearInterval(timer);
108
- }
109
- }, 1000);
110
-
111
- // Update waveform more frequently
112
- const waveTimer = setInterval(() => {
113
- if (stopped) return;
114
- // Push new character based on current dB level
115
- waveBuffer.shift();
116
- waveBuffer.push(dbToChar(currentDb));
117
- renderTUI();
118
- }, 50);
119
-
120
- // Initial display
121
- renderTUI();
122
-
123
- // Parse stderr for volume levels from ebur128
124
- ffmpeg.stderr?.on("data", (data: Buffer) => {
125
- const output = data.toString();
126
-
127
- // Look for FTPK (frame true peak) from ebur128 output
128
- // Format: "FTPK: -XX.X -XX.X dBFS"
129
- const ftpkMatch = output.match(/FTPK:\s*(-?[\d.]+)\s+(-?[\d.]+)\s+dBFS/);
130
- if (ftpkMatch) {
131
- // Average the left and right channels
132
- const left = parseFloat(ftpkMatch[1]);
133
- const right = parseFloat(ftpkMatch[2]);
134
- if (!isNaN(left) && !isNaN(right)) {
135
- currentDb = (left + right) / 2;
136
- }
137
- }
138
- });
139
-
140
- // Listen for Enter to stop, Ctrl+C to cancel
141
- const onKeypress = (data: Buffer) => {
142
- const key = data.toString();
143
- const isEnter = key.includes("\n") || key.includes("\r");
144
- const isCtrlC = key.includes("\x03");
145
-
146
- if (isEnter || isCtrlC) {
147
- stopped = true;
148
- cancelled = isCtrlC;
149
- clearInterval(timer);
150
- clearInterval(waveTimer);
151
- process.stdin.removeListener("data", onKeypress);
152
- process.stdin.setRawMode(false);
153
- process.stdin.pause();
154
-
155
- // Send SIGINT to FFmpeg to stop gracefully
156
- ffmpeg.kill("SIGINT");
157
- }
158
- };
159
-
160
- if (process.stdin.isTTY) {
161
- process.stdin.setRawMode(true);
162
- process.stdin.resume();
163
- process.stdin.on("data", onKeypress);
164
- }
165
-
166
- ffmpeg.on("close", (code) => {
167
- clearInterval(timer);
168
- clearInterval(waveTimer);
169
- const termWidth = process.stdout.columns || 80;
170
- const singleLineWidth = waveWidth + STATUS_TEXT_WIDTH;
171
- if (termWidth >= singleLineWidth) {
172
- process.stdout.write("\x1b[2K\r"); // Clear the line
173
- } else {
174
- process.stdout.write("\x1b[2K\n\x1b[2K\x1b[A\r"); // Clear both lines
175
- }
176
-
177
- if (cancelled) {
178
- // User pressed Ctrl+C - clean up and reject
179
- if (fs.existsSync(wavPath)) {
180
- fs.unlinkSync(wavPath);
181
- }
182
- reject(new Error("cancelled"));
183
- } else if (stopped || code === 0 || code === 255) {
184
- // FFmpeg returns 255 when interrupted with SIGINT
185
- if (fs.existsSync(wavPath)) {
186
- if (verbose) {
187
- console.log(
188
- chalk.green(`Recording complete (${formatTime(elapsedSeconds)})`),
189
- );
190
- }
191
- resolve({ path: wavPath, durationSeconds: elapsedSeconds });
192
- } else {
193
- reject(new Error("Recording failed: no output file created"));
194
- }
195
- } else {
196
- reject(new Error(`FFmpeg exited with code ${code}`));
197
- }
198
- });
199
-
200
- ffmpeg.on("error", (err) => {
201
- clearInterval(timer);
202
- clearInterval(waveTimer);
203
- stopped = true;
204
- if (process.stdin.isTTY) {
205
- process.stdin.setRawMode(false);
206
- process.stdin.pause();
207
- }
208
- reject(new Error(`Failed to start FFmpeg: ${err.message}`));
209
- });
210
- });
211
- }
212
-
213
- export async function convertToMp3(wavPath: string): Promise<string> {
214
- const mp3Path = wavPath.replace(/\.wav$/, ".mp3");
215
-
216
- return new Promise((resolve, reject) => {
217
- const ffmpeg = spawn(
218
- "ffmpeg",
219
- [
220
- "-i",
221
- wavPath,
222
- "-codec:a",
223
- "libmp3lame",
224
- "-qscale:a",
225
- "2",
226
- "-y",
227
- mp3Path,
228
- ],
229
- {
230
- stdio: ["pipe", "pipe", "pipe"],
231
- },
232
- );
233
-
234
- ffmpeg.on("close", (code) => {
235
- if (code === 0) {
236
- // Delete the WAV file after successful conversion
237
- fs.unlinkSync(wavPath);
238
- resolve(mp3Path);
239
- } else {
240
- reject(new Error(`MP3 conversion failed with code ${code}`));
241
- }
242
- });
243
-
244
- ffmpeg.on("error", (err) => {
245
- reject(new Error(`Failed to convert to MP3: ${err.message}`));
246
- });
247
- });
248
- }
package/src/transcribe.ts DELETED
@@ -1,14 +0,0 @@
1
- import Groq from "groq-sdk";
2
- import fs from "fs";
3
-
4
- const groq = new Groq(); // Uses GROQ_API_KEY env var
5
-
6
- export async function transcribe(audioPath: string): Promise<string> {
7
- const transcription = await groq.audio.transcriptions.create({
8
- file: fs.createReadStream(audioPath),
9
- model: "whisper-large-v3-turbo",
10
- temperature: 0,
11
- language: "en",
12
- });
13
- return transcription.text;
14
- }
package/src/types.ts DELETED
@@ -1,9 +0,0 @@
1
- export interface RecordingResult {
2
- wavPath: string;
3
- duration: number;
4
- }
5
-
6
- export interface TranscriptionResult {
7
- text: string;
8
- language?: string;
9
- }
@@ -1,5 +0,0 @@
1
- import clipboard from "clipboardy";
2
-
3
- export async function copyToClipboard(text: string): Promise<void> {
4
- await clipboard.write(text);
5
- }
@@ -1,19 +0,0 @@
1
- export async function withRetry<T>(
2
- fn: () => Promise<T>,
3
- maxAttempts = 3,
4
- label = "API call"
5
- ): Promise<T> {
6
- let lastError: Error | undefined;
7
- for (let attempt = 1; attempt <= maxAttempts; attempt++) {
8
- try {
9
- return await fn();
10
- } catch (error) {
11
- lastError = error instanceof Error ? error : new Error(String(error));
12
- console.warn(`${label} attempt ${attempt}/${maxAttempts} failed:`, lastError.message);
13
- if (attempt < maxAttempts) {
14
- await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
15
- }
16
- }
17
- }
18
- throw lastError;
19
- }
package/tsconfig.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "target": "ES2022",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
6
- "strict": true,
7
- "esModuleInterop": true,
8
- "skipLibCheck": true,
9
- "forceConsistentCasingInFileNames": true,
10
- "outDir": "./dist",
11
- "rootDir": "./src",
12
- "declaration": true
13
- },
14
- "include": ["src/**/*"],
15
- "exclude": ["node_modules", "dist"]
16
- }