whspr 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.json +32 -0
- package/CLAUDE.md +61 -0
- package/README.md +69 -0
- package/bin/whspr.js +2 -0
- package/package.json +34 -0
- package/src/index.ts +132 -0
- package/src/postprocess.ts +37 -0
- package/src/recorder.ts +248 -0
- package/src/transcribe.ts +14 -0
- package/src/types.ts +9 -0
- package/src/utils/clipboard.ts +5 -0
- package/src/utils/groq.ts +3 -0
- package/src/utils/retry.ts +19 -0
- package/tsconfig.json +16 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Read",
|
|
5
|
+
"Edit",
|
|
6
|
+
"Write",
|
|
7
|
+
"Glob",
|
|
8
|
+
"Grep",
|
|
9
|
+
"Bash(npm:*)",
|
|
10
|
+
"Bash(npx:*)",
|
|
11
|
+
"Bash(git:*)",
|
|
12
|
+
"Bash(whspr:*)"
|
|
13
|
+
],
|
|
14
|
+
"deny": [
|
|
15
|
+
"Bash(rm -rf:*)",
|
|
16
|
+
"Bash(sudo:*)"
|
|
17
|
+
]
|
|
18
|
+
},
|
|
19
|
+
"hooks": {
|
|
20
|
+
"PostToolUse": [
|
|
21
|
+
{
|
|
22
|
+
"matcher": "Edit|Write",
|
|
23
|
+
"hooks": [
|
|
24
|
+
{
|
|
25
|
+
"type": "command",
|
|
26
|
+
"command": "npx prettier --write $CLAUDE_FILE_PATHS"
|
|
27
|
+
}
|
|
28
|
+
]
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
}
|
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# whspr
|
|
2
|
+
|
|
3
|
+
A CLI tool that records audio from your microphone, transcribes it using Groq's Whisper API, and post-processes with AI to fix errors.
|
|
4
|
+
|
|
5
|
+
## Stack
|
|
6
|
+
|
|
7
|
+
- Language: TypeScript (ES2022, NodeNext modules)
|
|
8
|
+
- Runtime: Node.js 18+
|
|
9
|
+
- Package manager: npm
|
|
10
|
+
- External: FFmpeg (required for audio recording)
|
|
11
|
+
|
|
12
|
+
## Structure
|
|
13
|
+
|
|
14
|
+
- `src/` - Main source code
|
|
15
|
+
- `index.ts` - CLI entry point and main flow
|
|
16
|
+
- `recorder.ts` - FFmpeg audio recording with waveform TUI
|
|
17
|
+
- `transcribe.ts` - Groq Whisper API integration
|
|
18
|
+
- `postprocess.ts` - AI post-processing for corrections
|
|
19
|
+
- `utils/` - Shared utilities (retry, clipboard, groq client)
|
|
20
|
+
- `bin/whspr.js` - CLI entrypoint
|
|
21
|
+
- `dist/` - Compiled output
|
|
22
|
+
|
|
23
|
+
## Commands
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Install dependencies
|
|
27
|
+
npm install
|
|
28
|
+
|
|
29
|
+
# Build
|
|
30
|
+
npm run build
|
|
31
|
+
|
|
32
|
+
# Development (run without build)
|
|
33
|
+
npm run dev
|
|
34
|
+
|
|
35
|
+
# Link globally after build
|
|
36
|
+
npm link
|
|
37
|
+
|
|
38
|
+
# Run the CLI
|
|
39
|
+
whspr
|
|
40
|
+
whspr --verbose
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Environment
|
|
44
|
+
|
|
45
|
+
Requires `GROQ_API_KEY` environment variable.
|
|
46
|
+
|
|
47
|
+
## Key Conventions
|
|
48
|
+
|
|
49
|
+
- Uses Groq SDK for both Whisper transcription and AI post-processing
|
|
50
|
+
- Recording uses FFmpeg's avfoundation (macOS) with ebur128 for volume levels
|
|
51
|
+
- Max recording duration: 15 minutes
|
|
52
|
+
- Failed recordings are saved to `~/.whspr/recordings/` for recovery
|
|
53
|
+
- Custom vocabulary via `WHSPR.md` in current directory
|
|
54
|
+
|
|
55
|
+
## API Flow
|
|
56
|
+
|
|
57
|
+
1. Record audio → WAV file (FFmpeg)
|
|
58
|
+
2. Convert WAV → MP3
|
|
59
|
+
3. Transcribe MP3 → text (Groq Whisper)
|
|
60
|
+
4. Post-process text → fixed text (Groq AI)
|
|
61
|
+
5. Copy result to clipboard
|
package/README.md
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# whspr
|
|
2
|
+
|
|
3
|
+
A CLI tool that records audio from your microphone, transcribes it using Groq's Whisper API, and post-processes the transcription with AI to fix errors and apply custom vocabulary.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Live audio waveform visualization in the terminal
|
|
8
|
+
- 15-minute max recording time
|
|
9
|
+
- Transcription via Groq Whisper API
|
|
10
|
+
- AI-powered post-processing to fix transcription errors
|
|
11
|
+
- Custom vocabulary support via `WHSPR.md`
|
|
12
|
+
- Automatic clipboard copy
|
|
13
|
+
|
|
14
|
+
## Requirements
|
|
15
|
+
|
|
16
|
+
- Node.js 18+
|
|
17
|
+
- FFmpeg (`brew install ffmpeg` on macOS)
|
|
18
|
+
- Groq API key
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npm install
|
|
24
|
+
npm run build
|
|
25
|
+
npm link
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# Set your API key
|
|
32
|
+
export GROQ_API_KEY="your-api-key"
|
|
33
|
+
|
|
34
|
+
# Run the tool
|
|
35
|
+
whspr
|
|
36
|
+
|
|
37
|
+
# With verbose output
|
|
38
|
+
whspr --verbose
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Press **Enter** to stop recording.
|
|
42
|
+
|
|
43
|
+
## Custom Vocabulary
|
|
44
|
+
|
|
45
|
+
Create a `WHSPR.md` file in your current directory to provide custom vocabulary, names, or instructions for the AI post-processor:
|
|
46
|
+
|
|
47
|
+
```markdown
|
|
48
|
+
# Custom Vocabulary
|
|
49
|
+
|
|
50
|
+
- PostgreSQL (not "post crest QL")
|
|
51
|
+
- Kubernetes (not "cooper netties")
|
|
52
|
+
- My colleague's name is "Priya" not "Maria"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## How It Works
|
|
56
|
+
|
|
57
|
+
1. Records audio from your default microphone using FFmpeg
|
|
58
|
+
2. Displays a live waveform visualization based on audio levels
|
|
59
|
+
3. Converts the recording to MP3
|
|
60
|
+
4. Sends audio to Groq's Whisper API for transcription
|
|
61
|
+
5. Reads `WHSPR.md` from current directory (if exists)
|
|
62
|
+
6. Sends transcription + custom vocabulary to AI for post-processing
|
|
63
|
+
7. Prints result and copies to clipboard
|
|
64
|
+
|
|
65
|
+
If transcription fails, the recording is saved to `~/.whspr/recordings/` for manual recovery.
|
|
66
|
+
|
|
67
|
+
## License
|
|
68
|
+
|
|
69
|
+
MIT
|
package/bin/whspr.js
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "whspr",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "CLI tool for audio transcription with Groq Whisper API",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"whspr": "./bin/whspr.js"
|
|
8
|
+
},
|
|
9
|
+
"keywords": ["whisper", "transcription", "audio", "cli", "groq"],
|
|
10
|
+
"author": "Merkie",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "https://github.com/Merkie/whspr"
|
|
15
|
+
},
|
|
16
|
+
"scripts": {
|
|
17
|
+
"build": "tsc",
|
|
18
|
+
"dev": "tsx src/index.ts"
|
|
19
|
+
},
|
|
20
|
+
"dependencies": {
|
|
21
|
+
"@ai-sdk/groq": "^1.x",
|
|
22
|
+
"ai": "^4.x",
|
|
23
|
+
"chalk": "^5.x",
|
|
24
|
+
"clipboardy": "^4.x",
|
|
25
|
+
"groq-sdk": "^0.x",
|
|
26
|
+
"zod": "^3.x"
|
|
27
|
+
},
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"@types/node": "^22.x",
|
|
30
|
+
"prettier": "^3.8.0",
|
|
31
|
+
"tsx": "^4.x",
|
|
32
|
+
"typescript": "^5.x"
|
|
33
|
+
}
|
|
34
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { record, convertToMp3, RecordingResult } from "./recorder.js";
|
|
3
|
+
import { transcribe } from "./transcribe.js";
|
|
4
|
+
import { postprocess } from "./postprocess.js";
|
|
5
|
+
import { copyToClipboard } from "./utils/clipboard.js";
|
|
6
|
+
import chalk from "chalk";
|
|
7
|
+
import fs from "fs";
|
|
8
|
+
import path from "path";
|
|
9
|
+
import os from "os";
|
|
10
|
+
|
|
11
|
+
const verbose = process.argv.includes("--verbose") || process.argv.includes("-v");
|
|
12
|
+
|
|
13
|
+
function status(message: string) {
|
|
14
|
+
process.stdout.write(`\x1b[2K\r${chalk.blue(message)}`);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
function clearStatus() {
|
|
18
|
+
process.stdout.write("\x1b[2K\r");
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
function formatDuration(seconds: number): string {
|
|
22
|
+
const mins = Math.floor(seconds / 60);
|
|
23
|
+
const secs = seconds % 60;
|
|
24
|
+
if (mins > 0) {
|
|
25
|
+
return `${mins}m ${secs}s`;
|
|
26
|
+
}
|
|
27
|
+
return `${secs}s`;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
async function main() {
|
|
31
|
+
try {
|
|
32
|
+
// 1. Record audio
|
|
33
|
+
const recording = await record(verbose);
|
|
34
|
+
const processStart = Date.now();
|
|
35
|
+
|
|
36
|
+
// 2. Convert to MP3
|
|
37
|
+
status("Converting to MP3...");
|
|
38
|
+
const mp3Path = await convertToMp3(recording.path);
|
|
39
|
+
|
|
40
|
+
try {
|
|
41
|
+
// 3. Transcribe with Whisper
|
|
42
|
+
status("Transcribing...");
|
|
43
|
+
const rawText = await transcribe(mp3Path);
|
|
44
|
+
|
|
45
|
+
if (verbose) {
|
|
46
|
+
clearStatus();
|
|
47
|
+
console.log(chalk.gray(`Raw: ${rawText}`));
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// 4. Read WHSPR.md or WHISPER.md if exists
|
|
51
|
+
const whsprMdPath = path.join(process.cwd(), "WHSPR.md");
|
|
52
|
+
const whisperMdPath = path.join(process.cwd(), "WHISPER.md");
|
|
53
|
+
let customPrompt: string | null = null;
|
|
54
|
+
let vocabFile: string | null = null;
|
|
55
|
+
|
|
56
|
+
if (fs.existsSync(whsprMdPath)) {
|
|
57
|
+
customPrompt = fs.readFileSync(whsprMdPath, "utf-8");
|
|
58
|
+
vocabFile = "WHSPR.md";
|
|
59
|
+
} else if (fs.existsSync(whisperMdPath)) {
|
|
60
|
+
customPrompt = fs.readFileSync(whisperMdPath, "utf-8");
|
|
61
|
+
vocabFile = "WHISPER.md";
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
if (customPrompt && verbose) {
|
|
65
|
+
console.log(chalk.gray(`Using custom vocabulary from ${vocabFile}`));
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// 5. Post-process
|
|
69
|
+
status("Post-processing...");
|
|
70
|
+
const fixedText = await postprocess(rawText, customPrompt);
|
|
71
|
+
|
|
72
|
+
// 6. Output and copy
|
|
73
|
+
clearStatus();
|
|
74
|
+
const processTime = ((Date.now() - processStart) / 1000).toFixed(1);
|
|
75
|
+
const wordCount = fixedText.trim().split(/\s+/).filter(w => w.length > 0).length;
|
|
76
|
+
const charCount = fixedText.length;
|
|
77
|
+
|
|
78
|
+
// Log stats
|
|
79
|
+
console.log(
|
|
80
|
+
chalk.dim("Audio: ") + chalk.white(formatDuration(recording.durationSeconds)) +
|
|
81
|
+
chalk.dim(" • Processing: ") + chalk.white(processTime + "s")
|
|
82
|
+
);
|
|
83
|
+
|
|
84
|
+
// Draw box
|
|
85
|
+
const termWidth = Math.min(process.stdout.columns || 60, 80);
|
|
86
|
+
const lineWidth = termWidth - 2;
|
|
87
|
+
const label = " TRANSCRIPT ";
|
|
88
|
+
console.log(chalk.dim("┌─") + chalk.cyan(label) + chalk.dim("─".repeat(lineWidth - label.length - 1) + "┐"));
|
|
89
|
+
const lines = fixedText.split("\n");
|
|
90
|
+
for (const line of lines) {
|
|
91
|
+
// Wrap long lines
|
|
92
|
+
let remaining = line;
|
|
93
|
+
while (remaining.length > 0) {
|
|
94
|
+
const chunk = remaining.slice(0, lineWidth - 2);
|
|
95
|
+
remaining = remaining.slice(lineWidth - 2);
|
|
96
|
+
console.log(chalk.dim("│ ") + chalk.white(chunk.padEnd(lineWidth - 2)) + chalk.dim(" │"));
|
|
97
|
+
}
|
|
98
|
+
if (line.length === 0) {
|
|
99
|
+
console.log(chalk.dim("│ " + " ".repeat(lineWidth - 2) + " │"));
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
const stats = ` ${wordCount} words • ${charCount} chars `;
|
|
103
|
+
const bottomLine = "─".repeat(lineWidth - stats.length - 1) + " ";
|
|
104
|
+
console.log(chalk.dim("└" + bottomLine) + chalk.dim(stats) + chalk.dim("┘"));
|
|
105
|
+
await copyToClipboard(fixedText);
|
|
106
|
+
console.log(chalk.green("✓") + chalk.gray(" Copied to clipboard"));
|
|
107
|
+
|
|
108
|
+
// 7. Clean up
|
|
109
|
+
fs.unlinkSync(mp3Path);
|
|
110
|
+
} catch (error) {
|
|
111
|
+
clearStatus();
|
|
112
|
+
// Save recording on failure
|
|
113
|
+
const backupDir = path.join(os.homedir(), ".whspr", "recordings");
|
|
114
|
+
fs.mkdirSync(backupDir, { recursive: true });
|
|
115
|
+
const backupPath = path.join(backupDir, `recording-${Date.now()}.mp3`);
|
|
116
|
+
fs.renameSync(mp3Path, backupPath);
|
|
117
|
+
console.error(chalk.red(`Error: ${error}`));
|
|
118
|
+
console.log(chalk.yellow(`Recording saved to: ${backupPath}`));
|
|
119
|
+
process.exit(1);
|
|
120
|
+
}
|
|
121
|
+
} catch (error) {
|
|
122
|
+
clearStatus();
|
|
123
|
+
// Silent exit on user cancel
|
|
124
|
+
if (error instanceof Error && error.message === "cancelled") {
|
|
125
|
+
process.exit(0);
|
|
126
|
+
}
|
|
127
|
+
console.error(chalk.red(`Recording error: ${error}`));
|
|
128
|
+
process.exit(1);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
main();
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { generateObject } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { withRetry } from "./utils/retry.js";
|
|
4
|
+
import { groq } from "./utils/groq.js";
|
|
5
|
+
|
|
6
|
+
const MODEL = "openai/gpt-oss-120b";
|
|
7
|
+
|
|
8
|
+
const outputSchema = z.object({
|
|
9
|
+
fixed_transcription: z.string(),
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
export async function postprocess(
|
|
13
|
+
rawTranscription: string,
|
|
14
|
+
customPrompt: string | null
|
|
15
|
+
): Promise<string> {
|
|
16
|
+
const result = await withRetry(async () => {
|
|
17
|
+
const response = await generateObject({
|
|
18
|
+
model: groq(MODEL),
|
|
19
|
+
schema: outputSchema,
|
|
20
|
+
messages: [
|
|
21
|
+
{
|
|
22
|
+
role: "system",
|
|
23
|
+
content: "Your task is to clean up/fix transcribed text generated from mic input by the user according to the user's own prompt, this prompt may contain custom vocabulary, instructions, etc. Please return the user's transcription with the fixes made (e.g. the AI might hear \"PostgreSQL\" as \"post crest QL\" you need to use your own reasoning to fix these mistakes in the transcription)"
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
role: "user",
|
|
27
|
+
content: customPrompt
|
|
28
|
+
? `Here's my custom user prompt:\n\`\`\`\n${customPrompt}\n\`\`\`\n\nHere's my raw transcription output that I need you to edit:\n\`\`\`\n${rawTranscription}\n\`\`\``
|
|
29
|
+
: `Here's my raw transcription output that I need you to edit:\n\`\`\`\n${rawTranscription}\n\`\`\``
|
|
30
|
+
}
|
|
31
|
+
],
|
|
32
|
+
});
|
|
33
|
+
return response.object;
|
|
34
|
+
}, 3, "postprocess");
|
|
35
|
+
|
|
36
|
+
return result.fixed_transcription;
|
|
37
|
+
}
|
package/src/recorder.ts
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import { spawn, ChildProcess } from "child_process";
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import os from "os";
|
|
5
|
+
import chalk from "chalk";
|
|
6
|
+
|
|
7
|
+
const MAX_DURATION_SECONDS = 900; // 15 minutes
|
|
8
|
+
const DEFAULT_WAVE_WIDTH = 60;
|
|
9
|
+
const STATUS_TEXT_WIDTH = 45; // " Recording [00:00 / 15:00] Press Enter to stop"
|
|
10
|
+
|
|
11
|
+
// Horizontal bar characters for waveform (quiet to loud)
|
|
12
|
+
const WAVE_CHARS = ["·", "-", "=", "≡", "■", "█"];
|
|
13
|
+
|
|
14
|
+
function formatTime(seconds: number): string {
|
|
15
|
+
const mins = Math.floor(seconds / 60);
|
|
16
|
+
const secs = seconds % 60;
|
|
17
|
+
return `${mins.toString().padStart(2, "0")}:${secs.toString().padStart(2, "0")}`;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
function dbToChar(db: number): string {
|
|
21
|
+
// Adjusted range: -45 (quiet) to -18 (normal speech peaks)
|
|
22
|
+
const clamped = Math.max(-45, Math.min(-18, db));
|
|
23
|
+
const normalized = (clamped + 45) / 27;
|
|
24
|
+
const index = Math.min(
|
|
25
|
+
WAVE_CHARS.length - 1,
|
|
26
|
+
Math.floor(normalized * WAVE_CHARS.length),
|
|
27
|
+
);
|
|
28
|
+
return WAVE_CHARS[index];
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
function getWaveWidth(): number {
|
|
32
|
+
const termWidth = process.stdout.columns || 80;
|
|
33
|
+
// If terminal is wide enough for single line, use default
|
|
34
|
+
if (termWidth >= DEFAULT_WAVE_WIDTH + STATUS_TEXT_WIDTH) {
|
|
35
|
+
return DEFAULT_WAVE_WIDTH;
|
|
36
|
+
}
|
|
37
|
+
// Otherwise, use full terminal width for wave (will wrap text to next line)
|
|
38
|
+
return Math.max(10, termWidth - 2);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/** Result of a completed microphone capture. */
export interface RecordingResult {
  // Absolute path to the captured WAV file (inside a whspr temp directory).
  path: string;
  // Wall-clock length of the recording in whole seconds.
  durationSeconds: number;
}
|
|
45
|
+
|
|
46
|
+
export async function record(verbose = false): Promise<RecordingResult> {
|
|
47
|
+
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "whspr-"));
|
|
48
|
+
const wavPath = path.join(tmpDir, "recording.wav");
|
|
49
|
+
|
|
50
|
+
return new Promise((resolve, reject) => {
|
|
51
|
+
// Initialize waveform buffer
|
|
52
|
+
let waveWidth = getWaveWidth();
|
|
53
|
+
const waveBuffer: string[] = new Array(waveWidth).fill(" ");
|
|
54
|
+
let currentDb = -60;
|
|
55
|
+
let cancelled = false;
|
|
56
|
+
|
|
57
|
+
// Spawn FFmpeg with ebur128 filter to get volume levels
|
|
58
|
+
const ffmpeg: ChildProcess = spawn(
|
|
59
|
+
"ffmpeg",
|
|
60
|
+
[
|
|
61
|
+
"-f",
|
|
62
|
+
"avfoundation",
|
|
63
|
+
"-i",
|
|
64
|
+
":0",
|
|
65
|
+
"-af",
|
|
66
|
+
"ebur128=peak=true",
|
|
67
|
+
"-t",
|
|
68
|
+
MAX_DURATION_SECONDS.toString(),
|
|
69
|
+
"-y",
|
|
70
|
+
wavPath,
|
|
71
|
+
],
|
|
72
|
+
{
|
|
73
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
74
|
+
},
|
|
75
|
+
);
|
|
76
|
+
|
|
77
|
+
let elapsedSeconds = 0;
|
|
78
|
+
let stopped = false;
|
|
79
|
+
|
|
80
|
+
function renderTUI() {
|
|
81
|
+
const elapsed = formatTime(elapsedSeconds);
|
|
82
|
+
const max = formatTime(MAX_DURATION_SECONDS);
|
|
83
|
+
const wave = waveBuffer.join("");
|
|
84
|
+
const termWidth = process.stdout.columns || 80;
|
|
85
|
+
const singleLineWidth = waveWidth + STATUS_TEXT_WIDTH;
|
|
86
|
+
|
|
87
|
+
if (termWidth >= singleLineWidth) {
|
|
88
|
+
// Single line layout
|
|
89
|
+
process.stdout.write(
|
|
90
|
+
`\x1b[2K\r${chalk.cyan(wave)} ${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}`,
|
|
91
|
+
);
|
|
92
|
+
} else {
|
|
93
|
+
// Two line layout: wave on first line, status on second
|
|
94
|
+
process.stdout.write(
|
|
95
|
+
`\x1b[2K\r${chalk.cyan(wave)}\n\x1b[2K${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}\x1b[A\r`,
|
|
96
|
+
);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Update timer every second
|
|
101
|
+
const timer = setInterval(() => {
|
|
102
|
+
if (stopped) return;
|
|
103
|
+
elapsedSeconds++;
|
|
104
|
+
renderTUI();
|
|
105
|
+
|
|
106
|
+
if (elapsedSeconds >= MAX_DURATION_SECONDS) {
|
|
107
|
+
clearInterval(timer);
|
|
108
|
+
}
|
|
109
|
+
}, 1000);
|
|
110
|
+
|
|
111
|
+
// Update waveform more frequently
|
|
112
|
+
const waveTimer = setInterval(() => {
|
|
113
|
+
if (stopped) return;
|
|
114
|
+
// Push new character based on current dB level
|
|
115
|
+
waveBuffer.shift();
|
|
116
|
+
waveBuffer.push(dbToChar(currentDb));
|
|
117
|
+
renderTUI();
|
|
118
|
+
}, 50);
|
|
119
|
+
|
|
120
|
+
// Initial display
|
|
121
|
+
renderTUI();
|
|
122
|
+
|
|
123
|
+
// Parse stderr for volume levels from ebur128
|
|
124
|
+
ffmpeg.stderr?.on("data", (data: Buffer) => {
|
|
125
|
+
const output = data.toString();
|
|
126
|
+
|
|
127
|
+
// Look for FTPK (frame true peak) from ebur128 output
|
|
128
|
+
// Format: "FTPK: -XX.X -XX.X dBFS"
|
|
129
|
+
const ftpkMatch = output.match(/FTPK:\s*(-?[\d.]+)\s+(-?[\d.]+)\s+dBFS/);
|
|
130
|
+
if (ftpkMatch) {
|
|
131
|
+
// Average the left and right channels
|
|
132
|
+
const left = parseFloat(ftpkMatch[1]);
|
|
133
|
+
const right = parseFloat(ftpkMatch[2]);
|
|
134
|
+
if (!isNaN(left) && !isNaN(right)) {
|
|
135
|
+
currentDb = (left + right) / 2;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
// Listen for Enter to stop, Ctrl+C to cancel
|
|
141
|
+
const onKeypress = (data: Buffer) => {
|
|
142
|
+
const key = data.toString();
|
|
143
|
+
const isEnter = key.includes("\n") || key.includes("\r");
|
|
144
|
+
const isCtrlC = key.includes("\x03");
|
|
145
|
+
|
|
146
|
+
if (isEnter || isCtrlC) {
|
|
147
|
+
stopped = true;
|
|
148
|
+
cancelled = isCtrlC;
|
|
149
|
+
clearInterval(timer);
|
|
150
|
+
clearInterval(waveTimer);
|
|
151
|
+
process.stdin.removeListener("data", onKeypress);
|
|
152
|
+
process.stdin.setRawMode(false);
|
|
153
|
+
process.stdin.pause();
|
|
154
|
+
|
|
155
|
+
// Send SIGINT to FFmpeg to stop gracefully
|
|
156
|
+
ffmpeg.kill("SIGINT");
|
|
157
|
+
}
|
|
158
|
+
};
|
|
159
|
+
|
|
160
|
+
if (process.stdin.isTTY) {
|
|
161
|
+
process.stdin.setRawMode(true);
|
|
162
|
+
process.stdin.resume();
|
|
163
|
+
process.stdin.on("data", onKeypress);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
ffmpeg.on("close", (code) => {
|
|
167
|
+
clearInterval(timer);
|
|
168
|
+
clearInterval(waveTimer);
|
|
169
|
+
const termWidth = process.stdout.columns || 80;
|
|
170
|
+
const singleLineWidth = waveWidth + STATUS_TEXT_WIDTH;
|
|
171
|
+
if (termWidth >= singleLineWidth) {
|
|
172
|
+
process.stdout.write("\x1b[2K\r"); // Clear the line
|
|
173
|
+
} else {
|
|
174
|
+
process.stdout.write("\x1b[2K\n\x1b[2K\x1b[A\r"); // Clear both lines
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
if (cancelled) {
|
|
178
|
+
// User pressed Ctrl+C - clean up and reject
|
|
179
|
+
if (fs.existsSync(wavPath)) {
|
|
180
|
+
fs.unlinkSync(wavPath);
|
|
181
|
+
}
|
|
182
|
+
reject(new Error("cancelled"));
|
|
183
|
+
} else if (stopped || code === 0 || code === 255) {
|
|
184
|
+
// FFmpeg returns 255 when interrupted with SIGINT
|
|
185
|
+
if (fs.existsSync(wavPath)) {
|
|
186
|
+
if (verbose) {
|
|
187
|
+
console.log(
|
|
188
|
+
chalk.green(`Recording complete (${formatTime(elapsedSeconds)})`),
|
|
189
|
+
);
|
|
190
|
+
}
|
|
191
|
+
resolve({ path: wavPath, durationSeconds: elapsedSeconds });
|
|
192
|
+
} else {
|
|
193
|
+
reject(new Error("Recording failed: no output file created"));
|
|
194
|
+
}
|
|
195
|
+
} else {
|
|
196
|
+
reject(new Error(`FFmpeg exited with code ${code}`));
|
|
197
|
+
}
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
ffmpeg.on("error", (err) => {
|
|
201
|
+
clearInterval(timer);
|
|
202
|
+
clearInterval(waveTimer);
|
|
203
|
+
stopped = true;
|
|
204
|
+
if (process.stdin.isTTY) {
|
|
205
|
+
process.stdin.setRawMode(false);
|
|
206
|
+
process.stdin.pause();
|
|
207
|
+
}
|
|
208
|
+
reject(new Error(`Failed to start FFmpeg: ${err.message}`));
|
|
209
|
+
});
|
|
210
|
+
});
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
export async function convertToMp3(wavPath: string): Promise<string> {
|
|
214
|
+
const mp3Path = wavPath.replace(/\.wav$/, ".mp3");
|
|
215
|
+
|
|
216
|
+
return new Promise((resolve, reject) => {
|
|
217
|
+
const ffmpeg = spawn(
|
|
218
|
+
"ffmpeg",
|
|
219
|
+
[
|
|
220
|
+
"-i",
|
|
221
|
+
wavPath,
|
|
222
|
+
"-codec:a",
|
|
223
|
+
"libmp3lame",
|
|
224
|
+
"-qscale:a",
|
|
225
|
+
"2",
|
|
226
|
+
"-y",
|
|
227
|
+
mp3Path,
|
|
228
|
+
],
|
|
229
|
+
{
|
|
230
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
231
|
+
},
|
|
232
|
+
);
|
|
233
|
+
|
|
234
|
+
ffmpeg.on("close", (code) => {
|
|
235
|
+
if (code === 0) {
|
|
236
|
+
// Delete the WAV file after successful conversion
|
|
237
|
+
fs.unlinkSync(wavPath);
|
|
238
|
+
resolve(mp3Path);
|
|
239
|
+
} else {
|
|
240
|
+
reject(new Error(`MP3 conversion failed with code ${code}`));
|
|
241
|
+
}
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
ffmpeg.on("error", (err) => {
|
|
245
|
+
reject(new Error(`Failed to convert to MP3: ${err.message}`));
|
|
246
|
+
});
|
|
247
|
+
});
|
|
248
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import Groq from "groq-sdk";
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
|
|
4
|
+
const groq = new Groq(); // Uses GROQ_API_KEY env var
|
|
5
|
+
|
|
6
|
+
export async function transcribe(audioPath: string): Promise<string> {
|
|
7
|
+
const transcription = await groq.audio.transcriptions.create({
|
|
8
|
+
file: fs.createReadStream(audioPath),
|
|
9
|
+
model: "whisper-large-v3-turbo",
|
|
10
|
+
temperature: 0,
|
|
11
|
+
language: "en",
|
|
12
|
+
});
|
|
13
|
+
return transcription.text;
|
|
14
|
+
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export async function withRetry<T>(
|
|
2
|
+
fn: () => Promise<T>,
|
|
3
|
+
maxAttempts = 3,
|
|
4
|
+
label = "API call"
|
|
5
|
+
): Promise<T> {
|
|
6
|
+
let lastError: Error | undefined;
|
|
7
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
8
|
+
try {
|
|
9
|
+
return await fn();
|
|
10
|
+
} catch (error) {
|
|
11
|
+
lastError = error instanceof Error ? error : new Error(String(error));
|
|
12
|
+
console.warn(`${label} attempt ${attempt}/${maxAttempts} failed:`, lastError.message);
|
|
13
|
+
if (attempt < maxAttempts) {
|
|
14
|
+
await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
throw lastError;
|
|
19
|
+
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "NodeNext",
|
|
5
|
+
"moduleResolution": "NodeNext",
|
|
6
|
+
"strict": true,
|
|
7
|
+
"esModuleInterop": true,
|
|
8
|
+
"skipLibCheck": true,
|
|
9
|
+
"forceConsistentCasingInFileNames": true,
|
|
10
|
+
"outDir": "./dist",
|
|
11
|
+
"rootDir": "./src",
|
|
12
|
+
"declaration": true
|
|
13
|
+
},
|
|
14
|
+
"include": ["src/**/*"],
|
|
15
|
+
"exclude": ["node_modules", "dist"]
|
|
16
|
+
}
|