whspr 1.0.2 → 1.0.4

This diff shows the changes between the publicly released versions 1.0.2 and 1.0.4 of the whspr package, as published to the public registry.
package/dist/index.d.ts CHANGED
@@ -1,2 +1,17 @@
  #!/usr/bin/env node
- export {};
+ export declare const DEFAULTS: {
+     transcriptionModel: "whisper-large-v3-turbo";
+     language: string;
+     systemPrompt: string;
+     customPromptPrefix: string;
+     transcriptionPrefix: string;
+ };
+ export interface WhsprSettings {
+     verbose?: boolean;
+     suffix?: string;
+     transcriptionModel?: "whisper-large-v3" | "whisper-large-v3-turbo";
+     language?: string;
+     systemPrompt?: string;
+     customPromptPrefix?: string;
+     transcriptionPrefix?: string;
+ }
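
The new WhsprSettings interface describes an optional user settings file. A minimal sketch of what ~/.whspr/settings.json could contain (the path comes from SETTINGS_PATH in index.js below; all keys are optional, and the values here are purely illustrative):

    {
      "verbose": true,
      "suffix": "\n\n-- dictated with whspr",
      "transcriptionModel": "whisper-large-v3",
      "language": "en"
    }
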
package/dist/index.js CHANGED
@@ -7,7 +7,69 @@ import chalk from "chalk";
  import fs from "fs";
  import path from "path";
  import os from "os";
- const verbose = process.argv.includes("--verbose") || process.argv.includes("-v");
+ // Default prompts (can be overridden in settings.json)
+ export const DEFAULTS = {
+     transcriptionModel: "whisper-large-v3-turbo",
+     language: "en",
+     systemPrompt: 'Your task is to clean up/fix transcribed text generated from mic input by the user according to the user\'s own prompt, this prompt may contain custom vocabulary, instructions, etc. Please return the user\'s transcription with the fixes made (e.g. the AI might hear "PostgreSQL" as "post crest QL" you need to use your own reasoning to fix these mistakes in the transcription)',
+     customPromptPrefix: "Here's my custom user prompt:",
+     transcriptionPrefix: "Here's my raw transcription output that I need you to edit:",
+ };
+ const WHSPR_DIR = path.join(os.homedir(), ".whspr");
+ const SETTINGS_PATH = path.join(WHSPR_DIR, "settings.json");
+ function loadSettings() {
+     try {
+         if (fs.existsSync(SETTINGS_PATH)) {
+             const content = fs.readFileSync(SETTINGS_PATH, "utf-8");
+             return JSON.parse(content);
+         }
+     }
+     catch (error) {
+         // Silently ignore invalid settings file
+     }
+     return {};
+ }
+ function loadCustomPrompt(verbose) {
+     const sources = [];
+     let globalPrompt = null;
+     let localPrompt = null;
+     // Check for global WHSPR.md or WHISPER.md in ~/.whspr/
+     const globalWhsprPath = path.join(WHSPR_DIR, "WHSPR.md");
+     const globalWhisperPath = path.join(WHSPR_DIR, "WHISPER.md");
+     if (fs.existsSync(globalWhsprPath)) {
+         globalPrompt = fs.readFileSync(globalWhsprPath, "utf-8");
+         sources.push("~/.whspr/WHSPR.md");
+     }
+     else if (fs.existsSync(globalWhisperPath)) {
+         globalPrompt = fs.readFileSync(globalWhisperPath, "utf-8");
+         sources.push("~/.whspr/WHISPER.md");
+     }
+     // Check for local WHSPR.md or WHISPER.md in current directory
+     const localWhsprPath = path.join(process.cwd(), "WHSPR.md");
+     const localWhisperPath = path.join(process.cwd(), "WHISPER.md");
+     if (fs.existsSync(localWhsprPath)) {
+         localPrompt = fs.readFileSync(localWhsprPath, "utf-8");
+         sources.push("./WHSPR.md");
+     }
+     else if (fs.existsSync(localWhisperPath)) {
+         localPrompt = fs.readFileSync(localWhisperPath, "utf-8");
+         sources.push("./WHISPER.md");
+     }
+     // Combine prompts: global first, then local
+     let combinedPrompt = null;
+     if (globalPrompt && localPrompt) {
+         combinedPrompt = globalPrompt + "\n\n" + localPrompt;
+     }
+     else if (globalPrompt) {
+         combinedPrompt = globalPrompt;
+     }
+     else if (localPrompt) {
+         combinedPrompt = localPrompt;
+     }
+     return { prompt: combinedPrompt, sources };
+ }
+ const settings = loadSettings();
+ const verbose = settings.verbose || process.argv.includes("--verbose") || process.argv.includes("-v");
  function status(message) {
      process.stdout.write(`\x1b[2K\r${chalk.blue(message)}`);
  }
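
To illustrate the merge order with hypothetical file contents (not part of the package): when both a global and a local prompt file exist, loadCustomPrompt joins them with a blank line, global first:

    // Assume ~/.whspr/WHSPR.md contains "Vocabulary: PostgreSQL, Groq"
    // and ./WHSPR.md contains "Project terms: whspr, ffmpeg".
    loadCustomPrompt(false);
    // => {
    //      prompt: "Vocabulary: PostgreSQL, Groq\n\nProject terms: whspr, ffmpeg",
    //      sources: ["~/.whspr/WHSPR.md", "./WHSPR.md"]
    //    }
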
@@ -40,31 +102,28 @@ async function main() {
      try {
          // 3. Transcribe with Whisper
          status("Transcribing...");
-         const rawText = await transcribe(mp3Path);
+         const rawText = await transcribe(mp3Path, settings.transcriptionModel ?? DEFAULTS.transcriptionModel, settings.language ?? DEFAULTS.language);
          if (verbose) {
              clearStatus();
              console.log(chalk.gray(`Raw: ${rawText}`));
          }
-         // 4. Read WHSPR.md or WHISPER.md if exists
-         const whsprMdPath = path.join(process.cwd(), "WHSPR.md");
-         const whisperMdPath = path.join(process.cwd(), "WHISPER.md");
-         let customPrompt = null;
-         let vocabFile = null;
-         if (fs.existsSync(whsprMdPath)) {
-             customPrompt = fs.readFileSync(whsprMdPath, "utf-8");
-             vocabFile = "WHSPR.md";
-         }
-         else if (fs.existsSync(whisperMdPath)) {
-             customPrompt = fs.readFileSync(whisperMdPath, "utf-8");
-             vocabFile = "WHISPER.md";
-         }
+         // 4. Read WHSPR.md or WHISPER.md (global from ~/.whspr/ and/or local)
+         const { prompt: customPrompt, sources: vocabSources } = loadCustomPrompt(verbose);
          if (customPrompt && verbose) {
-             console.log(chalk.gray(`Using custom vocabulary from ${vocabFile}`));
+             console.log(chalk.gray(`Using custom vocabulary from: ${vocabSources.join(" + ")}`));
          }
          // 5. Post-process
          status("Post-processing...");
-         const fixedText = await postprocess(rawText, customPrompt);
-         // 6. Output and copy
+         let fixedText = await postprocess(rawText, customPrompt, {
+             systemPrompt: settings.systemPrompt ?? DEFAULTS.systemPrompt,
+             customPromptPrefix: settings.customPromptPrefix ?? DEFAULTS.customPromptPrefix,
+             transcriptionPrefix: settings.transcriptionPrefix ?? DEFAULTS.transcriptionPrefix,
+         });
+         // 6. Apply suffix if configured
+         if (settings.suffix) {
+             fixedText = fixedText + settings.suffix;
+         }
+         // 7. Output and copy
          clearStatus();
          const processTime = ((Date.now() - processStart) / 1000).toFixed(1);
          const wordCount = fixedText.trim().split(/\s+/).filter(w => w.length > 0).length;
@@ -95,7 +154,7 @@ async function main() {
          console.log(chalk.dim("└" + bottomLine) + chalk.dim(stats) + chalk.dim("┘"));
          await copyToClipboard(fixedText);
          console.log(chalk.green("✓") + chalk.gray(" Copied to clipboard"));
-         // 7. Clean up
+         // 8. Clean up
          fs.unlinkSync(mp3Path);
      }
      catch (error) {
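
Each option resolves independently: a key present in settings.json wins, otherwise the DEFAULTS entry applies, and a configured suffix is appended after post-processing. A sketch with a hypothetical partial settings file:

    // settings.json: { "language": "de", "suffix": " /end" }
    settings.transcriptionModel ?? DEFAULTS.transcriptionModel; // "whisper-large-v3-turbo" (default)
    settings.language ?? DEFAULTS.language; // "de" (from settings)
    // After postprocess(): "Hallo Welt" + " /end" -> "Hallo Welt /end"
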
@@ -1 +1,6 @@
- export declare function postprocess(rawTranscription: string, customPrompt: string | null): Promise<string>;
+ export interface PostprocessOptions {
+     systemPrompt: string;
+     customPromptPrefix: string;
+     transcriptionPrefix: string;
+ }
+ export declare function postprocess(rawTranscription: string, customPrompt: string | null, options: PostprocessOptions): Promise<string>;
@@ -6,7 +6,8 @@ const MODEL = "openai/gpt-oss-120b";
  const outputSchema = z.object({
      fixed_transcription: z.string(),
  });
- export async function postprocess(rawTranscription, customPrompt) {
+ export async function postprocess(rawTranscription, customPrompt, options) {
+     const { systemPrompt, customPromptPrefix, transcriptionPrefix } = options;
      const result = await withRetry(async () => {
          const response = await generateObject({
              model: groq(MODEL),
@@ -14,14 +15,20 @@ export async function postprocess(rawTranscription, customPrompt) {
              messages: [
                  {
                      role: "system",
-                     content: "Your task is to clean up/fix transcribed text generated from mic input by the user according to the user's own prompt, this prompt may contain custom vocabulary, instructions, etc. Please return the user's transcription with the fixes made (e.g. the AI might hear \"PostgreSQL\" as \"post crest QL\" you need to use your own reasoning to fix these mistakes in the transcription)"
+                     content: systemPrompt,
                  },
                  {
                      role: "user",
-                     content: customPrompt
-                         ? `Here's my custom user prompt:\n\`\`\`\n${customPrompt}\n\`\`\`\n\nHere's my raw transcription output that I need you to edit:\n\`\`\`\n${rawTranscription}\n\`\`\``
-                         : `Here's my raw transcription output that I need you to edit:\n\`\`\`\n${rawTranscription}\n\`\`\``
-                 }
+                     content: [
+                         customPrompt
+                             ? `${customPromptPrefix}\n\`\`\`\n${customPrompt}\n\`\`\`\n\n`
+                             : null,
+                         `${transcriptionPrefix}\n\`\`\`\n${rawTranscription}\n\`\`\``,
+                     ]
+                         .filter(Boolean)
+                         .join("")
+                         .trim(),
+                 },
              ],
          });
          return response.object;
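
With the default prefixes, the array/filter/join assembly above produces the same user message as the old inline ternary. A sketch of a hypothetical call and the resulting message content (the prefixes shown are the DEFAULTS from index.js):

    await postprocess("post crest QL is great", "Vocabulary: PostgreSQL", {
        systemPrompt: DEFAULTS.systemPrompt,
        customPromptPrefix: "Here's my custom user prompt:",
        transcriptionPrefix: "Here's my raw transcription output that I need you to edit:",
    });
    // User message content:
    // Here's my custom user prompt:
    // ```
    // Vocabulary: PostgreSQL
    // ```
    //
    // Here's my raw transcription output that I need you to edit:
    // ```
    // post crest QL is great
    // ```
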
package/dist/recorder.js CHANGED
@@ -5,7 +5,7 @@ import os from "os";
  import chalk from "chalk";
  const MAX_DURATION_SECONDS = 900; // 15 minutes
  const DEFAULT_WAVE_WIDTH = 60;
- const STATUS_TEXT_WIDTH = 45; // " Recording [00:00 / 15:00] Press Enter to stop"
+ const BRACKET_WIDTH = 2; // For "[" and "]" wrapping the waveform
  // Horizontal bar characters for waveform (quiet to loud)
  const WAVE_CHARS = ["·", "-", "=", "≡", "■", "█"];
  function formatTime(seconds) {
@@ -22,12 +22,9 @@ function dbToChar(db) {
  }
  function getWaveWidth() {
      const termWidth = process.stdout.columns || 80;
-     // If terminal is wide enough for single line, use default
-     if (termWidth >= DEFAULT_WAVE_WIDTH + STATUS_TEXT_WIDTH) {
-         return DEFAULT_WAVE_WIDTH;
-     }
-     // Otherwise, use full terminal width for wave (will wrap text to next line)
-     return Math.max(10, termWidth - 2);
+     // Use full terminal width minus brackets and small margin
+     const availableWidth = termWidth - BRACKET_WIDTH - 2;
+     return Math.max(10, Math.min(DEFAULT_WAVE_WIDTH, availableWidth));
  }
  export async function record(verbose = false) {
      const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "whspr-"));
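
Worked values for the new width formula, Math.max(10, Math.min(DEFAULT_WAVE_WIDTH, termWidth - BRACKET_WIDTH - 2)):

    // termWidth 80: min(60, 76) = 60 -> full default width
    // termWidth 40: min(60, 36) = 36 -> waveform shrinks to fit
    // termWidth 12: max(10, min(60, 8)) = 10 -> never narrower than 10 columns
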
@@ -59,16 +56,8 @@ export async function record(verbose = false) {
          const elapsed = formatTime(elapsedSeconds);
          const max = formatTime(MAX_DURATION_SECONDS);
          const wave = waveBuffer.join("");
-         const termWidth = process.stdout.columns || 80;
-         const singleLineWidth = waveWidth + STATUS_TEXT_WIDTH;
-         if (termWidth >= singleLineWidth) {
-             // Single line layout
-             process.stdout.write(`\x1b[2K\r${chalk.cyan(wave)} ${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}`);
-         }
-         else {
-             // Two line layout: wave on first line, status on second
-             process.stdout.write(`\x1b[2K\r${chalk.cyan(wave)}\n\x1b[2K${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}\x1b[A\r`);
-         }
+         // Always render waveform on its own line, wrapped in brackets
+         process.stdout.write(`\x1b[2K\r${chalk.cyan(`[${wave}]`)}\n\x1b[2K${chalk.blue("Recording")} [${chalk.yellow(elapsed)} / ${max}] ${chalk.gray("Press Enter to stop")}\x1b[A\r`);
      }
      // Update timer every second
      const timer = setInterval(() => {
@@ -131,14 +120,8 @@ export async function record(verbose = false) {
      ffmpeg.on("close", (code) => {
          clearInterval(timer);
          clearInterval(waveTimer);
-         const termWidth = process.stdout.columns || 80;
-         const singleLineWidth = waveWidth + STATUS_TEXT_WIDTH;
-         if (termWidth >= singleLineWidth) {
-             process.stdout.write("\x1b[2K\r"); // Clear the line
-         }
-         else {
-             process.stdout.write("\x1b[2K\n\x1b[2K\x1b[A\r"); // Clear both lines
-         }
+         // Clear both lines (waveform and status)
+         process.stdout.write("\x1b[2K\n\x1b[2K\x1b[A\r");
          if (cancelled) {
              // User pressed Ctrl+C - clean up and reject
              if (fs.existsSync(wavPath)) {
@@ -1 +1,2 @@
- export declare function transcribe(audioPath: string): Promise<string>;
+ export type TranscriptionModel = "whisper-large-v3" | "whisper-large-v3-turbo";
+ export declare function transcribe(audioPath: string, model?: TranscriptionModel, language?: string): Promise<string>;
@@ -1,12 +1,12 @@
  import Groq from "groq-sdk";
  import fs from "fs";
  const groq = new Groq(); // Uses GROQ_API_KEY env var
- export async function transcribe(audioPath) {
+ export async function transcribe(audioPath, model = "whisper-large-v3-turbo", language = "en") {
      const transcription = await groq.audio.transcriptions.create({
          file: fs.createReadStream(audioPath),
-         model: "whisper-large-v3-turbo",
+         model,
          temperature: 0,
-         language: "en",
+         language,
      });
      return transcription.text;
  }
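
Callers can now choose the Whisper model and language per call. A minimal sketch against the new signature (the audio path is hypothetical; the defaults preserve the old behavior):

    // Defaults: whisper-large-v3-turbo, English
    const text = await transcribe("/tmp/whspr-abc123/recording.mp3");
    // Higher-accuracy model, German audio
    const german = await transcribe("/tmp/whspr-abc123/recording.mp3", "whisper-large-v3", "de");
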
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "whspr",
-   "version": "1.0.2",
+   "version": "1.0.4",
    "description": "CLI tool for audio transcription with Groq Whisper API",
    "type": "module",
    "bin": {