whspr 1.0.1 → 1.0.3
This diff reflects the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- package/README.md +30 -16
- package/dist/index.d.ts +16 -1
- package/dist/index.js +85 -19
- package/dist/postprocess.d.ts +6 -1
- package/dist/postprocess.js +13 -6
- package/dist/transcribe.d.ts +2 -1
- package/dist/transcribe.js +3 -3
- package/package.json +1 -1
package/README.md CHANGED

````diff
@@ -1,30 +1,35 @@
 # whspr
 
+[](https://www.npmjs.com/package/whspr)
+[](https://choosealicense.com/licenses/mit/)
+[](./CONTRIBUTING.md)
+
 A CLI tool that records audio from your microphone, transcribes it using Groq's Whisper API, and post-processes the transcription with AI to fix errors and apply custom vocabulary.
 
-
+<p align="center">
+  <img src="./demo.gif" alt="whspr demo" width="600">
+</p>
 
-
-- 15-minute max recording time
-- Transcription via Groq Whisper API
-- AI-powered post-processing to fix transcription errors
-- Custom vocabulary support via `WHSPR.md`
-- Automatic clipboard copy
+## Installation
 
-
+```bash
+npm install -g whspr
+```
 
-
-- FFmpeg (`brew install ffmpeg` on macOS)
-- Groq API key
+### Optional: Alias as `whisper`
 
-
+If you'd like to use `whisper` instead of `whspr`, add this to your shell config (`~/.zshrc` or `~/.bashrc`):
 
 ```bash
-
-npm run build
-npm link
+alias whisper="whspr"
 ```
 
+## Requirements
+
+- Node.js 18+
+- FFmpeg (`brew install ffmpeg` on macOS)
+- Groq API key
+
 ## Usage
 
 ```bash
@@ -40,9 +45,18 @@ whspr --verbose
 
 Press **Enter** to stop recording.
 
+## Features
+
+- Live audio waveform visualization in the terminal
+- 15-minute max recording time
+- Transcription via Groq Whisper API
+- AI-powered post-processing to fix transcription errors
+- Custom vocabulary support via `WHSPR.md`
+- Automatic clipboard copy
+
 ## Custom Vocabulary
 
-Create a `WHSPR.md` file in your current directory to provide custom vocabulary, names, or instructions for the AI post-processor:
+Create a `WHSPR.md` (or `WHISPER.md`) file in your current directory to provide custom vocabulary, names, or instructions for the AI post-processor:
 
 ```markdown
 # Custom Vocabulary
````
package/dist/index.d.ts CHANGED

```diff
@@ -1,2 +1,17 @@
 #!/usr/bin/env node
-export {};
+export declare const DEFAULTS: {
+    transcriptionModel: "whisper-large-v3-turbo";
+    language: string;
+    systemPrompt: string;
+    customPromptPrefix: string;
+    transcriptionPrefix: string;
+};
+export interface WhsprSettings {
+    verbose?: boolean;
+    suffix?: string;
+    transcriptionModel?: "whisper-large-v3" | "whisper-large-v3-turbo";
+    language?: string;
+    systemPrompt?: string;
+    customPromptPrefix?: string;
+    transcriptionPrefix?: string;
+}
```
package/dist/index.js CHANGED

```diff
@@ -7,7 +7,69 @@ import chalk from "chalk";
 import fs from "fs";
 import path from "path";
 import os from "os";
-
+// Default prompts (can be overridden in settings.json)
+export const DEFAULTS = {
+    transcriptionModel: "whisper-large-v3-turbo",
+    language: "en",
+    systemPrompt: 'Your task is to clean up/fix transcribed text generated from mic input by the user according to the user\'s own prompt, this prompt may contain custom vocabulary, instructions, etc. Please return the user\'s transcription with the fixes made (e.g. the AI might hear "PostgreSQL" as "post crest QL" you need to use your own reasoning to fix these mistakes in the transcription)',
+    customPromptPrefix: "Here's my custom user prompt:",
+    transcriptionPrefix: "Here's my raw transcription output that I need you to edit:",
+};
+const WHSPR_DIR = path.join(os.homedir(), ".whspr");
+const SETTINGS_PATH = path.join(WHSPR_DIR, "settings.json");
+function loadSettings() {
+    try {
+        if (fs.existsSync(SETTINGS_PATH)) {
+            const content = fs.readFileSync(SETTINGS_PATH, "utf-8");
+            return JSON.parse(content);
+        }
+    }
+    catch (error) {
+        // Silently ignore invalid settings file
+    }
+    return {};
+}
+function loadCustomPrompt(verbose) {
+    const sources = [];
+    let globalPrompt = null;
+    let localPrompt = null;
+    // Check for global WHSPR.md or WHISPER.md in ~/.whspr/
+    const globalWhsprPath = path.join(WHSPR_DIR, "WHSPR.md");
+    const globalWhisperPath = path.join(WHSPR_DIR, "WHISPER.md");
+    if (fs.existsSync(globalWhsprPath)) {
+        globalPrompt = fs.readFileSync(globalWhsprPath, "utf-8");
+        sources.push("~/.whspr/WHSPR.md");
+    }
+    else if (fs.existsSync(globalWhisperPath)) {
+        globalPrompt = fs.readFileSync(globalWhisperPath, "utf-8");
+        sources.push("~/.whspr/WHISPER.md");
+    }
+    // Check for local WHSPR.md or WHISPER.md in current directory
+    const localWhsprPath = path.join(process.cwd(), "WHSPR.md");
+    const localWhisperPath = path.join(process.cwd(), "WHISPER.md");
+    if (fs.existsSync(localWhsprPath)) {
+        localPrompt = fs.readFileSync(localWhsprPath, "utf-8");
+        sources.push("./WHSPR.md");
+    }
+    else if (fs.existsSync(localWhisperPath)) {
+        localPrompt = fs.readFileSync(localWhisperPath, "utf-8");
+        sources.push("./WHISPER.md");
+    }
+    // Combine prompts: global first, then local
+    let combinedPrompt = null;
+    if (globalPrompt && localPrompt) {
+        combinedPrompt = globalPrompt + "\n\n" + localPrompt;
+    }
+    else if (globalPrompt) {
+        combinedPrompt = globalPrompt;
+    }
+    else if (localPrompt) {
+        combinedPrompt = localPrompt;
+    }
+    return { prompt: combinedPrompt, sources };
+}
+const settings = loadSettings();
+const verbose = settings.verbose || process.argv.includes("--verbose") || process.argv.includes("-v");
 function status(message) {
     process.stdout.write(`\x1b[2K\r${chalk.blue(message)}`);
 }
@@ -23,6 +85,13 @@ function formatDuration(seconds) {
     return `${secs}s`;
 }
 async function main() {
+    // Check for API key before recording
+    if (!process.env.GROQ_API_KEY) {
+        console.error(chalk.red("Error: GROQ_API_KEY environment variable is not set"));
+        console.log(chalk.gray("Get your API key at https://console.groq.com/keys"));
+        console.log(chalk.gray("Then run: export GROQ_API_KEY=\"your-api-key\""));
+        process.exit(1);
+    }
     try {
         // 1. Record audio
         const recording = await record(verbose);
@@ -33,31 +102,28 @@ async function main() {
         try {
             // 3. Transcribe with Whisper
             status("Transcribing...");
-            const rawText = await transcribe(mp3Path);
+            const rawText = await transcribe(mp3Path, settings.transcriptionModel ?? DEFAULTS.transcriptionModel, settings.language ?? DEFAULTS.language);
             if (verbose) {
                 clearStatus();
                 console.log(chalk.gray(`Raw: ${rawText}`));
             }
-            // 4. Read WHSPR.md or WHISPER.md
-            const whsprMdPath = path.join(process.cwd(), "WHSPR.md");
-            const whisperMdPath = path.join(process.cwd(), "WHISPER.md");
-            let customPrompt = null;
-            let vocabFile = null;
-            if (fs.existsSync(whsprMdPath)) {
-                customPrompt = fs.readFileSync(whsprMdPath, "utf-8");
-                vocabFile = "WHSPR.md";
-            }
-            else if (fs.existsSync(whisperMdPath)) {
-                customPrompt = fs.readFileSync(whisperMdPath, "utf-8");
-                vocabFile = "WHISPER.md";
-            }
+            // 4. Read WHSPR.md or WHISPER.md (global from ~/.whspr/ and/or local)
+            const { prompt: customPrompt, sources: vocabSources } = loadCustomPrompt(verbose);
             if (customPrompt && verbose) {
-                console.log(chalk.gray(`Using custom vocabulary from ${vocabFile}`));
+                console.log(chalk.gray(`Using custom vocabulary from: ${vocabSources.join(" + ")}`));
             }
             // 5. Post-process
             status("Post-processing...");
-
-
+            let fixedText = await postprocess(rawText, customPrompt, {
+                systemPrompt: settings.systemPrompt ?? DEFAULTS.systemPrompt,
+                customPromptPrefix: settings.customPromptPrefix ?? DEFAULTS.customPromptPrefix,
+                transcriptionPrefix: settings.transcriptionPrefix ?? DEFAULTS.transcriptionPrefix,
+            });
+            // 6. Apply suffix if configured
+            if (settings.suffix) {
+                fixedText = fixedText + settings.suffix;
+            }
+            // 7. Output and copy
             clearStatus();
             const processTime = ((Date.now() - processStart) / 1000).toFixed(1);
             const wordCount = fixedText.trim().split(/\s+/).filter(w => w.length > 0).length;
@@ -88,7 +154,7 @@ async function main() {
             console.log(chalk.dim("└" + bottomLine) + chalk.dim(stats) + chalk.dim("┘"));
             await copyToClipboard(fixedText);
             console.log(chalk.green("✓") + chalk.gray(" Copied to clipboard"));
-            //
+            // 8. Clean up
             fs.unlinkSync(mp3Path);
         }
         catch (error) {
```
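The behavioral change worth noting in `loadCustomPrompt` is prompt layering: a global vocabulary file in `~/.whspr/` and a local one in the working directory are no longer mutually exclusive; when both exist, they are concatenated global-first with a blank line in between. A minimal sketch of that merge rule, with `combinePrompts` as an illustrative stand-in rather than a package export:

```js
// Illustrative stand-in for the merge step in loadCustomPrompt (not exported):
// the global prompt comes first, the local prompt second, joined by a blank line.
function combinePrompts(globalPrompt, localPrompt) {
    if (globalPrompt && localPrompt) {
        return globalPrompt + "\n\n" + localPrompt;
    }
    return globalPrompt ?? localPrompt; // whichever one exists, or null
}

combinePrompts("# Team vocabulary\n- PostgreSQL", "# Project vocabulary\n- whspr");
// => "# Team vocabulary\n- PostgreSQL\n\n# Project vocabulary\n- whspr"
```

Within each scope the lookup is still either/or: `WHSPR.md` wins over `WHISPER.md`, mirroring the old single-file behavior.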
package/dist/postprocess.d.ts CHANGED

```diff
@@ -1 +1,6 @@
-export declare function postprocess(rawTranscription: string, customPrompt: string | null): Promise<string>;
+export interface PostprocessOptions {
+    systemPrompt: string;
+    customPromptPrefix: string;
+    transcriptionPrefix: string;
+}
+export declare function postprocess(rawTranscription: string, customPrompt: string | null, options: PostprocessOptions): Promise<string>;
```
package/dist/postprocess.js CHANGED

```diff
@@ -6,7 +6,8 @@ const MODEL = "openai/gpt-oss-120b";
 const outputSchema = z.object({
     fixed_transcription: z.string(),
 });
-export async function postprocess(rawTranscription, customPrompt) {
+export async function postprocess(rawTranscription, customPrompt, options) {
+    const { systemPrompt, customPromptPrefix, transcriptionPrefix } = options;
     const result = await withRetry(async () => {
         const response = await generateObject({
             model: groq(MODEL),
@@ -14,14 +15,20 @@ export async function postprocess(rawTranscription, customPrompt) {
             messages: [
                 {
                     role: "system",
-                    content:
+                    content: systemPrompt,
                 },
                 {
                     role: "user",
-                    content:
-
-
-
+                    content: [
+                        customPrompt
+                            ? `${customPromptPrefix}\n\`\`\`\n${customPrompt}\n\`\`\`\n\n`
+                            : null,
+                        `${transcriptionPrefix}\n\`\`\`\n${rawTranscription}\n\`\`\``,
+                    ]
+                        .filter(Boolean)
+                        .join("")
+                        .trim(),
+                },
             ],
         });
         return response.object;
```
package/dist/transcribe.d.ts CHANGED

```diff
@@ -1 +1,2 @@
-export declare function transcribe(audioPath: string): Promise<string>;
+export type TranscriptionModel = "whisper-large-v3" | "whisper-large-v3-turbo";
+export declare function transcribe(audioPath: string, model?: TranscriptionModel, language?: string): Promise<string>;
```
package/dist/transcribe.js CHANGED

```diff
@@ -1,12 +1,12 @@
 import Groq from "groq-sdk";
 import fs from "fs";
 const groq = new Groq(); // Uses GROQ_API_KEY env var
-export async function transcribe(audioPath) {
+export async function transcribe(audioPath, model = "whisper-large-v3-turbo", language = "en") {
     const transcription = await groq.audio.transcriptions.create({
         file: fs.createReadStream(audioPath),
-        model: "whisper-large-v3-turbo",
+        model,
         temperature: 0,
-        language: "en",
+        language,
     });
     return transcription.text;
 }
```
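The widened `transcribe` signature keeps old call sites working through parameter defaults while letting `settings.json` override the model and language per install. A usage sketch; the file path is illustrative, the import path assumes the compiled `dist/` layout, and `GROQ_API_KEY` must be set:

```js
import { transcribe } from "./transcribe.js"; // import path assumes the dist/ layout

// Defaults preserve the old behavior: turbo model, English.
const text = await transcribe("/tmp/recording.mp3");

// Opt into the larger model, or keep the default model and change the language.
const precise = await transcribe("/tmp/recording.mp3", "whisper-large-v3");
const french = await transcribe("/tmp/recording.mp3", "whisper-large-v3-turbo", "fr");
```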