whspr 1.0.5 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -5
- package/dist/index.d.ts +2 -0
- package/dist/index.js +46 -5
- package/dist/postprocess.d.ts +3 -0
- package/dist/postprocess.js +4 -4
- package/dist/utils/providers.d.ts +4 -0
- package/dist/utils/providers.js +14 -0
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -51,30 +51,75 @@ Press **Enter** to stop recording.
|
|
|
51
51
|
- 15-minute max recording time
|
|
52
52
|
- Transcription via Groq Whisper API
|
|
53
53
|
- AI-powered post-processing to fix transcription errors
|
|
54
|
-
- Custom vocabulary support via `WHSPR.md`
|
|
54
|
+
- Custom vocabulary support via `WHSPR.md` (global and local)
|
|
55
|
+
- Configurable settings via `~/.whspr/settings.json`
|
|
55
56
|
- Automatic clipboard copy
|
|
56
57
|
|
|
58
|
+
## Settings
|
|
59
|
+
|
|
60
|
+
Create `~/.whspr/settings.json` to customize whspr's behavior:
|
|
61
|
+
|
|
62
|
+
```json
|
|
63
|
+
{
|
|
64
|
+
"verbose": false,
|
|
65
|
+
"suffix": "\n\n(Transcribed via Whisper)",
|
|
66
|
+
"transcriptionModel": "whisper-large-v3-turbo",
|
|
67
|
+
"language": "en",
|
|
68
|
+
"systemPrompt": "Your task is to clean up transcribed text...",
|
|
69
|
+
"customPromptPrefix": "Here's my custom user prompt:",
|
|
70
|
+
"transcriptionPrefix": "Here's my raw transcription output:"
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
| Option | Type | Default | Description |
|
|
75
|
+
|--------|------|---------|-------------|
|
|
76
|
+
| `verbose` | boolean | `false` | Enable verbose output |
|
|
77
|
+
| `suffix` | string | none | Text appended to all transcriptions |
|
|
78
|
+
| `transcriptionModel` | string | `"whisper-large-v3-turbo"` | Whisper model (`"whisper-large-v3"` or `"whisper-large-v3-turbo"`) |
|
|
79
|
+
| `language` | string | `"en"` | ISO 639-1 language code (e.g., `"en"`, `"zh"`, `"es"`) |
|
|
80
|
+
| `systemPrompt` | string | (built-in) | System prompt for AI post-processing |
|
|
81
|
+
| `customPromptPrefix` | string | `"Here's my custom user prompt:"` | Prefix before custom prompt content |
|
|
82
|
+
| `transcriptionPrefix` | string | `"Here's my raw transcription output that I need you to edit:"` | Prefix before raw transcription |
|
|
83
|
+
|
|
57
84
|
## Custom Vocabulary
|
|
58
85
|
|
|
59
|
-
Create a `WHSPR.md` (or `WHISPER.md`) file
|
|
86
|
+
Create a `WHSPR.md` (or `WHISPER.md`) file to provide custom vocabulary, names, or instructions for the AI post-processor.
|
|
87
|
+
|
|
88
|
+
### Global Prompts
|
|
89
|
+
|
|
90
|
+
Place in `~/.whspr/WHSPR.md` for vocabulary that applies everywhere:
|
|
60
91
|
|
|
61
92
|
```markdown
|
|
62
|
-
#
|
|
93
|
+
# Global Vocabulary
|
|
94
|
+
|
|
95
|
+
- My name is "Alex" not "Alec"
|
|
96
|
+
- Common terms: API, CLI, JSON, OAuth
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Local Prompts
|
|
100
|
+
|
|
101
|
+
Place in your current directory (`./WHSPR.md`) for project-specific vocabulary:
|
|
102
|
+
|
|
103
|
+
```markdown
|
|
104
|
+
# Project Vocabulary
|
|
63
105
|
|
|
64
106
|
- PostgreSQL (not "post crest QL")
|
|
65
107
|
- Kubernetes (not "cooper netties")
|
|
66
108
|
- My colleague's name is "Priya" not "Maria"
|
|
67
109
|
```
|
|
68
110
|
|
|
111
|
+
When both exist, they are combined (global first, then local).
|
|
112
|
+
|
|
69
113
|
## How It Works
|
|
70
114
|
|
|
71
115
|
1. Records audio from your default microphone using FFmpeg
|
|
72
116
|
2. Displays a live waveform visualization based on audio levels
|
|
73
117
|
3. Converts the recording to MP3
|
|
74
118
|
4. Sends audio to Groq's Whisper API for transcription
|
|
75
|
-
5.
|
|
119
|
+
5. Loads custom prompts from `~/.whspr/WHSPR.md` and/or `./WHSPR.md`
|
|
76
120
|
6. Sends transcription + custom vocabulary to AI for post-processing
|
|
77
|
-
7.
|
|
121
|
+
7. Applies suffix (if configured)
|
|
122
|
+
8. Prints result and copies to clipboard
|
|
78
123
|
|
|
79
124
|
If transcription fails, the recording is saved to `~/.whspr/recordings/` for manual recovery.
|
|
80
125
|
|
package/dist/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
export declare const DEFAULTS: {
|
|
3
3
|
transcriptionModel: "whisper-large-v3-turbo";
|
|
4
4
|
language: string;
|
|
5
|
+
model: "groq:openai/gpt-oss-120b";
|
|
5
6
|
systemPrompt: string;
|
|
6
7
|
customPromptPrefix: string;
|
|
7
8
|
transcriptionPrefix: string;
|
|
@@ -11,6 +12,7 @@ export interface WhsprSettings {
|
|
|
11
12
|
suffix?: string;
|
|
12
13
|
transcriptionModel?: "whisper-large-v3" | "whisper-large-v3-turbo";
|
|
13
14
|
language?: string;
|
|
15
|
+
model?: string;
|
|
14
16
|
systemPrompt?: string;
|
|
15
17
|
customPromptPrefix?: string;
|
|
16
18
|
transcriptionPrefix?: string;
|
package/dist/index.js
CHANGED
|
@@ -11,18 +11,46 @@ import os from "os";
|
|
|
11
11
|
export const DEFAULTS = {
|
|
12
12
|
transcriptionModel: "whisper-large-v3-turbo",
|
|
13
13
|
language: "en",
|
|
14
|
-
|
|
14
|
+
model: "groq:openai/gpt-oss-120b",
|
|
15
|
+
systemPrompt: `Your task is to fix spelling errors and proper names in transcribed text.
|
|
16
|
+
IMPORTANT: Only correct spelling mistakes and proper nouns (names, places, technical terms).
|
|
17
|
+
Do NOT change wording, phrasing, or sentence structure.
|
|
18
|
+
Do NOT rephrase or rewrite any part of the transcription.
|
|
19
|
+
Preserve the original voice and speaking style exactly as transcribed.`,
|
|
15
20
|
customPromptPrefix: "Here's my custom user prompt:",
|
|
16
21
|
transcriptionPrefix: "Here's my raw transcription output that I need you to edit:",
|
|
17
22
|
};
|
|
23
|
+
// Settings written to settings.json when that file does not exist yet.
// Only `model` is persisted here.
|
|
24
|
+
const DEFAULT_SETTINGS = {
|
|
25
|
+
model: DEFAULTS.model,
|
|
26
|
+
};
|
|
18
27
|
const WHSPR_DIR = path.join(os.homedir(), ".whspr");
|
|
19
28
|
const SETTINGS_PATH = path.join(WHSPR_DIR, "settings.json");
|
|
29
|
+
function parseModelProvider(model) {
|
|
30
|
+
const colonIndex = model.indexOf(":");
|
|
31
|
+
if (colonIndex === -1) {
|
|
32
|
+
throw new Error(`Invalid model format: "${model}". Expected "provider:model-name" (e.g., "groq:openai/gpt-oss-120b")`);
|
|
33
|
+
}
|
|
34
|
+
const provider = model.slice(0, colonIndex);
|
|
35
|
+
const modelName = model.slice(colonIndex + 1);
|
|
36
|
+
if (provider !== "groq" && provider !== "anthropic") {
|
|
37
|
+
throw new Error(`Unknown provider: "${provider}". Supported providers: groq, anthropic`);
|
|
38
|
+
}
|
|
39
|
+
return { provider, modelName };
|
|
40
|
+
}
|
|
20
41
|
function loadSettings() {
|
|
21
42
|
try {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
43
|
+
// Ensure ~/.whspr/ directory exists
|
|
44
|
+
if (!fs.existsSync(WHSPR_DIR)) {
|
|
45
|
+
fs.mkdirSync(WHSPR_DIR, { recursive: true });
|
|
25
46
|
}
|
|
47
|
+
// Create settings.json with defaults if it doesn't exist
|
|
48
|
+
if (!fs.existsSync(SETTINGS_PATH)) {
|
|
49
|
+
fs.writeFileSync(SETTINGS_PATH, JSON.stringify(DEFAULT_SETTINGS, null, 2) + "\n", "utf-8");
|
|
50
|
+
return { ...DEFAULT_SETTINGS };
|
|
51
|
+
}
|
|
52
|
+
const content = fs.readFileSync(SETTINGS_PATH, "utf-8");
|
|
53
|
+
return JSON.parse(content);
|
|
26
54
|
}
|
|
27
55
|
catch (error) {
|
|
28
56
|
// Silently ignore invalid settings file
|
|
@@ -85,13 +113,24 @@ function formatDuration(seconds) {
|
|
|
85
113
|
return `${secs}s`;
|
|
86
114
|
}
|
|
87
115
|
async function main() {
|
|
88
|
-
//
|
|
116
|
+
// Parse model configuration
|
|
117
|
+
const modelConfig = settings.model ?? DEFAULTS.model;
|
|
118
|
+
const { provider, modelName } = parseModelProvider(modelConfig);
|
|
119
|
+
// Check for required API keys before recording
|
|
120
|
+
// Always need GROQ_API_KEY for Whisper transcription
|
|
89
121
|
if (!process.env.GROQ_API_KEY) {
|
|
90
122
|
console.error(chalk.red("Error: GROQ_API_KEY environment variable is not set"));
|
|
91
123
|
console.log(chalk.gray("Get your API key at https://console.groq.com/keys"));
|
|
92
124
|
console.log(chalk.gray("Then run: export GROQ_API_KEY=\"your-api-key\""));
|
|
93
125
|
process.exit(1);
|
|
94
126
|
}
|
|
127
|
+
// Check for provider-specific API key for post-processing
|
|
128
|
+
if (provider === "anthropic" && !process.env.ANTHROPIC_API_KEY) {
|
|
129
|
+
console.error(chalk.red("Error: ANTHROPIC_API_KEY environment variable is not set"));
|
|
130
|
+
console.log(chalk.gray("Get your API key at https://console.anthropic.com/settings/keys"));
|
|
131
|
+
console.log(chalk.gray("Then run: export ANTHROPIC_API_KEY=\"your-api-key\""));
|
|
132
|
+
process.exit(1);
|
|
133
|
+
}
|
|
95
134
|
try {
|
|
96
135
|
// 1. Record audio
|
|
97
136
|
const recording = await record(verbose);
|
|
@@ -115,6 +154,8 @@ async function main() {
|
|
|
115
154
|
// 5. Post-process
|
|
116
155
|
status("Post-processing...");
|
|
117
156
|
let fixedText = await postprocess(rawText, customPrompt, {
|
|
157
|
+
provider,
|
|
158
|
+
modelName,
|
|
118
159
|
systemPrompt: settings.systemPrompt ?? DEFAULTS.systemPrompt,
|
|
119
160
|
customPromptPrefix: settings.customPromptPrefix ?? DEFAULTS.customPromptPrefix,
|
|
120
161
|
transcriptionPrefix: settings.transcriptionPrefix ?? DEFAULTS.transcriptionPrefix,
|
package/dist/postprocess.d.ts
CHANGED
package/dist/postprocess.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import { generateObject } from "ai";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { withRetry } from "./utils/retry.js";
|
|
4
|
-
import {
|
|
5
|
-
const MODEL = "openai/gpt-oss-120b";
|
|
4
|
+
import { getProvider } from "./utils/providers.js";
|
|
6
5
|
const outputSchema = z.object({
|
|
7
6
|
fixed_transcription: z.string(),
|
|
8
7
|
});
|
|
9
8
|
export async function postprocess(rawTranscription, customPrompt, options) {
|
|
10
|
-
const { systemPrompt, customPromptPrefix, transcriptionPrefix } = options;
|
|
9
|
+
const { provider, modelName, systemPrompt, customPromptPrefix, transcriptionPrefix } = options;
|
|
10
|
+
const providerInstance = getProvider(provider);
|
|
11
11
|
const result = await withRetry(async () => {
|
|
12
12
|
const response = await generateObject({
|
|
13
|
-
model:
|
|
13
|
+
model: providerInstance(modelName),
|
|
14
14
|
schema: outputSchema,
|
|
15
15
|
messages: [
|
|
16
16
|
{
|
|
package/dist/utils/providers.d.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
/** Groq provider instance exported by this module. */
export declare const groq: import("@ai-sdk/groq").GroqProvider;
|
|
2
|
+
/** Anthropic provider instance exported by this module. */
export declare const anthropic: import("@ai-sdk/anthropic").AnthropicProvider;
|
|
3
|
+
/** Names of the providers accepted by `getProvider`. */
export type ProviderType = "groq" | "anthropic";
|
|
4
|
+
/**
 * Returns the provider instance for the given provider name.
 *
 * @param provider - Either "groq" or "anthropic".
 * @returns The Groq or Anthropic provider instance.
 */
export declare function getProvider(provider: ProviderType): import("@ai-sdk/groq").GroqProvider | import("@ai-sdk/anthropic").AnthropicProvider;
|
|
package/dist/utils/providers.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { createGroq } from "@ai-sdk/groq";
|
|
2
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
3
|
+
export const groq = createGroq();
|
|
4
|
+
export const anthropic = createAnthropic();
|
|
5
|
+
/**
 * Looks up the module-level provider instance for a provider name.
 *
 * @param {"groq" | "anthropic"} provider - Name of the provider to use.
 * @returns The `groq` or `anthropic` instance defined in this module.
 * @throws {Error} If `provider` is neither "groq" nor "anthropic".
 */
export function getProvider(provider) {
    if (provider === "groq") {
        return groq;
    }
    if (provider === "anthropic") {
        return anthropic;
    }
    // Anything else is unsupported; report the offending value.
    throw new Error(`Unknown provider: ${provider}`);
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "whspr",
|
|
3
|
-
"version": "1.0.5",
|
|
3
|
+
"version": "1.0.8",
|
|
4
4
|
"description": "CLI tool for audio transcription with Groq Whisper API",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
"prepublishOnly": "npm run build"
|
|
30
30
|
},
|
|
31
31
|
"dependencies": {
|
|
32
|
+
"@ai-sdk/anthropic": "^1.x",
|
|
32
33
|
"@ai-sdk/groq": "^1.x",
|
|
33
34
|
"ai": "^4.x",
|
|
34
35
|
"chalk": "^5.x",
|