whspr 1.0.5 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -51,30 +51,75 @@ Press **Enter** to stop recording.
51
51
  - 15-minute max recording time
52
52
  - Transcription via Groq Whisper API
53
53
  - AI-powered post-processing to fix transcription errors
54
- - Custom vocabulary support via `WHSPR.md`
54
+ - Custom vocabulary support via `WHSPR.md` (global and local)
55
+ - Configurable settings via `~/.whspr/settings.json`
55
56
  - Automatic clipboard copy
56
57
 
58
+ ## Settings
59
+
60
+ Create `~/.whspr/settings.json` to customize whspr's behavior:
61
+
62
+ ```json
63
+ {
64
+ "verbose": false,
65
+ "suffix": "\n\n(Transcribed via Whisper)",
66
+ "transcriptionModel": "whisper-large-v3-turbo",
67
+ "language": "en",
68
+ "systemPrompt": "Your task is to clean up transcribed text...",
69
+ "customPromptPrefix": "Here's my custom user prompt:",
70
+ "transcriptionPrefix": "Here's my raw transcription output:"
71
+ }
72
+ ```
73
+
74
+ | Option | Type | Default | Description |
75
+ |--------|------|---------|-------------|
76
+ | `verbose` | boolean | `false` | Enable verbose output |
77
+ | `suffix` | string | none | Text appended to all transcriptions |
78
+ | `transcriptionModel` | string | `"whisper-large-v3-turbo"` | Whisper model (`"whisper-large-v3"` or `"whisper-large-v3-turbo"`) |
79
+ | `language` | string | `"en"` | ISO 639-1 language code (e.g., `"en"`, `"zh"`, `"es"`) |
80
+ | `systemPrompt` | string | (built-in) | System prompt for AI post-processing |
81
+ | `customPromptPrefix` | string | `"Here's my custom user prompt:"` | Prefix before custom prompt content |
82
+ | `transcriptionPrefix` | string | `"Here's my raw transcription output that I need you to edit:"` | Prefix before raw transcription |
83
+
57
84
  ## Custom Vocabulary
58
85
 
59
- Create a `WHSPR.md` (or `WHISPER.md`) file in your current directory to provide custom vocabulary, names, or instructions for the AI post-processor:
86
+ Create a `WHSPR.md` (or `WHISPER.md`) file to provide custom vocabulary, names, or instructions for the AI post-processor.
87
+
88
+ ### Global Prompts
89
+
90
+ Place the file at `~/.whspr/WHSPR.md` for vocabulary that applies everywhere:
60
91
 
61
92
  ```markdown
62
- # Custom Vocabulary
93
+ # Global Vocabulary
94
+
95
+ - My name is "Alex" not "Alec"
96
+ - Common terms: API, CLI, JSON, OAuth
97
+ ```
98
+
99
+ ### Local Prompts
100
+
101
+ Place the file in your current directory (`./WHSPR.md`) for project-specific vocabulary:
102
+
103
+ ```markdown
104
+ # Project Vocabulary
63
105
 
64
106
  - PostgreSQL (not "post crest QL")
65
107
  - Kubernetes (not "cooper netties")
66
108
  - My colleague's name is "Priya" not "Maria"
67
109
  ```
68
110
 
111
+ When both exist, they are combined (global first, then local).
112
+
69
113
  ## How It Works
70
114
 
71
115
  1. Records audio from your default microphone using FFmpeg
72
116
  2. Displays a live waveform visualization based on audio levels
73
117
  3. Converts the recording to MP3
74
118
  4. Sends audio to Groq's Whisper API for transcription
75
- 5. Reads `WHSPR.md` from current directory (if exists)
119
+ 5. Loads custom prompts from `~/.whspr/WHSPR.md` and/or `./WHSPR.md`
76
120
  6. Sends transcription + custom vocabulary to AI for post-processing
77
- 7. Prints result and copies to clipboard
121
+ 7. Applies suffix (if configured)
122
+ 8. Prints result and copies to clipboard
78
123
 
79
124
  If transcription fails, the recording is saved to `~/.whspr/recordings/` for manual recovery.
80
125
 
package/dist/index.d.ts CHANGED
@@ -2,6 +2,7 @@
2
2
  export declare const DEFAULTS: {
3
3
  transcriptionModel: "whisper-large-v3-turbo";
4
4
  language: string;
5
+ model: "groq:openai/gpt-oss-120b";
5
6
  systemPrompt: string;
6
7
  customPromptPrefix: string;
7
8
  transcriptionPrefix: string;
@@ -11,6 +12,7 @@ export interface WhsprSettings {
11
12
  suffix?: string;
12
13
  transcriptionModel?: "whisper-large-v3" | "whisper-large-v3-turbo";
13
14
  language?: string;
15
+ model?: string;
14
16
  systemPrompt?: string;
15
17
  customPromptPrefix?: string;
16
18
  transcriptionPrefix?: string;
package/dist/index.js CHANGED
@@ -11,18 +11,46 @@ import os from "os";
11
11
  export const DEFAULTS = {
12
12
  transcriptionModel: "whisper-large-v3-turbo",
13
13
  language: "en",
14
- systemPrompt: 'Your task is to clean up/fix transcribed text generated from mic input by the user according to the user\'s own prompt, this prompt may contain custom vocabulary, instructions, etc. Please return the user\'s transcription with the fixes made (e.g. the AI might hear "PostgreSQL" as "post crest QL" you need to use your own reasoning to fix these mistakes in the transcription)',
14
+ model: "groq:openai/gpt-oss-120b",
15
+ systemPrompt: `Your task is to fix spelling errors and proper names in transcribed text.
16
+ IMPORTANT: Only correct spelling mistakes and proper nouns (names, places, technical terms).
17
+ Do NOT change wording, phrasing, or sentence structure.
18
+ Do NOT rephrase or rewrite any part of the transcription.
19
+ Preserve the original voice and speaking style exactly as transcribed.`,
15
20
  customPromptPrefix: "Here's my custom user prompt:",
16
21
  transcriptionPrefix: "Here's my raw transcription output that I need you to edit:",
17
22
  };
23
+ // Default settings written to a newly created settings.json when the file does not exist yet
24
+ const DEFAULT_SETTINGS = {
25
+ model: DEFAULTS.model,
26
+ };
18
27
  const WHSPR_DIR = path.join(os.homedir(), ".whspr");
19
28
  const SETTINGS_PATH = path.join(WHSPR_DIR, "settings.json");
29
/**
 * Splits a "provider:model-name" setting into its provider and model name.
 *
 * Only the first ":" acts as the separator, so the model name itself may
 * contain further colons. Whitespace around either part is ignored.
 *
 * @param {string} model - Model setting, e.g. "groq:openai/gpt-oss-120b".
 * @returns {{ provider: "groq" | "anthropic", modelName: string }} The parsed parts.
 * @throws {Error} If the value is not a string, has no ":" separator, names an
 *   unsupported provider, or has an empty model name.
 */
function parseModelProvider(model) {
    const invalidFormat = () => new Error(`Invalid model format: "${String(model)}". Expected "provider:model-name" (e.g., "groq:openai/gpt-oss-120b")`);
    // The value comes from a user-edited JSON file, so it may not be a string
    // at all; report that as a format error instead of an opaque TypeError.
    if (typeof model !== "string") {
        throw invalidFormat();
    }
    const colonIndex = model.indexOf(":");
    if (colonIndex === -1) {
        throw invalidFormat();
    }
    const provider = model.slice(0, colonIndex).trim();
    const modelName = model.slice(colonIndex + 1).trim();
    if (provider !== "groq" && provider !== "anthropic") {
        throw new Error(`Unknown provider: "${provider}". Supported providers: groq, anthropic`);
    }
    // Reject "groq:" here so the mistake surfaces before any audio is recorded
    // rather than as an API failure during post-processing.
    if (modelName === "") {
        throw invalidFormat();
    }
    return { provider, modelName };
}
20
41
  function loadSettings() {
21
42
  try {
22
- if (fs.existsSync(SETTINGS_PATH)) {
23
- const content = fs.readFileSync(SETTINGS_PATH, "utf-8");
24
- return JSON.parse(content);
43
+ // Ensure ~/.whspr/ directory exists
44
+ if (!fs.existsSync(WHSPR_DIR)) {
45
+ fs.mkdirSync(WHSPR_DIR, { recursive: true });
25
46
  }
47
+ // Create settings.json with defaults if it doesn't exist
48
+ if (!fs.existsSync(SETTINGS_PATH)) {
49
+ fs.writeFileSync(SETTINGS_PATH, JSON.stringify(DEFAULT_SETTINGS, null, 2) + "\n", "utf-8");
50
+ return { ...DEFAULT_SETTINGS };
51
+ }
52
+ const content = fs.readFileSync(SETTINGS_PATH, "utf-8");
53
+ return JSON.parse(content);
26
54
  }
27
55
  catch (error) {
28
56
  // Silently ignore an unreadable or invalid settings file (and any failure creating it)
@@ -85,13 +113,24 @@ function formatDuration(seconds) {
85
113
  return `${secs}s`;
86
114
  }
87
115
  async function main() {
88
- // Check for API key before recording
116
+ // Parse model configuration
117
+ const modelConfig = settings.model ?? DEFAULTS.model;
118
+ const { provider, modelName } = parseModelProvider(modelConfig);
119
+ // Check for required API keys before recording
120
+ // Always need GROQ_API_KEY for Whisper transcription
89
121
  if (!process.env.GROQ_API_KEY) {
90
122
  console.error(chalk.red("Error: GROQ_API_KEY environment variable is not set"));
91
123
  console.log(chalk.gray("Get your API key at https://console.groq.com/keys"));
92
124
  console.log(chalk.gray("Then run: export GROQ_API_KEY=\"your-api-key\""));
93
125
  process.exit(1);
94
126
  }
127
+ // Check for provider-specific API key for post-processing
128
+ if (provider === "anthropic" && !process.env.ANTHROPIC_API_KEY) {
129
+ console.error(chalk.red("Error: ANTHROPIC_API_KEY environment variable is not set"));
130
+ console.log(chalk.gray("Get your API key at https://console.anthropic.com/settings/keys"));
131
+ console.log(chalk.gray("Then run: export ANTHROPIC_API_KEY=\"your-api-key\""));
132
+ process.exit(1);
133
+ }
95
134
  try {
96
135
  // 1. Record audio
97
136
  const recording = await record(verbose);
@@ -115,6 +154,8 @@ async function main() {
115
154
  // 5. Post-process
116
155
  status("Post-processing...");
117
156
  let fixedText = await postprocess(rawText, customPrompt, {
157
+ provider,
158
+ modelName,
118
159
  systemPrompt: settings.systemPrompt ?? DEFAULTS.systemPrompt,
119
160
  customPromptPrefix: settings.customPromptPrefix ?? DEFAULTS.customPromptPrefix,
120
161
  transcriptionPrefix: settings.transcriptionPrefix ?? DEFAULTS.transcriptionPrefix,
@@ -1,4 +1,7 @@
1
+ import { ProviderType } from "./utils/providers.js";
1
2
  export interface PostprocessOptions {
3
+ provider: ProviderType;
4
+ modelName: string;
2
5
  systemPrompt: string;
3
6
  customPromptPrefix: string;
4
7
  transcriptionPrefix: string;
@@ -1,16 +1,16 @@
1
1
  import { generateObject } from "ai";
2
2
  import { z } from "zod";
3
3
  import { withRetry } from "./utils/retry.js";
4
- import { groq } from "./utils/groq.js";
5
- const MODEL = "openai/gpt-oss-120b";
4
+ import { getProvider } from "./utils/providers.js";
6
5
  const outputSchema = z.object({
7
6
  fixed_transcription: z.string(),
8
7
  });
9
8
  export async function postprocess(rawTranscription, customPrompt, options) {
10
- const { systemPrompt, customPromptPrefix, transcriptionPrefix } = options;
9
+ const { provider, modelName, systemPrompt, customPromptPrefix, transcriptionPrefix } = options;
10
+ const providerInstance = getProvider(provider);
11
11
  const result = await withRetry(async () => {
12
12
  const response = await generateObject({
13
- model: groq(MODEL),
13
+ model: providerInstance(modelName),
14
14
  schema: outputSchema,
15
15
  messages: [
16
16
  {
@@ -0,0 +1,4 @@
1
+ export declare const groq: import("@ai-sdk/groq").GroqProvider;
2
+ export declare const anthropic: import("@ai-sdk/anthropic").AnthropicProvider;
3
+ export type ProviderType = "groq" | "anthropic";
4
+ export declare function getProvider(provider: ProviderType): import("@ai-sdk/groq").GroqProvider | import("@ai-sdk/anthropic").AnthropicProvider;
@@ -0,0 +1,14 @@
1
+ import { createGroq } from "@ai-sdk/groq";
2
+ import { createAnthropic } from "@ai-sdk/anthropic";
3
+ export const groq = createGroq();
4
+ export const anthropic = createAnthropic();
5
/**
 * Resolves a provider identifier to the matching module-level provider instance.
 *
 * @param provider - Either "groq" or "anthropic".
 * @returns The `groq` or `anthropic` instance created at module load.
 * @throws {Error} If the identifier is neither of the supported values.
 */
export function getProvider(provider) {
    if (provider === "groq") {
        return groq;
    }
    if (provider === "anthropic") {
        return anthropic;
    }
    throw new Error(`Unknown provider: ${provider}`);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "whspr",
3
- "version": "1.0.5",
3
+ "version": "1.0.8",
4
4
  "description": "CLI tool for audio transcription with Groq Whisper API",
5
5
  "type": "module",
6
6
  "bin": {
@@ -29,6 +29,7 @@
29
29
  "prepublishOnly": "npm run build"
30
30
  },
31
31
  "dependencies": {
32
+ "@ai-sdk/anthropic": "^1.x",
32
33
  "@ai-sdk/groq": "^1.x",
33
34
  "ai": "^4.x",
34
35
  "chalk": "^5.x",