@p8n.ai/pi-listens 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.md +22 -10
- package/package.json +1 -1
- package/skills/pi-listens/SKILL.md +7 -0
- package/src/commands.ts +84 -16
- package/src/index.ts +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,21 @@ This project follows [Semantic Versioning](https://semver.org/).
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.2.0] - 2026-05-09
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- `/init` command to create a global settings file (`~/.pi/pi-listens.json`) with sensible defaults.
|
|
14
|
+
- `/voice-check` command (replaces `/voice-status`) with improved diagnostic output.
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
|
|
18
|
+
- `/voice-on` now enables auto-speak by default for a full hands-free experience. Use `--no-speak` to opt out.
|
|
19
|
+
- Rename `/voice-status` to `/voice-check` to better communicate its diagnostic purpose.
|
|
20
|
+
|
|
21
|
+
### Removed
|
|
22
|
+
|
|
23
|
+
- `/listen` slash command. Use `/voice-on` for the hands-free voice loop, or the `voice_input` agent tool for programmatic speech input.
|
|
9
24
|
## [0.1.2] - 2026-05-09
|
|
10
25
|
|
|
11
26
|
### Changed
|
|
@@ -39,7 +54,8 @@ This project follows [Semantic Versioning](https://semver.org/).
|
|
|
39
54
|
- Stop active audio capture/playback subprocesses when voice mode is closed or the Pi session shuts down.
|
|
40
55
|
- Clean up generated audio files when spoken playback is interrupted.
|
|
41
56
|
|
|
42
|
-
[Unreleased]: https://github.com/p8n-ai/pi-listens/compare/v0.1.2...HEAD
|
|
57
|
+
[Unreleased]: https://github.com/p8n-ai/pi-listens/compare/v0.2.0...HEAD
|
|
43
58
|
[0.1.0]: https://github.com/p8n-ai/pi-listens/releases/tag/v0.1.0
|
|
44
59
|
[0.1.1]: https://github.com/p8n-ai/pi-listens/releases/tag/v0.1.1
|
|
45
60
|
[0.1.2]: https://github.com/p8n-ai/pi-listens/releases/tag/v0.1.2
|
|
61
|
+
[0.2.0]: https://github.com/p8n-ai/pi-listens/releases/tag/v0.2.0
|
package/README.md
CHANGED
|
@@ -7,14 +7,27 @@ Speech-first Pi package powered by [Sarvam AI](https://www.sarvam.ai/). It gives
|
|
|
7
7
|
- voice-first clarification loops where the agent speaks a question, listens, transcribes, and continues
|
|
8
8
|
- interactive TUI and headless/RPC usage through Pi extension tools and UI fallback
|
|
9
9
|
|
|
10
|
-
##
|
|
10
|
+
## Quick start
|
|
11
11
|
|
|
12
12
|
```bash
|
|
13
13
|
pi install npm:@p8n.ai/pi-listens
|
|
14
|
-
export SARVAM_API_KEY="your-sarvam-api-key"
|
|
15
14
|
pi
|
|
16
15
|
```
|
|
17
16
|
|
|
17
|
+
Inside Pi, run `/init` to create a global settings file with sensible defaults:
|
|
18
|
+
|
|
19
|
+
```
|
|
20
|
+
/init
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Then open `~/.pi/pi-listens.json` and replace the `apiKey` placeholder with your [Sarvam AI API key](https://dashboard.sarvam.ai).
|
|
24
|
+
|
|
25
|
+
Alternatively, set the key via environment variable:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
export SARVAM_API_KEY="your-sarvam-api-key"
|
|
29
|
+
```
|
|
30
|
+
|
|
18
31
|
For local development from this checkout:
|
|
19
32
|
|
|
20
33
|
```bash
|
|
@@ -88,16 +101,15 @@ The extension also injects voice guidance into the system prompt:
|
|
|
88
101
|
|
|
89
102
|
| Command | Purpose |
|
|
90
103
|
| --- | --- |
|
|
91
|
-
| `/
|
|
104
|
+
| `/init` | Create a global settings file at `~/.pi/pi-listens.json` with sensible defaults. Use `--overwrite` to replace an existing file. |
|
|
92
105
|
| `/speak <text>` | Speak text with Sarvam TTS. |
|
|
93
|
-
| `/voice-on [--speak] [--manual] [--no-listen] [seconds]` |
|
|
94
|
-
| `/voice-
|
|
95
|
-
| `/voice-status` | Show setup and voice-mode status. |
|
|
106
|
+
| `/voice-on [--no-speak] [--manual] [--no-listen] [seconds]` | Start the hands-free voice loop. Auto-speaks assistant replies and auto-listens by default. `--no-speak` disables reading replies aloud. `--manual` disables auto-listen (press Space to listen). |
|
|
107
|
+
| `/voice-check` | Show setup diagnostics and voice-mode status. |
|
|
96
108
|
|
|
97
109
|
Voice panel controls in interactive mode:
|
|
98
|
-
- Space: listen now; press again while listening to stop
|
|
99
|
-
- A: auto-listen
|
|
100
|
-
- S: read
|
|
110
|
+
- Space: listen now; press again while listening to stop; if Pi is speaking, stops playback first
|
|
111
|
+
- A: toggle auto-listen (listen again after each assistant reply)
|
|
112
|
+
- S: toggle read-aloud (speak assistant replies)
|
|
101
113
|
- Q: close the panel and stop any active listening or speaking
|
|
102
114
|
- Click the orb: visual ripple feedback (terminals with mouse reporting)
|
|
103
115
|
|
|
@@ -143,7 +155,7 @@ Example config file:
|
|
|
143
155
|
"ttsSampleRate": 24000,
|
|
144
156
|
"ttsOutputCodec": "wav",
|
|
145
157
|
"textFallback": true,
|
|
146
|
-
"autoSpeakAssistant": false,
|
|
158
|
+
"autoSpeakAssistant": true,
|
|
147
159
|
"maxAutoSpeakChars": 320
|
|
148
160
|
}
|
|
149
161
|
```
|
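package/package.json
CHANGED
[The package.json hunk (+1 -1) is missing from this capture; presumably the version bump 0.1.2 → 0.2.0.]
package/skills/pi-listens/SKILL.md
CHANGED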
|
@@ -15,6 +15,13 @@ This Pi package provides voice tools backed by Sarvam AI.
|
|
|
15
15
|
- `voice_transcribe_file`: transcribe an existing audio file.
|
|
16
16
|
- `voice_setup_check`: diagnose API key, recorder, player, and voice settings.
|
|
17
17
|
|
|
18
|
+
## Commands
|
|
19
|
+
|
|
20
|
+
- `/init`: create a global settings file with defaults. User only needs to set their Sarvam API key.
|
|
21
|
+
- `/speak <text>`: speak text with Sarvam TTS.
|
|
22
|
+
- `/voice-on`: start hands-free voice loop (auto-speaks replies and auto-listens by default).
|
|
23
|
+
- `/voice-check`: show setup diagnostics and voice-mode status.
|
|
24
|
+
|
|
18
25
|
## Usage rules
|
|
19
26
|
|
|
20
27
|
1. When you need user input, clarification, or confirmation, use `voice_ask` instead of asking only in text.
|
package/src/commands.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
3
|
+
import { homedir } from "node:os";
|
|
2
4
|
import { join } from "node:path";
|
|
3
5
|
import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
4
6
|
import type { VoiceToolServices } from "./tools.js";
|
|
5
7
|
import { conciseTranscript, prepareSpokenText } from "./text.js";
|
|
6
|
-
import { audioExtensionForCodec } from "./config.js";
|
|
8
|
+
import { audioExtensionForCodec, maskSecret } from "./config.js";
|
|
7
9
|
import { applyVoiceChrome, installVoiceUi, uninstallVoiceUi } from "./voice-ui.js";
|
|
8
10
|
|
|
9
11
|
export type VoiceLoopStatus = "idle" | "listening" | "transcribing" | "agent" | "speaking" | "error";
|
|
@@ -26,10 +28,10 @@ export interface VoiceModeState {
|
|
|
26
28
|
}
|
|
27
29
|
|
|
28
30
|
export function registerVoiceCommands(pi: ExtensionAPI, services: VoiceToolServices, state: VoiceModeState) {
|
|
29
|
-
pi.registerCommand("
|
|
30
|
-
description: "
|
|
31
|
+
pi.registerCommand("init", {
|
|
32
|
+
description: "Create a global pi-listens settings file at ~/.pi/pi-listens.json with sensible defaults",
|
|
31
33
|
handler: async (args, ctx) => {
|
|
32
|
-
await
|
|
34
|
+
await initSettings(services, ctx, args.includes("--overwrite"));
|
|
33
35
|
},
|
|
34
36
|
});
|
|
35
37
|
|
|
@@ -46,11 +48,10 @@ export function registerVoiceCommands(pi: ExtensionAPI, services: VoiceToolServi
|
|
|
46
48
|
});
|
|
47
49
|
|
|
48
50
|
pi.registerCommand("voice-on", {
|
|
49
|
-
description: "Enable hands-free voice loop. Use --speak to
|
|
51
|
+
description: "Enable hands-free voice loop with auto-speak and auto-listen. Use --no-speak to disable reading replies aloud, --manual to only listen on demand.",
|
|
50
52
|
handler: async (args, ctx) => {
|
|
51
53
|
state.enabled = true;
|
|
52
|
-
|
|
53
|
-
if (args.includes("--no-speak")) state.autoSpeakAssistant = false;
|
|
54
|
+
state.autoSpeakAssistant = !args.includes("--no-speak");
|
|
54
55
|
state.autoListen = !args.includes("--manual");
|
|
55
56
|
installVoiceUi(ctx, state, createVoiceUiCallbacks(pi, services, state, ctx));
|
|
56
57
|
applyVoiceChrome(ctx, state);
|
|
@@ -60,30 +61,97 @@ export function registerVoiceCommands(pi: ExtensionAPI, services: VoiceToolServi
|
|
|
60
61
|
});
|
|
61
62
|
|
|
62
63
|
|
|
63
|
-
pi.registerCommand("voice-
|
|
64
|
-
description: "
|
|
64
|
+
pi.registerCommand("voice-check", {
|
|
65
|
+
description: "Check pi-listens setup: Sarvam AI key, recorder, player, and voice-mode status",
|
|
65
66
|
handler: async (_args, ctx) => {
|
|
66
67
|
const config = services.getConfig();
|
|
67
68
|
const audio = services.getAudio().describe();
|
|
69
|
+
const ready = Boolean(config.apiKey) && audio.recorder !== "missing" && audio.player !== "missing";
|
|
68
70
|
ctx.ui.notify(
|
|
69
71
|
[
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
`
|
|
73
|
-
`Status: ${state.status}`,
|
|
74
|
-
`Sarvam API key: ${config.apiKey ? "set" : "missing"}`,
|
|
72
|
+
ready ? "✓ pi-listens is ready." : "⚠ pi-listens needs attention.",
|
|
73
|
+
"",
|
|
74
|
+
`Sarvam API key: ${maskSecret(config.apiKey)}`,
|
|
75
75
|
`Recorder: ${audio.recorder}`,
|
|
76
76
|
`Player: ${audio.player}`,
|
|
77
77
|
`Streaming player: ${audio.streamingPlayer}`,
|
|
78
78
|
`STT: ${config.sttModel} (${config.translateInputToEnglish ? "translate→English" : config.sttMode}, ${config.sttLanguageCode})`,
|
|
79
79
|
`TTS: ${config.ttsModel} (${config.ttsLanguageCode}, speaker ${config.ttsSpeaker})`,
|
|
80
|
+
"",
|
|
81
|
+
`Voice mode: ${state.enabled ? "on" : "off"}`,
|
|
82
|
+
`Auto-speak: ${state.autoSpeakAssistant ? "on" : "off"}`,
|
|
83
|
+
`Auto-listen: ${state.autoListen ? "on" : "off"}`,
|
|
80
84
|
].join("\n"),
|
|
81
|
-
|
|
85
|
+
ready ? "info" : "warning",
|
|
82
86
|
);
|
|
83
87
|
},
|
|
84
88
|
});
|
|
85
89
|
}
|
|
86
90
|
|
|
91
|
+
/**
 * Placeholder written as `apiKey` by `/init`. A settings file that still
 * contains this value is reported as not yet configured.
 */
const API_KEY_PLACEHOLDER = "paste-your-sarvam-api-key-here";

/** Default settings that `initSettings` serializes into the global settings file. */
const INIT_SETTINGS_TEMPLATE = {
  apiKey: API_KEY_PLACEHOLDER,
  sttModel: "saaras:v3",
  sttMode: "transcribe",
  sttLanguageCode: "unknown",
  translateInputToEnglish: true,
  ttsModel: "bulbul:v3",
  ttsLanguageCode: "en-IN",
  ttsSpeaker: "shubh",
  recordSeconds: 300,
  recordSampleRate: 16000,
  streamChunkMs: 250,
  streamMaxSeconds: 300,
  silenceStartSeconds: 0.2,
  silenceStopSeconds: 3.5,
  silenceThreshold: "1%",
  ttsSampleRate: 24000,
  ttsOutputCodec: "wav",
  textFallback: true,
  autoSpeakAssistant: true,
  maxAutoSpeakChars: 320,
};

/** Outcome of inspecting the `apiKey` stored in an existing settings file. */
type StoredApiKeyState = "set" | "unset" | "unreadable";

/**
 * Classifies the `apiKey` found in the raw text of a settings file.
 *
 * @param raw - File contents, expected to be a JSON object.
 * @returns `"unreadable"` when the text is not a JSON object, `"unset"` when
 *   `apiKey` is absent, not a string, blank, or still the placeholder, and
 *   `"set"` otherwise.
 */
function inspectStoredApiKey(raw: string): StoredApiKeyState {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return "unreadable";
  }
  // JSON.parse may legally return null, arrays, or primitives; none of them can hold settings.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return "unreadable";

  const apiKey = (parsed as Record<string, unknown>).apiKey;
  if (typeof apiKey !== "string") return "unset";
  const trimmed = apiKey.trim();
  return trimmed.length > 0 && trimmed !== API_KEY_PLACEHOLDER ? "set" : "unset";
}

/** Returns true when `error` carries the Node.js `EEXIST` error code. */
function isFileExistsError(error: unknown): boolean {
  return typeof error === "object" && error !== null && (error as { code?: unknown }).code === "EEXIST";
}

/** Tells the user that the settings file is already present and how to replace it. */
function notifyExistingSettings(ctx: ExtensionCommandContext, filePath: string): void {
  let keyState: StoredApiKeyState;
  try {
    keyState = inspectStoredApiKey(readFileSync(filePath, "utf8"));
  } catch {
    // The path exists but cannot be read (permissions, directory, ...): report it instead of crashing /init.
    keyState = "unreadable";
  }

  const keyLine =
    keyState === "set"
      ? "Sarvam API key: set"
      : keyState === "unset"
        ? "Sarvam API key: not yet configured"
        : "Sarvam API key: could not be read (file is unreadable or not a valid JSON object)";

  ctx.ui.notify(
    [
      `Settings file already exists: ${filePath}`,
      keyLine,
      "",
      "Use /init --overwrite to replace it with fresh defaults.",
    ].join("\n"),
    keyState === "unreadable" ? "warning" : "info",
  );
}

/**
 * Creates the global settings file `~/.pi/pi-listens.json` from
 * `INIT_SETTINGS_TEMPLATE` and reports the result through `ctx.ui.notify`.
 *
 * @param services - Used to describe the detected audio recorder and player.
 * @param ctx - Command context whose UI receives the notifications.
 * @param overwrite - When false, an existing file is left untouched and only
 *   reported; when true, the file is replaced with fresh defaults.
 * @throws Propagates file-system errors from `mkdir`/`writeFile`, except the
 *   "already exists" case, which is reported like an existing file.
 */
async function initSettings(services: VoiceToolServices, ctx: ExtensionCommandContext, overwrite: boolean) {
  const dir = join(homedir(), ".pi");
  const filePath = join(dir, "pi-listens.json");

  if (!overwrite && existsSync(filePath)) {
    notifyExistingSettings(ctx, filePath);
    return;
  }

  await mkdir(dir, { recursive: true });
  try {
    await writeFile(filePath, `${JSON.stringify(INIT_SETTINGS_TEMPLATE, null, 2)}\n`, {
      encoding: "utf8",
      // The file will hold an API key, so create it readable by the owner only.
      // NOTE: `mode` applies only when the file is created; an overwritten file keeps its current permissions.
      mode: 0o600,
      // "wx" fails instead of clobbering a file that appeared after the existence check above.
      flag: overwrite ? "w" : "wx",
    });
  } catch (error: unknown) {
    if (!overwrite && isFileExistsError(error)) {
      notifyExistingSettings(ctx, filePath);
      return;
    }
    throw error;
  }

  const audio = services.getAudio().describe();
  const audioMissing = audio.recorder === "missing" || audio.player === "missing";
  ctx.ui.notify(
    [
      `✓ Created settings file: ${filePath}`,
      "",
      "Next step: open the file and replace the apiKey value with your Sarvam AI API key.",
      "Get a key at: https://dashboard.sarvam.ai",
      "",
      `Recorder: ${audio.recorder}`,
      `Player: ${audio.player}`,
      audioMissing
        ? "⚠ Install SoX (rec/play) or ffmpeg for microphone and audio playback."
        : "✓ Audio recorder and player detected.",
    ].join("\n"),
    "info",
  );
}
|
|
87
155
|
export async function maybeContinueVoiceLoop(pi: ExtensionAPI, services: VoiceToolServices, state: VoiceModeState, ctx: ExtensionContext) {
|
|
88
156
|
if (!state.enabled || state.isListening) return;
|
|
89
157
|
if (state.autoSpeakAssistant && state.lastAssistantText) {
|
package/src/index.ts
CHANGED
|
@@ -52,7 +52,7 @@ export default function piListensExtension(pi: ExtensionAPI) {
|
|
|
52
52
|
`Sarvam API key: ${maskSecret(config.apiKey)}`,
|
|
53
53
|
`Recorder: ${audioInfo.recorder}`,
|
|
54
54
|
`Player: ${audioInfo.player}`,
|
|
55
|
-
"Run /
|
|
55
|
+
"Run /init to create a settings file, or /voice-check for details.",
|
|
56
56
|
].join("\n"),
|
|
57
57
|
"warning",
|
|
58
58
|
);
|