@elizaos/capacitor-talkmode 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +137 -0
- package/android/build.gradle +16 -2
- package/android/src/main/java/ai/eliza/plugins/talkmode/TalkModePlugin.kt +941 -54
- package/dist/esm/definitions.d.ts +146 -0
- package/dist/esm/definitions.d.ts.map +1 -1
- package/dist/esm/web.d.ts +6 -1
- package/dist/esm/web.d.ts.map +1 -1
- package/dist/esm/web.js +34 -5
- package/dist/esm/web.test.d.ts +2 -0
- package/dist/esm/web.test.d.ts.map +1 -0
- package/dist/esm/web.test.js +137 -0
- package/dist/plugin.cjs.js +34 -5
- package/dist/plugin.cjs.js.map +1 -1
- package/dist/plugin.js +34 -5
- package/dist/plugin.js.map +1 -1
- package/ios/Sources/TalkModePlugin/TalkModePlugin.swift +266 -16
- package/package.json +14 -11
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shaw Walters and elizaOS Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# @elizaos/capacitor-talkmode
|
|
2
|
+
|
|
3
|
+
Capacitor plugin for full voice conversation sessions: speech-to-text → chat orchestration → text-to-speech.
|
|
4
|
+
|
|
5
|
+
Runs inside Eliza agent apps on **iOS**, **Android**, **Electrobun (desktop)**, and the **browser**. On native platforms, it uses the platform's speech-to-text (AVFoundation / SFSpeechRecognizer on iOS, SpeechRecognizer on Android) and ElevenLabs streaming TTS with PCM/MP3 audio playback. On the web, it falls back to the Web Speech API for both STT and TTS.
|
|
6
|
+
|
|
7
|
+
## Capabilities
|
|
8
|
+
|
|
9
|
+
- **Voice session management** — start/stop a hands-free, push-to-talk, compose, or passive session.
|
|
10
|
+
- **Live transcription** — streaming interim and final transcripts from the microphone.
|
|
11
|
+
- **TTS playback** — speak text via ElevenLabs (native only) or system TTS; supports per-utterance directives (voice, speed, stability, language, seed, latency tier).
|
|
12
|
+
- **Interrupt on speech** — automatically cut TTS playback when the user starts speaking.
|
|
13
|
+
- **Permission handling** — check and request microphone + speech recognition permissions.
|
|
14
|
+
- **Event-driven state machine** — `idle` → `listening` → `processing` → `speaking`, plus an `error` state, with typed events.
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
bun add @elizaos/capacitor-talkmode
|
|
20
|
+
npx cap sync
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Peer dependency: `@capacitor/core ^8.3.1`.
|
|
24
|
+
|
|
25
|
+
On iOS, add the following to your `Info.plist`:
|
|
26
|
+
|
|
27
|
+
```xml
|
|
28
|
+
<key>NSMicrophoneUsageDescription</key>
|
|
29
|
+
<string>Used for voice conversations</string>
|
|
30
|
+
<key>NSSpeechRecognitionUsageDescription</key>
|
|
31
|
+
<string>Used for speech-to-text transcription</string>
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Usage
|
|
35
|
+
|
|
36
|
+
```ts
|
|
37
|
+
import { TalkMode } from "@elizaos/capacitor-talkmode";
|
|
38
|
+
|
|
39
|
+
// Start a voice session with ElevenLabs TTS
|
|
40
|
+
const { started } = await TalkMode.start({
|
|
41
|
+
config: {
|
|
42
|
+
mode: "hands-free",
|
|
43
|
+
tts: {
|
|
44
|
+
apiKey: process.env.ELEVENLABS_API_KEY,
|
|
45
|
+
voiceId: "your-voice-id",
|
|
46
|
+
modelId: "eleven_flash_v2_5",
|
|
47
|
+
interruptOnSpeech: true,
|
|
48
|
+
},
|
|
49
|
+
stt: {
|
|
50
|
+
language: "en",
|
|
51
|
+
sampleRate: 16000,
|
|
52
|
+
},
|
|
53
|
+
silenceWindowMs: 700,
|
|
54
|
+
},
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
// Listen for transcripts
|
|
58
|
+
await TalkMode.addListener("transcript", (event) => {
|
|
59
|
+
console.log(event.transcript, event.isFinal);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
// Listen for state changes
|
|
63
|
+
await TalkMode.addListener("stateChange", (event) => {
|
|
64
|
+
console.log(event.state, event.statusText);
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
// Speak a string directly
|
|
68
|
+
const result = await TalkMode.speak({
|
|
69
|
+
text: "Hello from your Eliza agent.",
|
|
70
|
+
directive: { speed: 1.1, language: "en-US" },
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
// Stop the session
|
|
74
|
+
await TalkMode.stop();
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Actions / Methods
|
|
78
|
+
|
|
79
|
+
| Method | Description |
|
|
80
|
+
|---|---|
|
|
81
|
+
| `start(options?)` | Begin a voice session |
|
|
82
|
+
| `stop()` | End the session and release audio resources |
|
|
83
|
+
| `isEnabled()` | Whether a session is currently active |
|
|
84
|
+
| `getState()` | Current state (`TalkModeState`) and status text |
|
|
85
|
+
| `updateConfig(options)` | Patch configuration mid-session |
|
|
86
|
+
| `speak(options)` | Speak text via TTS; returns completion result |
|
|
87
|
+
| `stopSpeaking()` | Interrupt TTS playback |
|
|
88
|
+
| `isSpeaking()` | Whether TTS is currently playing |
|
|
89
|
+
| `checkPermissions()` | Read microphone + speech recognition permission status |
|
|
90
|
+
| `requestPermissions()` | Prompt for required permissions |
|
|
91
|
+
|
|
92
|
+
## Events
|
|
93
|
+
|
|
94
|
+
| Event | Payload type | When fired |
|
|
95
|
+
|---|---|---|
|
|
96
|
+
| `stateChange` | `TalkModeStateEvent` | State machine transition |
|
|
97
|
+
| `transcript` | `TalkModeTranscriptEvent` | Interim or final STT result |
|
|
98
|
+
| `speaking` | `TTSSpeakingEvent` | TTS utterance starts |
|
|
99
|
+
| `speakComplete` | `TTSCompleteEvent` | TTS utterance finishes or is interrupted |
|
|
100
|
+
| `playbackStart` | `TalkModePlaybackStartEvent` | Native PCM/MP3 playback begins |
|
|
101
|
+
| `error` | `TalkModeErrorEvent` | Recoverable or fatal error |
|
|
102
|
+
|
|
103
|
+
## Configuration reference
|
|
104
|
+
|
|
105
|
+
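All config is passed to `start()` or `updateConfig()` — this package reads no process environment variables itself (the `process.env` lookup in the usage example above happens in the calling app's code).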
|
|
106
|
+
|
|
107
|
+
| Field | Required | Description |
|
|
108
|
+
|---|---|---|
|
|
109
|
+
| `tts.apiKey` | Native TTS only | ElevenLabs API key |
|
|
110
|
+
| `tts.voiceId` | No | ElevenLabs voice ID |
|
|
111
|
+
| `tts.modelId` | No | ElevenLabs model (default: `eleven_flash_v2_5` on iOS) |
|
|
112
|
+
| `tts.outputFormat` | No | e.g. `"pcm_24000"`, `"mp3_44100"` |
|
|
113
|
+
| `tts.interruptOnSpeech` | No | Cut TTS when mic detects speech |
|
|
114
|
+
| `tts.voiceAliases` | No | Name → voiceId mapping |
|
|
115
|
+
| `stt.engine` | No | `"whisper"` or `"web"` |
|
|
116
|
+
| `stt.modelSize` | No | Whisper model size |
|
|
117
|
+
| `stt.language` | No | BCP-47 language code |
|
|
118
|
+
| `stt.sampleRate` | No | Audio sample rate in Hz (default: 16000) |
|
|
119
|
+
| `silenceWindowMs` | No | Silence gap, in milliseconds, before finalising the transcript |
|
|
120
|
+
| `mode` | No | Session mode (`compose`, `push-to-talk`, `hands-free`, `passive`) |
|
|
121
|
+
| `sessionKey` | No | Chat session key for orchestration |
|
|
122
|
+
|
|
123
|
+
## Platform notes
|
|
124
|
+
|
|
125
|
+
| Platform | STT | TTS |
|
|
126
|
+
|---|---|---|
|
|
127
|
+
| iOS / Android | Native platform API | ElevenLabs streaming + system TTS fallback |
|
|
128
|
+
| Electrobun (desktop) | Native STT | ElevenLabs streaming + system TTS fallback |
|
|
129
|
+
| Browser | Web Speech API | `SpeechSynthesis` only (ElevenLabs blocked by CORS) |
|
|
130
|
+
|
|
131
|
+
## Building
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
bun run --cwd plugins/plugin-native-talkmode build
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Outputs ESM to `dist/esm/`, CJS to `dist/plugin.cjs.js`, and IIFE to `dist/plugin.js`.
|
package/android/build.gradle
CHANGED
|
@@ -6,6 +6,16 @@ ext {
|
|
|
6
6
|
}
|
|
7
7
|
|
|
8
8
|
apply plugin: 'com.android.library'
|
|
9
|
+
// Explicitly apply the Kotlin Android plugin. The kotlin-gradle-plugin is on
|
|
10
|
+
// the root buildscript classpath, but without applying it here AGP 8.13 falls
|
|
11
|
+
// back to its "built-in Kotlin" compile path (build/intermediates/
|
|
12
|
+
// built_in_kotlinc), which compiles the .kt sources but does NOT bundle the
|
|
13
|
+
// resulting .class files into the *release* library jar. The app's
|
|
14
|
+
// :app:assembleRelease then links a library AAR with zero plugin classes, so
|
|
15
|
+
// the Capacitor plugin (and any manifest-declared component) is absent from
|
|
16
|
+
// the release dex. Applying the standard Kotlin plugin wires Kotlin
|
|
17
|
+
// compilation into both the debug and release jar-bundling tasks.
|
|
18
|
+
apply plugin: 'org.jetbrains.kotlin.android'
|
|
9
19
|
android {
|
|
10
20
|
namespace = "ai.eliza.plugins.talkmode"
|
|
11
21
|
compileSdk project.hasProperty('compileSdkVersion') ? rootProject.ext.compileSdkVersion : 35
|
|
@@ -21,8 +31,12 @@ android {
|
|
|
21
31
|
}
|
|
22
32
|
}
|
|
23
33
|
compileOptions {
|
|
24
|
-
sourceCompatibility JavaVersion.
|
|
25
|
-
targetCompatibility JavaVersion.
|
|
34
|
+
sourceCompatibility JavaVersion.VERSION_21
|
|
35
|
+
targetCompatibility JavaVersion.VERSION_21
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
kotlinOptions {
|
|
39
|
+
jvmTarget = "21"
|
|
26
40
|
}
|
|
27
41
|
}
|
|
28
42
|
|