@omote/avatar 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -0
- package/package.json +1 -1
package/README.md
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# @omote/avatar
|
|
2
|
+
|
|
3
|
+
> Renderer-agnostic voice composition for the [Omote AI Character SDK](https://github.com/omoteai/muse).
|
|
4
|
+
|
|
5
|
+
`OmoteAvatarCore` holds all voice state (`TTSSpeaker`, `SpeechListener`, `VoiceOrchestrator`) and exposes the imperative voice API. Renderer adapters (`@omote/three`, `@omote/babylon`, `@omote/r3f`) instantiate this class and delegate voice methods, keeping only renderer-specific code (animation, blendshape writes).
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @omote/avatar @omote/core
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```typescript
|
|
16
|
+
import { OmoteAvatarCore } from '@omote/avatar';
|
|
17
|
+
import { createKokoroTTS } from '@omote/core';
|
|
18
|
+
|
|
19
|
+
const core = new OmoteAvatarCore();
|
|
20
|
+
|
|
21
|
+
// Set frame handler (adapter writes these to its renderer)
|
|
22
|
+
core.onFrame = (frame) => {
|
|
23
|
+
applyBlendshapes(frame.blendshapes); // 52 ARKit weights
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
// Set state handler
|
|
27
|
+
core.onStateChange = (state) => {
|
|
28
|
+
console.log('State:', state); // 'idle' | 'listening' | 'thinking' | 'speaking'
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
// Connect full voice pipeline
|
|
32
|
+
await core.connectVoice({
|
|
33
|
+
mode: 'local',
|
|
34
|
+
tts: createKokoroTTS(),
|
|
35
|
+
onTranscript: async (text) => {
|
|
36
|
+
const res = await fetch('/api/chat', { method: 'POST', body: text });
|
|
37
|
+
return await res.text();
|
|
38
|
+
},
|
|
39
|
+
});
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## API
|
|
43
|
+
|
|
44
|
+
### `OmoteAvatarCore`
|
|
45
|
+
|
|
46
|
+
#### Voice Pipeline
|
|
47
|
+
|
|
48
|
+
| Method | Description |
|
|
49
|
+
|--------|-------------|
|
|
50
|
+
| `connectVoice(config)` | Connect full voice pipeline (speaker + listener + interruption). Accepts `VoiceOrchestratorConfig`. |
|
|
51
|
+
| `disconnectVoice()` | Disconnect and dispose the voice orchestrator. |
|
|
52
|
+
| `connectSpeaker(tts, config?)` | Connect a TTS backend for speech output and lip sync. |
|
|
53
|
+
| `disconnectSpeaker()` | Disconnect and dispose the TTS speaker. |
|
|
54
|
+
| `connectListener(config?)` | Connect speech listener (mic + VAD + ASR). |
|
|
55
|
+
| `disconnectListener()` | Disconnect and dispose the speech listener. |
|
|
56
|
+
|
|
57
|
+
#### Speech
|
|
58
|
+
|
|
59
|
+
| Method | Description |
|
|
60
|
+
|--------|-------------|
|
|
61
|
+
| `speak(text, options?)` | Speak text with lip sync. Returns a Promise that resolves when playback completes. |
|
|
62
|
+
| `streamText(options?)` | Start streaming TTS. Returns a `StreamTextSink` for token-by-token input. |
|
|
63
|
+
| `stopSpeaking()` | Abort current speech playback. |
|
|
64
|
+
| `warmup()` | Warm up AudioContext for iOS/Safari autoplay policy. Call from a user gesture. |
|
|
65
|
+
|
|
66
|
+
#### Listening
|
|
67
|
+
|
|
68
|
+
| Method | Description |
|
|
69
|
+
|--------|-------------|
|
|
70
|
+
| `startListening()` | Start mic capture and speech recognition. |
|
|
71
|
+
| `stopListening()` | Stop mic capture. |
|
|
72
|
+
|
|
73
|
+
#### Frame Source
|
|
74
|
+
|
|
75
|
+
| Method | Description |
|
|
76
|
+
|--------|-------------|
|
|
77
|
+
| `connectFrameSource(source)` | Wire any `FrameSource` (PlaybackPipeline, MicLipSync, etc.). |
|
|
78
|
+
| `disconnectFrameSource()` | Disconnect the current frame source. |
|
|
79
|
+
|
|
80
|
+
#### State
|
|
81
|
+
|
|
82
|
+
| Property/Method | Description |
|
|
83
|
+
|--------|-------------|
|
|
84
|
+
| `isSpeaking` | `boolean` — whether TTS is currently playing. |
|
|
85
|
+
| `state` | Current `ConversationalState` (`'idle'`, `'listening'`, `'thinking'`, `'speaking'`). |
|
|
86
|
+
| `speaker` | The active `TTSSpeaker` instance, or `null`. |
|
|
87
|
+
| `listener` | The active `SpeechListener` instance, or `null`. |
|
|
88
|
+
| `setState(state)` | Manually set conversational state. |
|
|
89
|
+
| `reset()` | Reset all state to idle. |
|
|
90
|
+
| `dispose()` | Clean up all resources. |
|
|
91
|
+
|
|
92
|
+
#### Event Subscriptions
|
|
93
|
+
|
|
94
|
+
| Method | Description |
|
|
95
|
+
|--------|-------------|
|
|
96
|
+
| `onTranscript(cb)` | Subscribe to transcript results. Returns unsubscribe function. |
|
|
97
|
+
| `onVoiceStateChange(cb)` | Subscribe to conversational state changes. |
|
|
98
|
+
| `onLoadingProgress(cb)` | Subscribe to model loading progress events. |
|
|
99
|
+
| `onError(cb)` | Subscribe to error events. |
|
|
100
|
+
| `onAudioLevel(cb)` | Subscribe to audio level events (`{ rms, peak }`). |
|
|
101
|
+
|
|
102
|
+
### Types
|
|
103
|
+
|
|
104
|
+
#### `SpeakOptions`
|
|
105
|
+
|
|
106
|
+
```typescript
|
|
107
|
+
interface SpeakOptions {
|
|
108
|
+
signal?: AbortSignal;
|
|
109
|
+
voice?: string;
|
|
110
|
+
speed?: number;
|
|
111
|
+
language?: string;
|
|
112
|
+
}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
#### `StreamTextSink`
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
interface StreamTextSink {
|
|
119
|
+
push: (token: string) => void;
|
|
120
|
+
end: () => Promise<void>;
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
#### `FrameHandler`
|
|
125
|
+
|
|
126
|
+
```typescript
|
|
127
|
+
type FrameHandler = (frame: { blendshapes: Float32Array; emotion?: string }) => void;
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## License
|
|
131
|
+
|
|
132
|
+
MIT
|