@omote/core 0.10.5 → 0.10.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -34
- package/dist/chunk-3FILA2CD.mjs +785 -0
- package/dist/chunk-3FILA2CD.mjs.map +1 -0
- package/dist/chunk-5WIOGMJA.mjs +785 -0
- package/dist/chunk-5WIOGMJA.mjs.map +1 -0
- package/dist/chunk-NWZMIQK4.mjs +782 -0
- package/dist/chunk-NWZMIQK4.mjs.map +1 -0
- package/dist/chunk-WW4XAUJ3.mjs +208 -0
- package/dist/chunk-WW4XAUJ3.mjs.map +1 -0
- package/dist/index.d.mts +84 -79
- package/dist/index.d.ts +84 -79
- package/dist/index.js +514 -406
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +233 -199
- package/dist/index.mjs.map +1 -1
- package/dist/logging/index.js +5 -0
- package/dist/logging/index.js.map +1 -1
- package/dist/logging/index.mjs +1 -1
- package/dist/otlp-2BML6FIK.mjs +7 -0
- package/dist/otlp-2BML6FIK.mjs.map +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
- **SpeechListener** — Mic → VAD → ASR orchestration with adaptive silence detection
|
|
16
16
|
- **createTTSPlayer()** — Factory composing Kokoro TTS + TTSSpeaker for zero-config playback
|
|
17
17
|
- **VoiceOrchestrator** — Full conversational agent loop with local TTS support (cloud or offline)
|
|
18
|
-
- **
|
|
18
|
+
- **configureModelUrls()** — Self-host model files from your own CDN
|
|
19
19
|
- **Animation Graph** — State machine (idle/listening/thinking/speaking) with emotion blending
|
|
20
20
|
- **Emotion Controller** — Preset-based emotion system with smooth transitions
|
|
21
21
|
- **Model Caching** — IndexedDB with versioning, LRU eviction, and quota monitoring
|
|
@@ -81,15 +81,15 @@ const { blendshapes } = await a2e.infer(audioSamples); // Float32Array (16kHz)
|
|
|
81
81
|
// → 52 ARKit blendshape weights
|
|
82
82
|
```
|
|
83
83
|
|
|
84
|
-
####
|
|
84
|
+
#### Custom Configuration
|
|
85
85
|
|
|
86
86
|
```typescript
|
|
87
|
-
import {
|
|
87
|
+
import { createA2E, ARKIT_BLENDSHAPES } from '@omote/core';
|
|
88
88
|
|
|
89
|
-
const
|
|
90
|
-
await
|
|
89
|
+
const a2e = createA2E({ backend: 'wasm' }); // Force WASM for testing
|
|
90
|
+
await a2e.load();
|
|
91
91
|
|
|
92
|
-
const { blendshapes } = await
|
|
92
|
+
const { blendshapes } = await a2e.infer(audioSamples);
|
|
93
93
|
const jawOpen = blendshapes[ARKIT_BLENDSHAPES.indexOf('jawOpen')];
|
|
94
94
|
```
|
|
95
95
|
|
|
@@ -136,11 +136,9 @@ const frame = processor.getFrameForTime(audioContext.currentTime);
|
|
|
136
136
|
SenseVoice ASR — 15x faster than Whisper, with progressive transcription and emotion detection.
|
|
137
137
|
|
|
138
138
|
```typescript
|
|
139
|
-
import {
|
|
139
|
+
import { createSenseVoice } from '@omote/core';
|
|
140
140
|
|
|
141
|
-
const asr =
|
|
142
|
-
modelUrl: '/models/sensevoice/model.int8.onnx',
|
|
143
|
-
});
|
|
141
|
+
const asr = createSenseVoice(); // Auto-detects platform, fetches from HF CDN
|
|
144
142
|
await asr.load();
|
|
145
143
|
|
|
146
144
|
const { text, emotion, language } = await asr.transcribe(audioSamples);
|
|
@@ -149,22 +147,19 @@ const { text, emotion, language } = await asr.transcribe(audioSamples);
|
|
|
149
147
|
#### Platform-Aware ASR
|
|
150
148
|
|
|
151
149
|
```typescript
|
|
152
|
-
import { shouldUseNativeASR, SafariSpeechRecognition,
|
|
150
|
+
import { shouldUseNativeASR, SafariSpeechRecognition, createSenseVoice } from '@omote/core';
|
|
153
151
|
|
|
154
152
|
const asr = shouldUseNativeASR()
|
|
155
153
|
? new SafariSpeechRecognition({ language: 'en-US' })
|
|
156
|
-
:
|
|
154
|
+
: createSenseVoice();
|
|
157
155
|
```
|
|
158
156
|
|
|
159
157
|
### Voice Activity Detection (Silero VAD)
|
|
160
158
|
|
|
161
|
-
#### Factory API (Recommended)
|
|
162
|
-
|
|
163
159
|
```typescript
|
|
164
160
|
import { createSileroVAD } from '@omote/core';
|
|
165
161
|
|
|
166
162
|
const vad = createSileroVAD({
|
|
167
|
-
modelUrl: '/models/silero-vad.onnx',
|
|
168
163
|
threshold: 0.5,
|
|
169
164
|
// useWorker: true // Force off-main-thread
|
|
170
165
|
// useWorker: false // Force main thread
|
|
@@ -174,18 +169,6 @@ await vad.load();
|
|
|
174
169
|
const { isSpeech, probability } = await vad.process(audioSamples);
|
|
175
170
|
```
|
|
176
171
|
|
|
177
|
-
#### Direct API
|
|
178
|
-
|
|
179
|
-
```typescript
|
|
180
|
-
import { SileroVADInference, SileroVADWorker } from '@omote/core';
|
|
181
|
-
|
|
182
|
-
// Main thread (mobile-friendly)
|
|
183
|
-
const vad = new SileroVADInference({ modelUrl: '/models/silero-vad.onnx' });
|
|
184
|
-
|
|
185
|
-
// Web Worker (desktop, off-main-thread)
|
|
186
|
-
const vadWorker = new SileroVADWorker({ modelUrl: '/models/silero-vad.onnx' });
|
|
187
|
-
```
|
|
188
|
-
|
|
189
172
|
### Animation Graph
|
|
190
173
|
|
|
191
174
|
State machine for avatar animation states with emotion blending and audio energy.
|
|
@@ -248,7 +231,7 @@ const data = await fetchWithCache('/models/model.onnx', {
|
|
|
248
231
|
});
|
|
249
232
|
|
|
250
233
|
// Cache quota monitoring
|
|
251
|
-
import { configureCacheLimit
|
|
234
|
+
import { configureCacheLimit } from '@omote/core';
|
|
252
235
|
|
|
253
236
|
configureCacheLimit({
|
|
254
237
|
maxSizeBytes: 500 * 1024 * 1024, // 500MB limit
|
|
@@ -307,17 +290,76 @@ const span = telemetry.startSpan('custom-operation');
|
|
|
307
290
|
span.end();
|
|
308
291
|
```
|
|
309
292
|
|
|
310
|
-
|
|
293
|
+
### Text-to-Speech (Kokoro TTS)
|
|
294
|
+
|
|
295
|
+
```typescript
|
|
296
|
+
import { createKokoroTTS } from '@omote/core';
|
|
311
297
|
|
|
312
|
-
|
|
298
|
+
const tts = createKokoroTTS({ defaultVoice: 'af_heart' });
|
|
299
|
+
await tts.load();
|
|
313
300
|
|
|
301
|
+
const audio = await tts.synthesize('Hello world!');
|
|
302
|
+
// audio: Float32Array @ 24kHz
|
|
314
303
|
```
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
304
|
+
|
|
305
|
+
Kokoro auto-detects the platform and picks the matching model: the mixed-fp16 WebGPU model (156MB) on Chrome/Edge, or the q8 WASM model (92MB) on Safari/iOS/Firefox.
|
|
306
|
+
|
|
307
|
+
### Eager Load & Warmup
|
|
308
|
+
|
|
309
|
+
Set `eagerLoad: true` to start loading models at construction time:
|
|
310
|
+
|
|
311
|
+
```typescript
|
|
312
|
+
const tts = createKokoroTTS({ eagerLoad: true }); // Starts loading immediately
|
|
319
313
|
```
|
|
320
314
|
|
|
315
|
+
Use `warmup()` to prime the AudioContext for the iOS/Safari autoplay policy. Call it from a user gesture handler:
|
|
316
|
+
|
|
317
|
+
```typescript
|
|
318
|
+
button.onclick = async () => {
|
|
319
|
+
await avatar.warmup(); // Primes AudioContext
|
|
320
|
+
await avatar.connectVoice({ ... });
|
|
321
|
+
};
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Observability
|
|
325
|
+
|
|
326
|
+
The SDK includes built-in OpenTelemetry-compatible tracing and metrics:
|
|
327
|
+
|
|
328
|
+
```typescript
|
|
329
|
+
import { configureTelemetry, getTelemetry, MetricNames } from '@omote/core';
|
|
330
|
+
|
|
331
|
+
configureTelemetry({
|
|
332
|
+
enabled: true,
|
|
333
|
+
serviceName: 'my-app',
|
|
334
|
+
exporter: 'console', // or OTLPExporter for production
|
|
335
|
+
});
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
All inference calls, model loads, cache operations, and voice turns are automatically instrumented.
|
|
339
|
+
|
|
340
|
+
## Models
|
|
341
|
+
|
|
342
|
+
All models default to the HuggingFace CDN and are auto-downloaded on first use. To self-host the model files, override the default URLs with `configureModelUrls()`:
|
|
343
|
+
|
|
344
|
+
```typescript
|
|
345
|
+
import { configureModelUrls } from '@omote/core';
|
|
346
|
+
|
|
347
|
+
configureModelUrls({
|
|
348
|
+
lam: 'https://your-cdn.com/models/lam.onnx',
|
|
349
|
+
lamData: 'https://your-cdn.com/models/lam.onnx.data',
|
|
350
|
+
senseVoice: 'https://your-cdn.com/models/sensevoice.onnx',
|
|
351
|
+
sileroVad: 'https://your-cdn.com/models/silero_vad.onnx',
|
|
352
|
+
});
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
| Model | HuggingFace Repo | Size |
|
|
356
|
+
|-------|-------------------|------|
|
|
357
|
+
| LAM A2E | `omote-ai/lam-a2e` | `lam.onnx` (230KB) + `lam.onnx.data` (192MB) |
|
|
358
|
+
| SenseVoice | `omote-ai/sensevoice-asr` | 228MB |
|
|
359
|
+
| Silero VAD | `deepghs/silero-vad-onnx` | ~2MB |
|
|
360
|
+
| Kokoro TTS (WASM) | `onnx-community/Kokoro-82M-v1.0-ONNX` | 92MB q8 |
|
|
361
|
+
| Kokoro TTS (WebGPU) | `omote-ai/kokoro-tts` | 156MB mixed-fp16 |
|
|
362
|
+
|
|
321
363
|
## Browser Compatibility
|
|
322
364
|
|
|
323
365
|
WebGPU-first with automatic WASM fallback.
|