@extentos/mcp-server 0.0.90 → 0.0.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/tools/data/capabilityPatterns.d.ts.map +1 -1
- package/dist/tools/data/capabilityPatterns.js +220 -2
- package/dist/tools/data/capabilityPatterns.js.map +1 -1
- package/dist/tools/data/codeExamples.d.ts.map +1 -1
- package/dist/tools/data/codeExamples.js +232 -7
- package/dist/tools/data/codeExamples.js.map +1 -1
- package/dist/tools/data/version.d.ts.map +1 -1
- package/dist/tools/data/version.js +9 -7
- package/dist/tools/data/version.js.map +1 -1
- package/dist/tools/definitions.d.ts.map +1 -1
- package/dist/tools/definitions.js +8 -7
- package/dist/tools/definitions.js.map +1 -1
- package/dist/tools/docs/index.d.ts.map +1 -1
- package/dist/tools/docs/index.js +56 -5
- package/dist/tools/docs/index.js.map +1 -1
- package/dist/tools/handlers/getCredentialGuide.d.ts.map +1 -1
- package/dist/tools/handlers/getCredentialGuide.js +3 -1
- package/dist/tools/handlers/getCredentialGuide.js.map +1 -1
- package/dist/tools/handlers/getProductionChecklist.d.ts.map +1 -1
- package/dist/tools/handlers/getProductionChecklist.js +53 -3
- package/dist/tools/handlers/getProductionChecklist.js.map +1 -1
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"capabilityPatterns.d.ts","sourceRoot":"","sources":["../../../src/tools/data/capabilityPatterns.ts"],"names":[],"mappings":"AAUA,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;
|
|
1
|
+
{"version":3,"file":"capabilityPatterns.d.ts","sourceRoot":"","sources":["../../../src/tools/data/capabilityPatterns.ts"],"names":[],"mappings":"AAUA,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;AAuyBD,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAiB7D,CAAC;AAEF,eAAO,MAAM,yBAAyB,UAAwC,CAAC"}
|
|
@@ -159,7 +159,7 @@ glasses.audio.transcriptions(
|
|
|
159
159
|
};
|
|
160
160
|
const SPEAK_AND_CANCEL = {
|
|
161
161
|
feature: "speak",
|
|
162
|
-
summary: "TTS via platform engine (Android TextToSpeech / iOS AVSpeechSynthesizer). Audio bytes route over HFP to the glasses speaker. Use cancelSpeak() to interrupt for barge-in.",
|
|
162
|
+
summary: "Legacy `glasses.audio.speak(text)` — TTS via platform engine (Android TextToSpeech / iOS AVSpeechSynthesizer). Audio bytes route over HFP to the glasses speaker. Use cancelSpeak() to interrupt for barge-in. **For Phase 3 apps**, prefer `glasses.conversation`'s `speak(text)` inside an onWake/start handler block (see `getCapabilityGuide(feature: 'conversation_speak')`) — composes with the runtime's listen/cancelSpeak/AI pipeline and routes through the cloud TTS provider (OpenAI by default) rather than the platform engine. This `audio.speak` surface stays canonical for apps that want platform-TTS (no cloud dependency, no BYOK key) or for one-shot announcements outside any conversation handler.",
|
|
163
163
|
kotlin: `// Speak a response:
|
|
164
164
|
glasses.audio.speak("Got it.") // blocks until done by default
|
|
165
165
|
|
|
@@ -340,7 +340,7 @@ for await toggles in glasses.toggles.state.stream {
|
|
|
340
340
|
};
|
|
341
341
|
const VOICE_COMMAND = {
|
|
342
342
|
feature: "voice_command",
|
|
343
|
-
summary: "
|
|
343
|
+
summary: "Legacy `glasses.voice.onPhrase(phrase) { handler }` — wake-phrase registration with auto-display on the connection page AND the simulator's click-to-fire voice rail. Sugar over glasses.audio.transcriptions() with structured-concurrency cancellation when a `stops` phrase fires. **For Phase 3 apps**, prefer `glasses.conversation.onWake(phrase) { listen(); ... }` (see `getCapabilityGuide(feature: 'conversation_on_wake')`) — composes the same wake matcher with the conversation runtime's listen/speak/ai.complete pipeline in one handler block. The voice client's hint surface (connection page + sim chips) still works because onWake internally calls voice.onPhrase under the hood. This `voice_command` surface stays canonical for non-conversation flows (e.g. wake → photo capture, wake → toggle UI state, wake → a custom non-listen action where the handler doesn't need `listen()`/`speak()`/`ai.complete()`).",
|
|
344
344
|
kotlin: `import com.extentos.glasses.core.ExtentosGlasses
|
|
345
345
|
import com.extentos.glasses.core.VoiceRegistration
|
|
346
346
|
import kotlinx.coroutines.delay
|
|
@@ -569,6 +569,218 @@ let duration = clip.durationMs
|
|
|
569
569
|
],
|
|
570
570
|
relatedExamples: ["voice_notes"],
|
|
571
571
|
};
|
|
572
|
+
// ── Phase 3 conversation runtime primitives ────────────────────────────
|
|
573
|
+
//
|
|
574
|
+
// Five entries covering the new `glasses.conversation` + `glasses.ai`
|
|
575
|
+
// API surface. Sit alongside the legacy primitives (voice_command,
|
|
576
|
+
// speak, record_audio, transcription_incremental) — the legacy
|
|
577
|
+
// primitives keep working, these are the new composed path.
|
|
578
|
+
// `getCodeExample(pattern: "conversation_agent_loop")` is the full
|
|
579
|
+
// composition; these per-feature entries cover individual snippet shape
|
|
580
|
+
// + gotchas the developer hits per-call.
|
|
581
|
+
const CONVERSATION_ON_WAKE = {
|
|
582
|
+
feature: "conversation_on_wake",
|
|
583
|
+
summary: "Phase 3 — register a conversation handler keyed to a wake phrase. Composes `glasses.voice.onPhrase` (wake matching) + `glasses.conversation.start` (handler dispatch with the runtime's listen()/speak()/ai context). The handler block runs under structured concurrency; `defer` / `finally` runs on cancel.",
|
|
584
|
+
kotlin: `// Inside an init / start function on your handler class:
|
|
585
|
+
glasses.conversation?.onWake("hey assistant") {
|
|
586
|
+
// 'this' is a ConversationScope receiver — listen(), speak(),
|
|
587
|
+
// cancelSpeak() are available directly. glasses.ai stays
|
|
588
|
+
// top-level (developer owns conversation context).
|
|
589
|
+
speak("Hi! What can I do?")
|
|
590
|
+
while (true) {
|
|
591
|
+
val turn = listen()
|
|
592
|
+
if (turn !is Turn.Said) break
|
|
593
|
+
if (turn.text.contains("stop", ignoreCase = true)) {
|
|
594
|
+
speak("Goodbye.")
|
|
595
|
+
break
|
|
596
|
+
}
|
|
597
|
+
val reply = glasses.ai?.complete("User asked: \${turn.text}. Reply briefly.")
|
|
598
|
+
?: "I couldn't reach the model right now."
|
|
599
|
+
speak(reply)
|
|
600
|
+
}
|
|
601
|
+
}`,
|
|
602
|
+
swift: `// Inside an init / start function on your handler:
|
|
603
|
+
_ = glasses.conversation?.onWake(phrase: "hey assistant") { ctx in
|
|
604
|
+
// 'ctx' is a ConversationHandler — listen, speak, cancelSpeak.
|
|
605
|
+
// glasses.ai stays top-level.
|
|
606
|
+
try? await ctx.speak("Hi! What can I do?")
|
|
607
|
+
while !Task.isCancelled {
|
|
608
|
+
let turn = await ctx.listen()
|
|
609
|
+
guard case .said(let text) = turn else { return }
|
|
610
|
+
if text.lowercased().contains("stop") {
|
|
611
|
+
try? await ctx.speak("Goodbye.")
|
|
612
|
+
return
|
|
613
|
+
}
|
|
614
|
+
let reply = (try? await glasses.ai?.complete("User asked: \\(text). Reply briefly."))
|
|
615
|
+
?? "I couldn't reach the model right now."
|
|
616
|
+
try? await ctx.speak(reply)
|
|
617
|
+
}
|
|
618
|
+
}`,
|
|
619
|
+
gotchas: [
|
|
620
|
+
"**`glasses.conversation` is null when conversationOptions wasn't passed at ExtentosConfig construction.** Phase 3 is opt-in to save the ~10 MB ONNX model load cost for voice-only apps. Use `glasses.conversation?.onWake(...)` defensively; check `glasses.runtime.events` for a `conversation.init_failed` Log event if the surface is unexpectedly null (missing model files → init fails warn-logged).",
|
|
621
|
+
"**Wake phrase composes with the voice client.** `onWake(\"hey assistant\")` internally calls `voice.onPhrase(\"hey assistant\") { core.startHandler(id) }`. The voice client's hint surface (connection page + simulator click-to-fire) still shows the phrase. Stops-cancellation works the same way as legacy onPhrase.",
|
|
622
|
+
"**`start { }` is the primitive when you want a custom trigger.** No wake phrase — register a handler that the developer fires manually (button, schedule, custom voice phrase, external signal). Returns the same ConversationRegistration shape; cancel() removes the registration without killing an in-flight handler.",
|
|
623
|
+
"**Singleton-active in v1.** Only one conversation handler runs at a time per core. Concurrent triggers get rejected (`startHandler` returns false). Future versions may relax this; for now, write handlers that complete promptly or use cancellation to free the slot.",
|
|
624
|
+
],
|
|
625
|
+
relatedExamples: ["conversation_agent_loop"],
|
|
626
|
+
};
|
|
627
|
+
const CONVERSATION_LISTEN = {
|
|
628
|
+
feature: "conversation_listen",
|
|
629
|
+
summary: "Phase 3 — suspends the handler until the next user `Turn` resolves. Drives the Rust core's VAD-gated capture loop: VAD → 8→16 kHz upsample → STT chunk-batch + Smart Turn EOU → coalescer → `Turn::Said`. Real audio: live mic via the audio_chunk pipe. Agent-driven E2E: `injectTranscript(text)` resolves it directly (H1 wiring).",
|
|
630
|
+
kotlin: `// Inside an onWake / start handler block (ConversationScope receiver):
|
|
631
|
+
val turn = listen()
|
|
632
|
+
when (turn) {
|
|
633
|
+
is Turn.Said -> { /* turn.text */ }
|
|
634
|
+
is Turn.HoldRequested -> { /* developer-signaled hold; uncommon v1 */ }
|
|
635
|
+
is Turn.Interrupted -> { /* barge-in cancelled a concurrent speak() */ }
|
|
636
|
+
is Turn.Error -> when (turn.kind) {
|
|
637
|
+
is TurnError.SttTimeout -> { /* STT didn't respond in time */ }
|
|
638
|
+
is TurnError.SttNetwork -> { /* connectivity */ }
|
|
639
|
+
is TurnError.SttRateLimited -> { /* back off */ }
|
|
640
|
+
is TurnError.SttUnauthorized -> { /* fix the BYOK key */ }
|
|
641
|
+
is TurnError.Internal -> { /* turn.kind.message has diagnostics */ }
|
|
642
|
+
else -> { /* future variants */ }
|
|
643
|
+
}
|
|
644
|
+
}`,
|
|
645
|
+
swift: `// Inside an onWake / start handler closure:
|
|
646
|
+
let turn = await ctx.listen()
|
|
647
|
+
switch turn {
|
|
648
|
+
case .said(let text): break // use 'text'
|
|
649
|
+
case .holdRequested: break // developer-signaled hold
|
|
650
|
+
case .interrupted: break // barge-in cancelled concurrent speak()
|
|
651
|
+
case .error(let kind):
|
|
652
|
+
switch kind {
|
|
653
|
+
case .sttTimeout, .sttNetwork, .sttRateLimited, .sttUnauthorized: break
|
|
654
|
+
case .internal(let msg): _ = msg
|
|
655
|
+
default: break // future variants
|
|
656
|
+
}
|
|
657
|
+
}`,
|
|
658
|
+
gotchas: [
|
|
659
|
+
"**`Turn` is a sealed type — always cover the four variants.** Compiler-enforced exhaustiveness on Kotlin / Swift catches missing handling at build time. `Turn.Said` carries the coalesced final text; the others signal control-flow events (hold / interrupted / error).",
|
|
660
|
+
"**`TurnError.Internal { message }` is diagnostics-only, not user-facing.** Show the user something like \"Network trouble — try again\" and log the message for the developer. Surfacing the raw message risks leaking implementation details.",
|
|
661
|
+
"**No real audio flowing → silence fallback fires after 600 ms.** The runtime finalizes a turn via one of two paths: the ML signal (Smart Turn sigmoid > 0.5) OR the `coalesceTrailingSilenceMs` timeout. If your test setup pipes no audio and injects no transcript, listen() will eventually resolve `Turn::Error{kind: SttTimeout}` after STT_RESPONSE_TIMEOUT (5 s default).",
|
|
662
|
+
"**Agent-driven E2E: wait for `conversation.turn_started` in the event log before injecting the follow-up text** (H1 bridge). injectTranscript only resolves a parked listen(); injecting before the handler is parked drops the text silently (no FSM state poisoning, just no effect).",
|
|
663
|
+
],
|
|
664
|
+
relatedExamples: ["conversation_agent_loop"],
|
|
665
|
+
};
|
|
666
|
+
const CONVERSATION_SPEAK = {
|
|
667
|
+
feature: "conversation_speak",
|
|
668
|
+
summary: "Phase 3 — synthesize TTS for `text` and stream PCM chunks through the registered audio-out sink (sim browser via Web Audio, real-hardware playback path on device). Throws `SpeakException` subclass on failure / cancellation. `cancelSpeak()` aborts in-flight speak by drop-cancelling the TtsStream (reqwest connection closes → no more bytes).",
|
|
669
|
+
kotlin: `// Inside an onWake / start handler block:
|
|
670
|
+
try {
|
|
671
|
+
speak("Hello! How can I help?")
|
|
672
|
+
// Returns only after all chunks have been pushed to the sink.
|
|
673
|
+
} catch (e: SpeakException.Cancelled) {
|
|
674
|
+
// cancelSpeak() fired (manual barge-in trigger in v1)
|
|
675
|
+
} catch (e: SpeakException.Network) {
|
|
676
|
+
// TTS provider unreachable
|
|
677
|
+
} catch (e: SpeakException.Unauthorized) {
|
|
678
|
+
// OpenAI rejected the key (BYOK misconfigured)
|
|
679
|
+
} catch (e: SpeakException.RateLimited) {
|
|
680
|
+
// Back off
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
// Cancel an in-flight speak from another coroutine / handler tick:
|
|
684
|
+
cancelSpeak() // no-op when nothing in flight`,
|
|
685
|
+
swift: `// Inside an onWake / start handler closure:
|
|
686
|
+
do {
|
|
687
|
+
try await ctx.speak("Hello! How can I help?")
|
|
688
|
+
} catch SpeakError.cancelled {
|
|
689
|
+
// cancelSpeak() fired (manual barge-in trigger in v1)
|
|
690
|
+
} catch SpeakError.network {
|
|
691
|
+
// TTS provider unreachable
|
|
692
|
+
} catch SpeakError.unauthorized {
|
|
693
|
+
// OpenAI rejected the key
|
|
694
|
+
} catch {
|
|
695
|
+
// Other variants (timeout, rateLimited, internal)
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
// Cancel from elsewhere:
|
|
699
|
+
ctx.cancelSpeak() // no-op when nothing in flight`,
|
|
700
|
+
gotchas: [
|
|
701
|
+
"**v1 barge-in is the MECHANISM (cancelSpeak), not the TRIGGER.** The runtime ships the cancel path; the trigger (detect user voice during speak → auto-cancel) is shell-side in v1. v1.1 adds an automatic VAD watchdog when `bargeIn: true` is set in the manifest (forward-compatible — the config knob is already there).",
|
|
702
|
+
"**Cancellation latency = at most one chunk read time.** The speak loop checks the cancel flag between TTS chunks (~tens of ms for OpenAI streaming). For sub-perception interruption, v1.1 may add tighter cancellation; v1's is acceptable for typical interactions.",
|
|
703
|
+
"**No SpeakAudioSink registered = chunks are discarded silently.** speak() still resolves Ok — useful for tests / warmup. In production, the host-app library (DefaultConversationClient on Android, iOS sibling) registers a sink that forwards to the transport's outgoing-audio path (BrowserSim → hub binary relay → browser Web Audio; RealMeta HFP playback in v1.1).",
|
|
704
|
+
"**OpenAI TTS emits 24 kHz mono i16 LE PCM by default.** That's the sample rate that arrives at the SpeakAudioSink and gets forwarded to the browser's Web Audio. Don't try to resample mid-flight; the browser AudioContext handles rate matching.",
|
|
705
|
+
],
|
|
706
|
+
relatedExamples: ["conversation_agent_loop"],
|
|
707
|
+
};
|
|
708
|
+
const CONVERSATION_AI_COMPLETE = {
|
|
709
|
+
feature: "conversation_ai_complete",
|
|
710
|
+
summary: "Phase 3 — BYOK LLM completion via `glasses.ai.complete(prompt)`. Single-turn (developer owns conversation history; pass accumulated context in the prompt each call). Key flows DIRECT to api.openai.com (no Extentos proxy — synthesis §6 / §8). Throws `AiException` subclass on failure. `ai_stream` deferred core-side; v1 ships `complete()` only.",
|
|
711
|
+
kotlin: `// Set the OpenAI BYOK key once at app init (typically from
|
|
712
|
+
// BuildConfig or a secure store — see getCredentialGuide(service: "openai")):
|
|
713
|
+
glasses.ai?.setOpenaiApiKey(BuildConfig.OPENAI_API_KEY)
|
|
714
|
+
|
|
715
|
+
// Per-call: prompt is plain text. Developer manages multi-turn
|
|
716
|
+
// context by concatenating history into the prompt:
|
|
717
|
+
val reply = try {
|
|
718
|
+
glasses.ai?.complete("User asked: \${question}. Reply briefly.")
|
|
719
|
+
?: "I couldn't reach the model right now."
|
|
720
|
+
} catch (e: AiException.KeyNotSet) {
|
|
721
|
+
"I need an OpenAI key — please configure it in settings."
|
|
722
|
+
} catch (e: AiException.Network) {
|
|
723
|
+
"Network trouble — try again later."
|
|
724
|
+
} catch (e: AiException.Unauthorized) {
|
|
725
|
+
"The OpenAI key was rejected — check your key."
|
|
726
|
+
} catch (e: AiException.RateLimited) {
|
|
727
|
+
"Too many requests — try again in a moment."
|
|
728
|
+
}`,
|
|
729
|
+
swift: `// Set the OpenAI BYOK key once at app init:
|
|
730
|
+
glasses.ai?.setOpenAiApiKey(Secrets.openAiKey)
|
|
731
|
+
|
|
732
|
+
// Per-call:
|
|
733
|
+
let reply: String
|
|
734
|
+
do {
|
|
735
|
+
reply = try await glasses.ai?.complete(
|
|
736
|
+
"User asked: \\(question). Reply briefly.") ?? ""
|
|
737
|
+
} catch AiError.keyNotSet {
|
|
738
|
+
reply = "I need an OpenAI key — please configure it in settings."
|
|
739
|
+
} catch AiError.network {
|
|
740
|
+
reply = "Network trouble — try again later."
|
|
741
|
+
} catch AiError.unauthorized {
|
|
742
|
+
reply = "The OpenAI key was rejected — check your key."
|
|
743
|
+
} catch {
|
|
744
|
+
reply = "Couldn't reach the model right now."
|
|
745
|
+
}`,
|
|
746
|
+
gotchas: [
|
|
747
|
+
"**`glasses.ai` is null on cores constructed without conversationOptions.** Same opt-in story as `glasses.conversation`. Use `?.` defensively; `AiException.KeyNotSet` only fires when `glasses.ai` is non-null but no key is configured (`setOpenaiApiKey` was never called, or the key was set and later cleared) — null `glasses.ai` is a different failure mode.",
|
|
748
|
+
"**The key flows direct to OpenAI; Extentos never sees it.** Per synthesis §6 + plan §4.3 BYOK contract. The Extentos backend doesn't proxy LLM calls (it DOES proxy STT + TTS for `glasses.conversation`'s `listen()` + `speak()`; the LLM path is the BYOK exception).",
|
|
749
|
+
"**v1 single-turn only.** No conversation history maintained by the runtime. Multi-turn = the developer accumulates prior turns into the prompt themselves. `ai_stream` (streaming chunks) is deferred core-side; if you need streaming for long answers, you're back to the legacy `BYOK_ANTHROPIC` pattern with manual HTTP wiring until v1.1 lands.",
|
|
750
|
+
"**Model selection comes from the manifest's `conversation.model` field** (default `openai/gpt-4o-mini`). The `vendor/` prefix is stripped before the OpenAI request; vendor prefix is reserved for future Track-B AI Gateway routing.",
|
|
751
|
+
"**Not yet auto-tracked in the 'ai' event-log chip.** The legacy `glasses.observability.aiCall(label) { ... }` wrapper emits ai_call_start / ai_call_end frames; `glasses.ai.complete` doesn't auto-emit these. If you want call timing in `getEventLog(filter: \"ai\")`, wrap the complete() call: `glasses.observability.aiCall(\"chat\") { glasses.ai?.complete(prompt) }` — works because aiCall is provider-agnostic.",
|
|
752
|
+
],
|
|
753
|
+
relatedExamples: ["conversation_agent_loop", "byok_anthropic"],
|
|
754
|
+
};
|
|
755
|
+
const CONVERSATION_RUNTIME = {
|
|
756
|
+
feature: "conversation_runtime",
|
|
757
|
+
summary: "Phase 3 — umbrella entry for the `glasses.conversation` + `glasses.ai` API. Composes VAD + STT + Smart Turn EOU + TTS + BYOK OpenAI LLM in a single handler block. See per-feature entries (conversation_on_wake, conversation_listen, conversation_speak, conversation_ai_complete) for call-shape detail. Full composition example: `getCodeExample(pattern: \"conversation_agent_loop\")`. Conceptual guide: `searchDocs(topic: \"conversation_runtime\")`.",
|
|
758
|
+
kotlin: `// The whole runtime in one handler block:
|
|
759
|
+
glasses.conversation?.onWake("hey assistant") {
|
|
760
|
+
speak("How can I help?")
|
|
761
|
+
val turn = listen()
|
|
762
|
+
if (turn is Turn.Said) {
|
|
763
|
+
val reply = glasses.ai?.complete("User said: \${turn.text}") ?: "(no AI)"
|
|
764
|
+
speak(reply)
|
|
765
|
+
}
|
|
766
|
+
}`,
|
|
767
|
+
swift: `// The whole runtime in one handler closure:
|
|
768
|
+
_ = glasses.conversation?.onWake(phrase: "hey assistant") { ctx in
|
|
769
|
+
try? await ctx.speak("How can I help?")
|
|
770
|
+
let turn = await ctx.listen()
|
|
771
|
+
if case .said(let text) = turn {
|
|
772
|
+
let reply = (try? await glasses.ai?.complete("User said: \\(text)")) ?? "(no AI)"
|
|
773
|
+
try? await ctx.speak(reply)
|
|
774
|
+
}
|
|
775
|
+
}`,
|
|
776
|
+
gotchas: [
|
|
777
|
+
"**Opt-in via `ExtentosConfig.conversationOptions`.** Pass `ConversationCoreOptions(config:, sileroVadModelPath:, smartTurnModelPath:, backendBaseUrl:, aiEndpoint:)` at create. Missing options → both `glasses.conversation` and `glasses.ai` are null; voice-only apps don't pay the ~10 MB model load. The required Silero VAD + Smart Turn v3 ONNX models ship as Android assets / iOS resources.",
|
|
778
|
+
"**Six A10 lifecycle events emitted automatically:** handler_started / handler_finished / turn_started / turn_ended / barge_in / error. Visible via `glasses.runtime.events.filterIsInstance<RuntimeEvent.Conversation>()`, the simulator's right-rail ConversationPanel (H2), and `getEventLog(filter: \"voice\")` (conversation.error climbs to filter: \"errors\" automatically).",
|
|
779
|
+
"**PII boundary at emission.** Event payloads carry `textLen` (char count) only — never the transcript text itself. Rust core enforces; bridges + persist + agent surfaces carry the boundary forward. Don't try to add a backdoor; v1 deliberately doesn't ship one.",
|
|
780
|
+
"**Compare with the legacy path before committing.** `searchDocs(topic: \"conversation_runtime\")` has a comparison table positioning Phase 3 vs. legacy by use case (custom STT, non-OpenAI LLM, hand-rolled turn-finalization → still use legacy primitives).",
|
|
781
|
+
],
|
|
782
|
+
relatedExamples: ["conversation_agent_loop"],
|
|
783
|
+
};
|
|
572
784
|
export const CAPABILITY_GUIDES = {
|
|
573
785
|
capture_photo: CAPTURE_PHOTO,
|
|
574
786
|
capture_video: CAPTURE_VIDEO,
|
|
@@ -580,6 +792,12 @@ export const CAPABILITY_GUIDES = {
|
|
|
580
792
|
connection_state: CONNECTION_STATE,
|
|
581
793
|
toggles: TOGGLES,
|
|
582
794
|
voice_command: VOICE_COMMAND,
|
|
795
|
+
// Phase 3 conversation runtime primitives (see block above).
|
|
796
|
+
conversation_runtime: CONVERSATION_RUNTIME,
|
|
797
|
+
conversation_on_wake: CONVERSATION_ON_WAKE,
|
|
798
|
+
conversation_listen: CONVERSATION_LISTEN,
|
|
799
|
+
conversation_speak: CONVERSATION_SPEAK,
|
|
800
|
+
conversation_ai_complete: CONVERSATION_AI_COMPLETE,
|
|
583
801
|
};
|
|
584
802
|
export const CAPABILITY_GUIDE_FEATURES = Object.keys(CAPABILITY_GUIDES).sort();
|
|
585
803
|
//# sourceMappingURL=capabilityPatterns.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"capabilityPatterns.js","sourceRoot":"","sources":["../../../src/tools/data/capabilityPatterns.ts"],"names":[],"mappings":"AAAA,mEAAmE;AACnE,iEAAiE;AACjE,+DAA+D;AAC/D,iCAAiC;AACjC,EAAE;AACF,iEAAiE;AACjE,qEAAqE;AACrE,mEAAmE;AACnE,0CAA0C;AAW1C,MAAM,aAAa,GAAoB;IACrC,OAAO,EAAE,eAAe;IACxB,OAAO,EACL,yKAAyK;IAC3K,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;mGAoCyF;IACjG,KAAK,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;6BA2BoB;IAC3B,OAAO,EAAE;QACP,wQAAwQ;QACxQ,wQAAwQ;QACxQ,6JAA6J;QAC7J,+JAA+J;QAC/J,mJAAmJ;QACnJ,kJAAkJ;QAClJ,uRAAuR;KACxR;IACD,eAAe,EAAE,CAAC,sBAAsB,CAAC;CAC1C,CAAC;AAEF,MAAM,YAAY,GAAoB;IACpC,OAAO,EAAE,cAAc;IACvB,OAAO,EACL,qPAAqP;IACvP,MAAM,EAAE;;;;;;;;;;;;yDAY+C;IACvD,KAAK,EAAE;;;;;;;;;gCASuB;IAC9B,OAAO,EAAE;QACP,kJAAkJ;QAClJ,wIAAwI;QACxI,iJAAiJ;QACjJ,iLAAiL;QACjL,wFAAwF;QACxF,uUAAuU;KACxU;IACD,eAAe,EAAE,CAAC,oBAAoB,EAAE,aAAa,CAAC;CACvD,CAAC;AAEF,MAAM,yBAAyB,GAAoB;IACjD,OAAO,EAAE,2BAA2B;IACpC,OAAO,EACL,wIAAwI;IAC1I,MAAM,EAAE;;;;;;;;;;;;EAYR;IACA,KAAK,EAAE;;;;;;;;;EASP;IACA,OAAO,EAAE;QACP,kQAAkQ;QAClQ,qOAAqO;QACrO,uKAAuK;QACvK,4MAA4M;QAC5M,8JAA8J;KAC/J;IACD,eAAe,EAAE,CAAC,oBAAoB,EAAE,uBAAuB,EAAE,aAAa,EAAE,sBAAsB,CAAC;CACxG,CAAC;AAEF,MAAM,gBAAgB,GAAoB;IACxC,OAAO,EAAE,OAAO;IAChB,OAAO,EACL,
|
|
1
|
+
{"version":3,"file":"capabilityPatterns.js","sourceRoot":"","sources":["../../../src/tools/data/capabilityPatterns.ts"],"names":[],"mappings":"AAAA,mEAAmE;AACnE,iEAAiE;AACjE,+DAA+D;AAC/D,iCAAiC;AACjC,EAAE;AACF,iEAAiE;AACjE,qEAAqE;AACrE,mEAAmE;AACnE,0CAA0C;AAW1C,MAAM,aAAa,GAAoB;IACrC,OAAO,EAAE,eAAe;IACxB,OAAO,EACL,yKAAyK;IAC3K,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;mGAoCyF;IACjG,KAAK,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;6BA2BoB;IAC3B,OAAO,EAAE;QACP,wQAAwQ;QACxQ,wQAAwQ;QACxQ,6JAA6J;QAC7J,+JAA+J;QAC/J,mJAAmJ;QACnJ,kJAAkJ;QAClJ,uRAAuR;KACxR;IACD,eAAe,EAAE,CAAC,sBAAsB,CAAC;CAC1C,CAAC;AAEF,MAAM,YAAY,GAAoB;IACpC,OAAO,EAAE,cAAc;IACvB,OAAO,EACL,qPAAqP;IACvP,MAAM,EAAE;;;;;;;;;;;;yDAY+C;IACvD,KAAK,EAAE;;;;;;;;;gCASuB;IAC9B,OAAO,EAAE;QACP,kJAAkJ;QAClJ,wIAAwI;QACxI,iJAAiJ;QACjJ,iLAAiL;QACjL,wFAAwF;QACxF,uUAAuU;KACxU;IACD,eAAe,EAAE,CAAC,oBAAoB,EAAE,aAAa,CAAC;CACvD,CAAC;AAEF,MAAM,yBAAyB,GAAoB;IACjD,OAAO,EAAE,2BAA2B;IACpC,OAAO,EACL,wIAAwI;IAC1I,MAAM,EAAE;;;;;;;;;;;;EAYR;IACA,KAAK,EAAE;;;;;;;;;EASP;IACA,OAAO,EAAE;QACP,kQAAkQ;QAClQ,qOAAqO;QACrO,uKAAuK;QACvK,4MAA4M;QAC5M,8JAA8J;KAC/J;IACD,eAAe,EAAE,CAAC,oBAAoB,EAAE,uBAAuB,EAAE,aAAa,EAAE,sBAAsB,CAAC;CACxG,CAAC;AAEF,MAAM,gBAAgB,GAAoB;IACxC,OAAO,EAAE,OAAO;IAChB,OAAO,EACL,6rBAA6rB;IAC/rB,MAAM,EAAE;;;;;;;aAOG;IACX,KAAK,EAAE;;;;;;;;cAQK;IACZ,OAAO,EAAE;QACP,gJAAgJ;QAChJ,wKAAwK;QACxK,+LAA+L;QAC/L,kHAAkH;KACnH;IACD,eAAe,EAAE,CAAC,oBAAoB,EAAE,gBAAgB,EAAE,aAAa,EAAE,sBAAsB,CAAC;CACjG,CAAC;AAEF,MAAM,YAAY,GAAoB;IACpC,OAAO,EAAE,cAAc;IACvB,OAAO,EACL,4LAA4L;IAC9L,MAAM,EAAE;;;;;;;;;;EAUR;IACA,KAAK,EAAE;;;;;;;;;EASP;IACA,OAAO,EAAE;QACP,6LAA6L;QAC7L,2OAA2O;QAC3O,+LAA+L;QAC/L,+HAA+H;QAC/H,gHAAgH;KACjH;IACD,eAAe,EAAE,EAAE;CACpB,CAAC;AAEF,MAAM,YAAY,GAAoB;IACpC,OAAO,EAAE,cAAc;IACvB,OAAO,EACL,yJAAyJ;IAC3J,MAAM,EAAE;;;;;;;;;EASR;IACA,KAAK,EAAE;;;;;;;EAOP;IACA,OAAO,EAAE;QACP,8RAA8R;QAC9R,kJAAkJ;QAClJ,sHAAsH;QACtH,2GAA2G;QAC3G,uJAAuJ;KACxJ;IACD,eAAe,EAAE,EAAE;CACpB,CAAC;AAEF,MAAM,gBAAgB,GAAoB;IACxC,OAAO,EAAE,kBAAkB;IAC3B,OAAO,EACL,2MAA2M;IAC7M,MAAM,EAAE;;;;;;;;;
EASR;IACA,KAAK,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA8BP;IACA,OAAO,EAAE;QACP,2JAA2J;QAC3J,uPAAuP;KACxP;IACD,eAAe,EAAE,CAAC,uBAAuB,CAAC;CAC3C,CAAC;AAEF,MAAM,OAAO,GAAoB;IAC/B,OAAO,EAAE,SAAS;IAClB,OAAO,EACL,0SAA0S;IAC5S,MAAM,EAAE;;;;;;mFAMyE;IACjF,KAAK,EAAE;;;;;;;;;;;;;;;;;;;;;EAqBP;IACA,OAAO,EAAE;QACP,6MAA6M;QAC7M,4JAA4J;QAC5J,kHAAkH;QAClH,4JAA4J;KAC7J;IACD,eAAe,EAAE,EAAE;CACpB,CAAC;AAEF,MAAM,aAAa,GAAoB;IACrC,OAAO,EAAE,eAAe;IACxB,OAAO,EACL,84BAA84B;IACh5B,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;8CAsCoC;IAC5C,KAAK,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;8CAuCqC;IAC5C,OAAO,EAAE;QACP,kQAAkQ;QAClQ,mXAAmX;QACnX,2SAA2S;QAC3S,0QAA0Q;QAC1Q,uOAAuO;QACvO,4PAA4P;QAC5P,sQAAsQ;KACvQ;IACD,eAAe,EAAE,CAAC,oBAAoB,EAAE,gBAAgB,EAAE,aAAa,CAAC;CACzE,CAAC;AAEF,MAAM,aAAa,GAAoB;IACrC,OAAO,EAAE,eAAe;IACxB,OAAO,EACL,+XAA+X;IACjY,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gEA0EsD;IAC9D,KAAK,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA6CF;IACL,OAAO,EAAE;QACP,yfAAyf;QACzf,2lBAA2lB;QAC3lB,mdAAmd;QACnd,6ZAA6Z;QAC7Z,6QAA6Q;QAC7Q,0UAA0U;QAC1U,kPAAkP;QAClP,sPAAsP;QACtP,4NAA4N;QAC5N,sZAAsZ;KACvZ;IACD,eAAe,EAAE,CAAC,aAAa,CAAC;CACjC,CAAC;AAEF,0EAA0E;AAC1E,EAAE;AACF,sEAAsE;AACtE,mEAAmE;AACnE,+DAA+D;AAC/D,4DAA4D;AAC5D,mEAAmE;AACnE,wEAAwE;AACxE,yCAAyC;AAEzC,MAAM,oBAAoB,GAAoB;IAC5C,OAAO,EAAE,sBAAsB;IAC/B,OAAO,EACL,gTAAgT;IAClT,MAAM,EAAE;;;;;;;;;;;;;;;;;EAiBR;IACA,KAAK,EAAE;;;;;;;;;;;;;;;;EAgBP;IACA,OAAO,EAAE;QACP,6YAA6Y;QAC7Y,2TAA2T;QAC3T,2TAA2T;QAC3T,0QAA0Q;KAC3Q;IACD,eAAe,EAAE,CAAC,yBAAyB,CAAC;CAC7C,CAAC;AAEF,MAAM,mBAAmB,GAAoB;IAC3C,OAAO,EAAE,qBAAqB;IAC9B,OAAO,EACL,uUAAuU;IACzU,MAAM,EAAE;;;;;;;;;;;;;;EAcR;IACA,KAAK,EAAE;;;;;;;;;;;;EAYP;IACA,OAAO,EAAE;QACP,4QAA4Q;QAC5Q,gPAAgP;QAChP,qVAAqV;QACrV,yRAAyR;KAC1R;IACD,eAAe,EAAE,CAAC,yBAAyB,CAAC;CAC7C,CAAC;AAEF,MAAM,kBAAkB,GAAoB;IAC1C,OAAO,EAAE,oBAAoB;IAC7B,OAAO,EACL,sVAAsV;IACxV,MAAM,EAAE;;;;;;;;;;;;;;;+CAeqC;IAC7C,KAAK,EAAE;;;;;;;;;;;;;;mDAc0C;IACjD,OAAO,EAAE;QACP,8TAA8T;QAC9T,uQAAuQ;QACvQ,4WAA4W;QAC5W,oPAAo
P;KACrP;IACD,eAAe,EAAE,CAAC,yBAAyB,CAAC;CAC7C,CAAC;AAEF,MAAM,wBAAwB,GAAoB;IAChD,OAAO,EAAE,0BAA0B;IACnC,OAAO,EACL,yVAAyV;IAC3V,MAAM,EAAE;;;;;;;;;;;;;;;;;EAiBR;IACA,KAAK,EAAE;;;;;;;;;;;;;;;;EAgBP;IACA,OAAO,EAAE;QACP,6TAA6T;QAC7T,yQAAyQ;QACzQ,uVAAuV;QACvV,uOAAuO;QACvO,2ZAA2Z;KAC5Z;IACD,eAAe,EAAE,CAAC,yBAAyB,EAAE,gBAAgB,CAAC;CAC/D,CAAC;AAEF,MAAM,oBAAoB,GAAoB;IAC5C,OAAO,EAAE,sBAAsB;IAC/B,OAAO,EACL,gcAAgc;IAClc,MAAM,EAAE;;;;;;;;EAQR;IACA,KAAK,EAAE;;;;;;;;EAQP;IACA,OAAO,EAAE;QACP,uYAAuY;QACvY,sXAAsX;QACtX,sQAAsQ;QACtQ,gQAAgQ;KACjQ;IACD,eAAe,EAAE,CAAC,yBAAyB,CAAC;CAC7C,CAAC;AAEF,MAAM,CAAC,MAAM,iBAAiB,GAAoC;IAChE,aAAa,EAAE,aAAa;IAC5B,aAAa,EAAE,aAAa;IAC5B,YAAY,EAAE,YAAY;IAC1B,yBAAyB,EAAE,yBAAyB;IACpD,KAAK,EAAE,gBAAgB;IACvB,YAAY,EAAE,YAAY;IAC1B,YAAY,EAAE,YAAY;IAC1B,gBAAgB,EAAE,gBAAgB;IAClC,OAAO,EAAE,OAAO;IAChB,aAAa,EAAE,aAAa;IAC5B,6DAA6D;IAC7D,oBAAoB,EAAE,oBAAoB;IAC1C,oBAAoB,EAAE,oBAAoB;IAC1C,mBAAmB,EAAE,mBAAmB;IACxC,kBAAkB,EAAE,kBAAkB;IACtC,wBAAwB,EAAE,wBAAwB;CACnD,CAAC;AAEF,MAAM,CAAC,MAAM,yBAAyB,GAAG,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,EAAE,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"codeExamples.d.ts","sourceRoot":"","sources":["../../../src/tools/data/codeExamples.ts"],"names":[],"mappings":"AAeA,6DAA6D;AAC7D,MAAM,WAAW,iBAAiB;IAChC,kEAAkE;IAClE,aAAa,EAAE,gBAAgB,GAAG,qBAAqB,GAAG,oBAAoB,GAAG,2BAA2B,CAAC;IAC7G,0EAA0E;IAC1E,QAAQ,EAAE,MAAM,CAAC;IACjB,yCAAyC;IACzC,SAAS,EAAE,sBAAsB,CAAC;CACnC;AAED,6DAA6D;AAC7D,MAAM,WAAW,oBAAoB;IACnC,mEAAmE;IACnE,EAAE,EAAE,MAAM,CAAC;IACX,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4CAA4C;IAC5C,SAAS,EAAE,uBAAuB,GAAG,sBAAsB,CAAC;IAC5D;4EACwE;IACxE,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,8DAA8D;IAC9D,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,mEAAmE;AACnE,MAAM,WAAW,oBAAoB;IACnC,sBAAsB;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,oEAAoE;IACpE,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE;QACJ,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B;;;;;;OAMG;IACH,oBAAoB,CAAC,EAAE;QACrB,OAAO,CAAC,EAAE,iBAAiB,EAAE,CAAC;QAC9B,gBAAgB,CAAC,EAAE,oBAAoB,EAAE,CAAC;KAC3C,CAAC;IACF;;;;;;OAMG;IACH,eAAe,CAAC,EAAE;QAChB,OAAO,CAAC,EAAE,oBAAoB,EAAE,CAAC;KAClC,CAAC;CACH;
|
|
1
|
+
{"version":3,"file":"codeExamples.d.ts","sourceRoot":"","sources":["../../../src/tools/data/codeExamples.ts"],"names":[],"mappings":"AAeA,6DAA6D;AAC7D,MAAM,WAAW,iBAAiB;IAChC,kEAAkE;IAClE,aAAa,EAAE,gBAAgB,GAAG,qBAAqB,GAAG,oBAAoB,GAAG,2BAA2B,CAAC;IAC7G,0EAA0E;IAC1E,QAAQ,EAAE,MAAM,CAAC;IACjB,yCAAyC;IACzC,SAAS,EAAE,sBAAsB,CAAC;CACnC;AAED,6DAA6D;AAC7D,MAAM,WAAW,oBAAoB;IACnC,mEAAmE;IACnE,EAAE,EAAE,MAAM,CAAC;IACX,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4CAA4C;IAC5C,SAAS,EAAE,uBAAuB,GAAG,sBAAsB,CAAC;IAC5D;4EACwE;IACxE,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,8DAA8D;IAC9D,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,mEAAmE;AACnE,MAAM,WAAW,oBAAoB;IACnC,sBAAsB;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,oEAAoE;IACpE,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE;QACJ,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B;;;;;;OAMG;IACH,oBAAoB,CAAC,EAAE;QACrB,OAAO,CAAC,EAAE,iBAAiB,EAAE,CAAC;QAC9B,gBAAgB,CAAC,EAAE,oBAAoB,EAAE,CAAC;KAC3C,CAAC;IACF;;;;;;OAMG;IACH,eAAe,CAAC,EAAE;QAChB,OAAO,CAAC,EAAE,oBAAoB,EAAE,CAAC;KAClC,CAAC;CACH;AAizDD,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,WAAW,CAUrD,CAAC;AAEF,eAAO,MAAM,qBAAqB,UAAoC,CAAC"}
|
|
@@ -13,8 +13,8 @@
|
|
|
13
13
|
import { VERSION_INFO } from "./version.js";
|
|
14
14
|
const VOICE_QA_ASSISTANT = {
|
|
15
15
|
pattern: "voice_qa_assistant",
|
|
16
|
-
title: "Voice Q&A assistant
|
|
17
|
-
description: "
|
|
16
|
+
title: "Voice Q&A assistant — LEGACY manual composition (use conversation_agent_loop for new apps)",
|
|
17
|
+
description: "**Legacy pattern — for new voice-assistant work use `conversation_agent_loop` (Phase 3 `glasses.conversation` runtime).** This is the original manual composition: `glasses.voice.onPhrase` for wake matching, `glasses.audio.recordDiscrete` for one-shot question capture with silence-VAD, customer-side `AnthropicClient` for the LLM call, `glasses.audio.speak` for TTS. Multi-turn loop optional. The flow was the F21 / F23 friction-log resolution — the single use case that motivated the pure-SDK pivot — and it stays shipped for apps that need fine-grained control over each step (custom STT engine, custom turn-finalization, an LLM provider other than OpenAI). For the canonical \"wake + ask + AI responds\" pattern in greenfield apps, the Phase 3 `glasses.conversation.onWake { listen() / speak() / ai.complete() }` API composes the whole pipeline (VAD + STT + Smart Turn EOU + TTS + BYOK LLM) in one block — see `getCodeExample(pattern: 'conversation_agent_loop')`.",
|
|
18
18
|
code: {
|
|
19
19
|
kotlin: `import com.extentos.glasses.core.AudioRecordConfig
|
|
20
20
|
import com.extentos.glasses.core.ExtentosGlasses
|
|
@@ -175,7 +175,7 @@ class CoachHandler(
|
|
|
175
175
|
const BARGE_IN_SPEAK = {
|
|
176
176
|
pattern: "barge_in_speak",
|
|
177
177
|
title: "Barge-in: cancel TTS when user starts speaking",
|
|
178
|
-
description: "The user interrupts the AI mid-response by talking. The AI shuts up immediately and listens. This is the F24 friction-log resolution; needs glasses.audio.cancelSpeak() which lands with this commit.",
|
|
178
|
+
description: "The user interrupts the AI mid-response by talking. The AI shuts up immediately and listens. This is the F24 friction-log resolution; needs glasses.audio.cancelSpeak() which lands with this commit. **Phase 3 conversation runtime** ships `cancelSpeak()` as the cancel mechanism (see `getCapabilityGuide(feature: 'conversation_speak')`), but the v1 trigger is still shell-side — this pattern (subscribe to transcriptions, call cancelSpeak on detected speech) is the canonical v1 manual-trigger implementation regardless of which speak() surface you use. v1.1 will add an auto-VAD watchdog inside `glasses.conversation`'s speak() honoring the manifest's `bargeIn: true` knob, removing the need for the manual subscription.",
|
|
179
179
|
code: {
|
|
180
180
|
kotlin: `import com.extentos.glasses.core.ExtentosGlasses
|
|
181
181
|
import com.extentos.glasses.core.Transcript
|
|
@@ -250,7 +250,7 @@ enum BargeOutcome {
|
|
|
250
250
|
const PHOTO_DESCRIBE_VOICE = {
|
|
251
251
|
pattern: "photo_describe_voice",
|
|
252
252
|
title: "Voice-activated photo + vision-LLM description",
|
|
253
|
-
description: "User says 'describe what you see', glasses capture a photo, customer's vision LLM (Anthropic Claude Vision / OpenAI GPT-4V / Gemini) describes it, AI speaks the description. The canonical 'AI vision on glasses' demo.",
|
|
253
|
+
description: "User says 'describe what you see', glasses capture a photo, customer's vision LLM (Anthropic Claude Vision / OpenAI GPT-4V / Gemini) describes it, AI speaks the description. The canonical 'AI vision on glasses' demo. **Phase 3 conversation runtime** doesn't cover vision in v1 — `glasses.ai.complete(prompt)` is text-only. For Phase 3 apps that want voice + vision, use `glasses.conversation.onWake { ... }` for the wake + listen + speak portion, then call `glasses.camera.capturePhoto()` + your vision-LLM client (`BYOK_ANTHROPIC.describe()` or equivalent) inside the handler. The legacy `voice.onPhrase` pattern shown here works identically for vision flows; pick based on whether you want Phase 3's structured-concurrency handler shape or the legacy manual composition.",
|
|
254
254
|
code: {
|
|
255
255
|
kotlin: `import com.extentos.glasses.core.ExtentosGlasses
|
|
256
256
|
import com.extentos.glasses.core.PhotoConfig
|
|
@@ -381,7 +381,7 @@ class VisionHandler(
|
|
|
381
381
|
const LIVE_TRANSCRIPTION_UI = {
|
|
382
382
|
pattern: "live_transcription_ui",
|
|
383
383
|
title: "Live transcription feeding into your app's UI",
|
|
384
|
-
description: "Continuous transcripts from the glasses mic, surfaced as live captions in your existing Compose / SwiftUI state. The canonical 'live captions' / meeting-notes / translation-app shape.",
|
|
384
|
+
description: "Continuous transcripts from the glasses mic, surfaced as live captions in your existing Compose / SwiftUI state. The canonical 'live captions' / meeting-notes / translation-app shape. **Phase 3 conversation runtime** doesn't expose mid-utterance partials — `listen()` resolves once per Turn with the coalesced final text, not incrementally. For live-caption / meeting-notes / translation UIs that surface text as the user speaks (before they finish a turn), this pattern (subscribing directly to `glasses.audio.transcriptions()` and reading both Partial + Final events) stays canonical. Use the Phase 3 runtime when you want one-shot per-turn AI response; use this pattern when you want a streaming caption feed.",
|
|
385
385
|
code: {
|
|
386
386
|
kotlin: `import com.extentos.glasses.core.ExtentosGlasses
|
|
387
387
|
import com.extentos.glasses.core.Transcript
|
|
@@ -450,7 +450,7 @@ final class TranscriptionViewModel: ObservableObject {
|
|
|
450
450
|
const VOICE_NOTES = {
|
|
451
451
|
pattern: "voice_notes",
|
|
452
452
|
title: "Voice note (wake + record + persist)",
|
|
453
|
-
description: "'Take a note' style flow: wake phrase → record_audio with silence-VAD → save the transcript to the customer's notes repository (Notion, Supabase, filesystem, etc.).",
|
|
453
|
+
description: "'Take a note' style flow: wake phrase → record_audio with silence-VAD → save the transcript to the customer's notes repository (Notion, Supabase, filesystem, etc.). **Phase 3 conversation runtime** owns the audio buffer internally and only returns a transcribed `Turn.Said { text }` from `listen()` — there's no way to extract the underlying raw audio bytes for persistence (e.g. uploading the .wav alongside the transcript to a cloud notes service). For voice-note flows that need the raw recording, this pattern with `glasses.audio.recordDiscrete()` stays canonical (returns `AudioRecording.rawAudioUri` for persistence). For text-only notes (just the transcript), you could rewrite this against `glasses.conversation.onWake { val t = listen(); savedRepo.append(t.text) }` — simpler but loses the audio backup.",
|
|
454
454
|
code: {
|
|
455
455
|
kotlin: `import com.extentos.glasses.core.AudioRecordConfig
|
|
456
456
|
import com.extentos.glasses.core.ExtentosGlasses
|
|
@@ -612,7 +612,7 @@ struct ContentView: View {
|
|
|
612
612
|
const BYOK_ANTHROPIC = {
|
|
613
613
|
pattern: "byok_anthropic",
|
|
614
614
|
title: "BYOK Anthropic Claude API client (text + Vision, OkHttp / URLSession, with retry + observability)",
|
|
615
|
-
description: "Minimal AnthropicClient that voice_qa_assistant
|
|
615
|
+
description: "Minimal AnthropicClient that the legacy `voice_qa_assistant` + `photo_describe_voice` patterns reference. Two methods: `ask(question, history, system)` for text-only Q&A — the `system` parameter is Anthropic's system-prompt slot, where you feed grounding context (a flattened representation of your app's data, today's date, role instructions) so the AI's answers derive from real records rather than generic prose — and `describe(imageBase64, mediaType, prompt, system)` for Claude Vision (image content blocks). There is no first-party Anthropic Kotlin SDK; this is the canonical OkHttp + kotlinx.serialization wrapper around POST /v1/messages. iOS uses URLSession + Codable. Returns LlmResult (Ok / KeyMissing / KeyRejected / Connectivity / Transient / ClientBug / Empty) — distinct failure variants for distinct user-facing remediations. Built-in retry-with-backoff (200ms / 800ms / 3.2s, 3 attempts) for Transient (429 + 5xx) and Connectivity (IOException) failures so most upstream blips never reach the caller. Optional `observability: glasses.observability` wiring — pass it and every call surfaces under getEventLog's 'ai' filter chip. Paste, then wire the API key through resValue (Android) / Info.plist (iOS) per getCredentialGuide. F-R5-01 — ask() exposes `system` so voice_qa_assistant composes against this client unchanged. **For the Phase 3 `glasses.conversation` runtime, you don't need this client** — `glasses.ai.complete(prompt)` ships a built-in OpenAI BYOK path (key flows direct to OpenAI, no Extentos proxy). The conversation_agent_loop pattern uses `glasses.ai.setOpenaiApiKey` + `complete` instead of a hand-rolled HTTP client. Use this BYOK_ANTHROPIC pattern when you need Anthropic specifically (different cost/quality profile, Claude Vision for images), the legacy `voice_qa_assistant` flow, or when wiring vision flows that Phase 3's `glasses.ai` doesn't yet cover (vision is post-v1).",
|
|
616
616
|
code: {
|
|
617
617
|
kotlin: `import com.extentos.glasses.core.ObservabilityClient
|
|
618
618
|
import java.io.IOException
|
|
@@ -1590,6 +1590,7 @@ The same follow loop tests a conversation with an AI the developer built into th
|
|
|
1590
1590
|
|
|
1591
1591
|
The agent reads each \`speak\` event's \`details.text\` — that is what the app's AI actually said — so its next utterance can be contextual rather than pre-scripted. \`follow: true\` returns whatever happened next (speak, a photo capture, an ai_call, several at once), so the agent never has to predict the turn's shape. inject ↔ watch is the irreducible shape of a conversation; \`follow\` just makes the watch half a live tail instead of guess-and-poll. Give a real LLM turn room — timeoutMs: 30000 — so a slow model reply does not register as a false timeout.`,
|
|
1592
1592
|
gotchas: [
|
|
1593
|
+
"**For Phase 3 conversation runtime apps, use `conversation_agent_loop` instead.** This `agent_test_loop` pattern is the LEGACY recipe — handler under test uses `glasses.voice.onPhrase` + `glasses.audio.recordDiscrete` + a customer-side `AnthropicClient`, and the watch gate is `record_audio` (the legacy primitive). For apps built on `glasses.conversation.onWake { listen() / speak() / ai.complete() }` (the Phase 3 runtime), the watch gate is `conversation.turn_started` (lifecycle event the new pipeline emits), the inject path is the H1 `core.injectListenText` bridge, and the three-surface DB assertion is rarely needed because the conversation runtime owns the LLM call — Surface A (event log) and screencap usually cover what dogfood needs. See `getCodeExample(pattern: 'conversation_agent_loop')`.",
|
|
1593
1594
|
"**WAL caveat (Android Room).** Room writes in WAL mode by default. Pulling only the .db file gives a stale snapshot — recent writes live in .db-wal. Always pull .db + .db-wal + .db-shm together; the agent's first DB-read returning zero rows almost always means missing .db-wal.",
|
|
1594
1595
|
"**Anchor a cursor before injecting, then watch with `follow: true`.** Capture a cursor from a no-cursor getEventLog *before* the wake inject — that no-cursor call returns the tail of the log, so the cursor means \"now\" and a resumed sim's stale events never leak in. Between the wake and the question, watch with `follow: true` until a `record_audio` frame appears: the cursor guarantees no event is missed in the gap, and the `record_audio` gate guarantees the handler's recordDiscrete is subscribed before the question lands — otherwise the second inject races past the subscriber and only the wake matcher sees it.",
|
|
1595
1596
|
"**A `timedOut: true` follow result is not an error.** It means `timeoutMs` elapsed with no new events — the handler may still be working. Re-call getEventLog with the SAME cursor to keep watching. Treat it as a signal only after two or three consecutive timeouts, and cross-check filter:\"errors\" when you do.",
|
|
@@ -1601,6 +1602,229 @@ The agent reads each \`speak\` event's \`details.text\` — that is what the app
|
|
|
1601
1602
|
],
|
|
1602
1603
|
relatedFeatures: ["record_audio", "transcription_incremental", "speak", "voice_command"],
|
|
1603
1604
|
};
|
|
1605
|
+
const CONVERSATION_AGENT_LOOP = {
|
|
1606
|
+
pattern: "conversation_agent_loop",
|
|
1607
|
+
title: "Phase 3 conversation runtime + agent-driven E2E loop",
|
|
1608
|
+
description: "The Phase 3 `glasses.conversation.onWake { listen() / speak() / ai.complete() }` API in one place, plus the headless agent loop that verifies it end-to-end via `injectTranscript` + `getEventLog`. The runtime composes VAD + STT + Smart Turn + TTS + BYOK LLM in the shared Rust core — the customer code is one block of structured-concurrency Kotlin/Swift. The agent loop drives wake + follow-up utterances via MCP, then asserts symmetric `conversation.handler_started`/`turn_started`/`turn_ended`/`handler_finished` pairs in the event log. Pair this with `voice_qa_assistant` (which uses the legacy `recordDiscrete` API) when comparing the two paths.",
|
|
1609
|
+
code: {
|
|
1610
|
+
kotlin: `import com.extentos.glasses.core.ExtentosGlasses
|
|
1611
|
+
import com.extentos.glasses.core.Turn
|
|
1612
|
+
|
|
1613
|
+
class AssistantHandler(
|
|
1614
|
+
private val glasses: ExtentosGlasses,
|
|
1615
|
+
) {
|
|
1616
|
+
fun start() {
|
|
1617
|
+
// glasses.ai requires a BYOK OpenAI key — set once at app
|
|
1618
|
+
// init. Key flows direct to OpenAI (no Extentos proxy);
|
|
1619
|
+
// never logged, never persisted. See getCredentialGuide.
|
|
1620
|
+
glasses.ai?.setOpenaiApiKey(BuildConfig.OPENAI_API_KEY)
|
|
1621
|
+
|
|
1622
|
+
// onWake registers a wake phrase + handler block. The block
|
|
1623
|
+
// runs under structured concurrency inside a ConversationScope —
|
|
1624
|
+
// listen() / speak() / cancelSpeak() are available on the
|
|
1625
|
+
// implicit \`this\` receiver. \`glasses.ai\` stays top-level
|
|
1626
|
+
// because the developer owns conversation context per
|
|
1627
|
+
// synthesis §6 (no message history in the runtime; pass the
|
|
1628
|
+
// full prompt on each ai.complete call).
|
|
1629
|
+
glasses.conversation?.onWake("hey assistant") {
|
|
1630
|
+
speak("Hi! What can I do?")
|
|
1631
|
+
|
|
1632
|
+
// Multi-turn loop: each listen() suspends until the user's
|
|
1633
|
+
// next utterance resolves to a Turn (real audio: VAD →
|
|
1634
|
+
// STT → Smart Turn / silence-fallback EOU → coalescer →
|
|
1635
|
+
// Turn::Said). The same listen() resolves via the agent
|
|
1636
|
+
// loop's injectTranscript path — headless E2E exercises
|
|
1637
|
+
// exactly the same suspend point as a real user.
|
|
1638
|
+
while (true) {
|
|
1639
|
+
val turn = listen()
|
|
1640
|
+
when (turn) {
|
|
1641
|
+
is Turn.Said -> {
|
|
1642
|
+
if (turn.text.contains("stop", ignoreCase = true)) {
|
|
1643
|
+
speak("Goodbye.")
|
|
1644
|
+
break
|
|
1645
|
+
}
|
|
1646
|
+
// ai.complete is single-turn BYOK; the
|
|
1647
|
+
// developer accumulates conversation history
|
|
1648
|
+
// in their own state and re-passes it each
|
|
1649
|
+
// call. v1 ships complete() only; ai_stream
|
|
1650
|
+
// is deferred core-side.
|
|
1651
|
+
val response = glasses.ai?.complete(
|
|
1652
|
+
"User asked: \${turn.text}. Reply briefly."
|
|
1653
|
+
) ?: "I couldn't reach the model right now."
|
|
1654
|
+
speak(response)
|
|
1655
|
+
}
|
|
1656
|
+
is Turn.Interrupted -> {
|
|
1657
|
+
// Barge-in: user spoke during our speak().
|
|
1658
|
+
// Hand control back without re-prompting —
|
|
1659
|
+
// they have something specific in mind.
|
|
1660
|
+
}
|
|
1661
|
+
is Turn.Error -> {
|
|
1662
|
+
speak("Network trouble — let me know if you want to try again.")
|
|
1663
|
+
break
|
|
1664
|
+
}
|
|
1665
|
+
is Turn.HoldRequested -> { /* uncommon in v1 */ }
|
|
1666
|
+
}
|
|
1667
|
+
}
|
|
1668
|
+
}
|
|
1669
|
+
}
|
|
1670
|
+
}`,
|
|
1671
|
+
swift: `import GlassesCore
|
|
1672
|
+
|
|
1673
|
+
final class AssistantHandler {
|
|
1674
|
+
private let glasses: ExtentosGlasses
|
|
1675
|
+
|
|
1676
|
+
init(_ glasses: ExtentosGlasses) { self.glasses = glasses }
|
|
1677
|
+
|
|
1678
|
+
func start() {
|
|
1679
|
+
// BYOK OpenAI key — set once at app init.
|
|
1680
|
+
glasses.ai?.setOpenAiApiKey(Secrets.openAiKey)
|
|
1681
|
+
|
|
1682
|
+
// onWake's handler closure receives a ConversationHandler whose
|
|
1683
|
+
// listen / speak / cancelSpeak proxy to the same core. \`glasses.ai\`
|
|
1684
|
+
// is top-level (developer owns conversation context, synthesis §6).
|
|
1685
|
+
_ = glasses.conversation?.onWake(phrase: "hey assistant") { ctx in
|
|
1686
|
+
try? await ctx.speak("Hi! What can I do?")
|
|
1687
|
+
|
|
1688
|
+
while !Task.isCancelled {
|
|
1689
|
+
let turn = await ctx.listen()
|
|
1690
|
+
switch turn {
|
|
1691
|
+
case .said(let text):
|
|
1692
|
+
if text.lowercased().contains("stop") {
|
|
1693
|
+
try? await ctx.speak("Goodbye.")
|
|
1694
|
+
return
|
|
1695
|
+
}
|
|
1696
|
+
let response: String
|
|
1697
|
+
do {
|
|
1698
|
+
response = try await glasses.ai?.complete(
|
|
1699
|
+
"User asked: \\(text). Reply briefly.") ?? ""
|
|
1700
|
+
} catch {
|
|
1701
|
+
try? await ctx.speak("Network trouble — try again later.")
|
|
1702
|
+
return
|
|
1703
|
+
}
|
|
1704
|
+
try? await ctx.speak(response)
|
|
1705
|
+
case .interrupted:
|
|
1706
|
+
continue // barge-in: hand control back, don't re-prompt
|
|
1707
|
+
case .error:
|
|
1708
|
+
try? await ctx.speak("Network trouble — try again later.")
|
|
1709
|
+
return
|
|
1710
|
+
case .holdRequested:
|
|
1711
|
+
continue
|
|
1712
|
+
}
|
|
1713
|
+
}
|
|
1714
|
+
}
|
|
1715
|
+
}
|
|
1716
|
+
}`,
|
|
1717
|
+
},
|
|
1718
|
+
explanation: `WHY THE CONVERSATION RUNTIME, NOT \`glasses.voice.onPhrase\` + \`recordDiscrete\`?
|
|
1719
|
+
|
|
1720
|
+
The legacy \`voice_qa_assistant\` pattern bolts a wake-phrase matcher onto a discrete recorder; the Phase 3 \`conversation\` runtime is a single composed pipeline. The customer code drops from ~50 lines of recordDiscrete + transcript parsing + LLM retry boilerplate to a \`while\` loop calling \`listen()\` / \`speak()\` / \`ai.complete()\`. Underneath:
|
|
1721
|
+
|
|
1722
|
+
- VAD (Silero v3, 8 kHz native, 256-sample chunks)
|
|
1723
|
+
- Cloud STT (OpenAI Whisper via Extentos proxy; chunk-batch shape)
|
|
1724
|
+
- Smart Turn v3 EOU (8 s window, 80×800 log-mel features)
|
|
1725
|
+
- Coalescer with two completion paths (Smart Turn COMPLETE OR 600 ms silence fallback)
|
|
1726
|
+
- TTS (OpenAI; streaming PCM; drop-cancel for barge-in)
|
|
1727
|
+
- BYOK LLM (developer's OpenAI key direct, no proxy)
|
|
1728
|
+
- Lifecycle event emission (6 G2 schemas: handler_started/finished, turn_started/ended, barge_in, error)
|
|
1729
|
+
|
|
1730
|
+
All shared between Android (Kotlin) and iOS (Swift) via the Rust core (extentos-core) + uniffi bindings. The shell's job is structured concurrency + idiomatic wrapping; the algorithmic logic lives once in Rust.
|
|
1731
|
+
|
|
1732
|
+
AGENT-DRIVEN E2E LOOP
|
|
1733
|
+
|
|
1734
|
+
The MCP \`injectTranscript\` tool now reaches a parked \`listen()\` (H1 wiring — DefaultConversationClient subscribes to audio.transcriptions().filter for .final and forwards to core.injectListenText). The MCP \`getEventLog\` tool returns the lifecycle events under \`filter: "voice"\` (H4 wiring — backend classifier routes conversation.* to the voice layer; conversation.error climbs to errors automatically). End-to-end:
|
|
1735
|
+
|
|
1736
|
+
// 1. Anchor a cursor BEFORE injecting so the watch picks up
|
|
1737
|
+
// everything the inject produces, with no stale events leaking
|
|
1738
|
+
// from a previous test run on the same sim.
|
|
1739
|
+
let cur = (await getEventLog({ sessionId, filter: "voice" })).cursor;
|
|
1740
|
+
|
|
1741
|
+
// 2. Drive the wake. Voice client matches the registered phrase →
|
|
1742
|
+
// onWake handler fires → listen() parks for the next utterance.
|
|
1743
|
+
await injectTranscript({ sessionId, text: "hey assistant", isFinal: true });
|
|
1744
|
+
|
|
1745
|
+
// 3. Watch until the handler is parked on listen() — turn_started
|
|
1746
|
+
// is the signal it's ready for the question.
|
|
1747
|
+
let listening = false;
|
|
1748
|
+
while (!listening) {
|
|
1749
|
+
const log = await getEventLog({
|
|
1750
|
+
sessionId, filter: "voice", cursor: cur, follow: true, timeoutMs: 10000,
|
|
1751
|
+
});
|
|
1752
|
+
cur = log.cursor;
|
|
1753
|
+
listening = log.events.some(e => e.type === "conversation.turn_started");
|
|
1754
|
+
}
|
|
1755
|
+
|
|
1756
|
+
// 4. Drive the question. The H1 bridge (DefaultConversationClient's
|
|
1757
|
+
// transcriptions subscriber) forwards this Final to
|
|
1758
|
+
// core.injectListenText → listen() resolves Turn::Said{text}.
|
|
1759
|
+
await injectTranscript({ sessionId, text: "what's the weather", isFinal: true });
|
|
1760
|
+
|
|
1761
|
+
// 5. Watch until the turn ends + the AI response speak() completes.
|
|
1762
|
+
// No filter (all chips): ai_call_* frames live in the 'ai' chip, not
|
|
1763
|
+
// 'voice', and appear only if ai.complete is wrapped in observability.aiCall.
|
|
1764
|
+
let spoke = false;
|
|
1765
|
+
const turn = [];
|
|
1766
|
+
while (!spoke) {
|
|
1767
|
+
const log = await getEventLog({
|
|
1768
|
+
sessionId, cursor: cur, follow: true, timeoutMs: 20000,
|
|
1769
|
+
});
|
|
1770
|
+
cur = log.cursor;
|
|
1771
|
+
turn.push(...log.events);
|
|
1772
|
+
spoke = log.events.some(e => e.type === "speak_completed");
|
|
1773
|
+
}
|
|
1774
|
+
|
|
1775
|
+
// 6. ASSERT the symmetric event pairs (F-R5-05).
|
|
1776
|
+
const turnEnded = turn.find(e => e.type === "conversation.turn_ended");
|
|
1777
|
+
assert(turnEnded?.details?.outcome === "said");
|
|
1778
|
+
assert(turnEnded?.details?.textLen > 0); // PII boundary: char count only
|
|
1779
|
+
|
|
1780
|
+
// 7. To loop more utterances, just repeat steps 4–5. The handler
|
|
1781
|
+
// stays parked on the next listen() until the next inject.
|
|
1782
|
+
|
|
1783
|
+
WHAT GETS LOGGED (PII BOUNDARY)
|
|
1784
|
+
|
|
1785
|
+
Each \`conversation.*\` event carries structural fields (handlerId,
|
|
1786
|
+
turnId, outcome, textLen, kind, operation, interruptedAtMs,
|
|
1787
|
+
wakePhrase). **The transcript text is NEVER logged** — only its
|
|
1788
|
+
character count. The Rust core enforces this at the A10 emission
|
|
1789
|
+
point; the Android/iOS bridges + backend persist + MCP layer preserve
|
|
1790
|
+
it end-to-end. Agents see "Turn t_5 ended: said (12 chars)" rather
|
|
1791
|
+
than the user's words. Same boundary the Extentos telemetry product
|
|
1792
|
+
already enforces.
|
|
1793
|
+
|
|
1794
|
+
FILTER QUICK REFERENCE
|
|
1795
|
+
|
|
1796
|
+
- filter: "voice" → conversation.handler_started / handler_finished
|
|
1797
|
+
/ turn_started / turn_ended / barge_in
|
|
1798
|
+
(alongside legacy stt_/speak/audio_/
|
|
1799
|
+
tts_audio_chunk)
|
|
1800
|
+
- filter: "errors" → conversation.error AUTOMATICALLY (the backend
|
|
1801
|
+
bumps severity to warn for this type)
|
|
1802
|
+
- filter: "ai" → ai_call_start / ai_call_end frames if the
|
|
1803
|
+
customer wrapped their BYOK calls in
|
|
1804
|
+
glasses.observability.aiCall (the legacy
|
|
1805
|
+
wrapper). glasses.ai.complete in the Phase 3
|
|
1806
|
+
runtime does NOT auto-emit ai_call_* yet —
|
|
1807
|
+
wrap it if you want timing in the 'ai' chip.
|
|
1808
|
+
|
|
1809
|
+
SIMULATOR UI
|
|
1810
|
+
|
|
1811
|
+
The browser sim's right rail renders a ConversationPanel (H2) above
|
|
1812
|
+
the existing voice-commands chips. Active handlers show as emerald
|
|
1813
|
+
cards with a pulsing "listening — t_X" indicator when listen() is
|
|
1814
|
+
parked. Recent turns appear color-coded by outcome (said=emerald,
|
|
1815
|
+
interrupted=amber, hold_requested=sky, error=red). Open the sim
|
|
1816
|
+
browser side-by-side with this agent loop and watch the panel react.`,
|
|
1817
|
+
gotchas: [
|
|
1818
|
+
"**Anchor a cursor BEFORE the first inject.** A no-cursor getEventLog returns the tail of the log + a cursor positioned after it — that's your 'now' bookmark. Without it, a resumed sim's stale handler_started from a prior run can satisfy your watch loop's assertion and you'd never know the new inject silently failed.",
|
|
1819
|
+
"**Wait for `turn_started` before injecting the question.** The H1 bridge only resolves listen() — if you inject text before the handler is parked, the inject lands in the void (DefaultConversationClient's H1 forwarder returns false; the text is dropped, no FSM state poisoning). Watching for `conversation.turn_started` proves the handler reached its listen() before the next inject.",
|
|
1820
|
+
"**The PII boundary is real.** `details.textLen` is the only field carrying user-input length. The transcript text itself never appears in the event log. If you need the actual text for an LLM evaluator, you'll need a separate channel — Phase 3 deliberately doesn't ship a backdoor.",
|
|
1821
|
+
"**`glasses.conversation` and `glasses.ai` are nullable.** Both are null when the host app didn't pass `ExtentosConfig.conversationOptions` (the manifest's `conversation` block was absent). Phase 3 is opt-in to avoid the ~10 MB model load cost for voice-only apps. Use `glasses.conversation?.onWake(...)` defensively.",
|
|
1822
|
+
"**Smart Turn requires the silero_vad + smart-turn-v3.2-cpu ONNX models bundled as assets.** Host app discovers their paths and passes them in `ConversationCoreOptions`. Missing model files raise `ConversationCoreException.VadModelLoad` / `SmartTurnModelLoad` at app init — the library logs warn-level and leaves `glasses.conversation` null (rest of the library stays usable). Check `glasses.runtime.events` for `conversation.init_failed` if your conversation surface is unexpectedly null.",
|
|
1823
|
+
"**OpenAI API key flows direct, not via Extentos.** synthesis §6 + plan §4.3 BYOK contract — `glasses.ai.complete()` POSTs straight to `https://api.openai.com/v1/chat/completions` with the key in the `Authorization: Bearer` header. The Extentos backend doesn't see the key. Test endpoint override is `ConversationCoreOptions.aiEndpoint` (also the swap point for the future Track-B AI Gateway).",
|
|
1824
|
+
"**`ai_stream` is deferred core-side.** v1 ships `complete()` only — single-turn, non-streaming. If your handler needs streaming responses (long answers, perceived latency), you'll either chunk on the customer side or wait for v1.1's ai_stream surface.",
|
|
1825
|
+
],
|
|
1826
|
+
relatedFeatures: ["voice_command", "speak", "transcription_incremental", "ai_call", "smart_turn_eou"],
|
|
1827
|
+
};
|
|
1604
1828
|
export const CODE_EXAMPLES = {
|
|
1605
1829
|
voice_qa_assistant: VOICE_QA_ASSISTANT,
|
|
1606
1830
|
barge_in_speak: BARGE_IN_SPEAK,
|
|
@@ -1610,6 +1834,7 @@ export const CODE_EXAMPLES = {
|
|
|
1610
1834
|
connection_page_setup: CONNECTION_PAGE_SETUP,
|
|
1611
1835
|
byok_anthropic: BYOK_ANTHROPIC,
|
|
1612
1836
|
agent_test_loop: AGENT_TEST_LOOP,
|
|
1837
|
+
conversation_agent_loop: CONVERSATION_AGENT_LOOP,
|
|
1613
1838
|
};
|
|
1614
1839
|
export const CODE_EXAMPLE_PATTERNS = Object.keys(CODE_EXAMPLES).sort();
|
|
1615
1840
|
//# sourceMappingURL=codeExamples.js.map
|