@extentos/mcp-server 0.0.57 → 0.0.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +22 -62
  2. package/dist/generated/events.d.ts +3 -3
  3. package/dist/generated/events.d.ts.map +1 -1
  4. package/dist/generated/events.js +11 -6
  5. package/dist/generated/events.js.map +1 -1
  6. package/dist/generated/schemas.d.ts.map +1 -1
  7. package/dist/generated/schemas.js +41 -124
  8. package/dist/generated/schemas.js.map +1 -1
  9. package/dist/index.js +4 -3
  10. package/dist/index.js.map +1 -1
  11. package/dist/tools/data/capabilityPatterns.d.ts +11 -0
  12. package/dist/tools/data/capabilityPatterns.d.ts.map +1 -0
  13. package/dist/tools/data/capabilityPatterns.js +300 -0
  14. package/dist/tools/data/capabilityPatterns.js.map +1 -0
  15. package/dist/tools/data/codeExamples.d.ts +15 -0
  16. package/dist/tools/data/codeExamples.d.ts.map +1 -0
  17. package/dist/tools/data/codeExamples.js +519 -0
  18. package/dist/tools/data/codeExamples.js.map +1 -0
  19. package/dist/tools/definitions.d.ts.map +1 -1
  20. package/dist/tools/definitions.js +99 -181
  21. package/dist/tools/definitions.js.map +1 -1
  22. package/dist/tools/docs/index.d.ts.map +1 -1
  23. package/dist/tools/docs/index.js +321 -800
  24. package/dist/tools/docs/index.js.map +1 -1
  25. package/dist/tools/handlers/createSimulatorSession.d.ts.map +1 -1
  26. package/dist/tools/handlers/createSimulatorSession.js +36 -51
  27. package/dist/tools/handlers/createSimulatorSession.js.map +1 -1
  28. package/dist/tools/handlers/generateConnectionModule.js +14 -15
  29. package/dist/tools/handlers/generateConnectionModule.js.map +1 -1
  30. package/dist/tools/handlers/getCapabilityGuide.d.ts +3 -0
  31. package/dist/tools/handlers/getCapabilityGuide.d.ts.map +1 -0
  32. package/dist/tools/handlers/getCapabilityGuide.js +47 -0
  33. package/dist/tools/handlers/getCapabilityGuide.js.map +1 -0
  34. package/dist/tools/handlers/getCodeExample.d.ts +3 -0
  35. package/dist/tools/handlers/getCodeExample.d.ts.map +1 -0
  36. package/dist/tools/handlers/getCodeExample.js +50 -0
  37. package/dist/tools/handlers/getCodeExample.js.map +1 -0
  38. package/dist/tools/handlers/getCredentialGuide.d.ts.map +1 -1
  39. package/dist/tools/handlers/getCredentialGuide.js +44 -84
  40. package/dist/tools/handlers/getCredentialGuide.js.map +1 -1
  41. package/dist/tools/handlers/getEventLog.d.ts.map +1 -1
  42. package/dist/tools/handlers/getEventLog.js +7 -8
  43. package/dist/tools/handlers/getEventLog.js.map +1 -1
  44. package/dist/tools/handlers/getPermissions.d.ts.map +1 -1
  45. package/dist/tools/handlers/getPermissions.js +27 -12
  46. package/dist/tools/handlers/getPermissions.js.map +1 -1
  47. package/dist/tools/handlers/getPlatformInfo.d.ts.map +1 -1
  48. package/dist/tools/handlers/getPlatformInfo.js +112 -46
  49. package/dist/tools/handlers/getPlatformInfo.js.map +1 -1
  50. package/dist/tools/handlers/getProductionChecklist.d.ts.map +1 -1
  51. package/dist/tools/handlers/getProductionChecklist.js +86 -120
  52. package/dist/tools/handlers/getProductionChecklist.js.map +1 -1
  53. package/dist/tools/handlers/getSimulatorStatus.d.ts.map +1 -1
  54. package/dist/tools/handlers/getSimulatorStatus.js +1 -3
  55. package/dist/tools/handlers/getSimulatorStatus.js.map +1 -1
  56. package/dist/tools/handlers/getVoiceCommandGuidance.d.ts.map +1 -1
  57. package/dist/tools/handlers/getVoiceCommandGuidance.js +18 -45
  58. package/dist/tools/handlers/getVoiceCommandGuidance.js.map +1 -1
  59. package/dist/tools/handlers/inspectIntegration.d.ts.map +1 -1
  60. package/dist/tools/handlers/inspectIntegration.js +14 -52
  61. package/dist/tools/handlers/inspectIntegration.js.map +1 -1
  62. package/dist/tools/handlers/validateIntegration.d.ts.map +1 -1
  63. package/dist/tools/handlers/validateIntegration.js +83 -167
  64. package/dist/tools/handlers/validateIntegration.js.map +1 -1
  65. package/dist/tools/registry.d.ts.map +1 -1
  66. package/dist/tools/registry.js +6 -11
  67. package/dist/tools/registry.js.map +1 -1
  68. package/dist/tools/templates/androidBootstrap.d.ts.map +1 -1
  69. package/dist/tools/templates/androidBootstrap.js +20 -47
  70. package/dist/tools/templates/androidBootstrap.js.map +1 -1
  71. package/dist/tools/templates/iosBootstrap.d.ts.map +1 -1
  72. package/dist/tools/templates/iosBootstrap.js +24 -34
  73. package/dist/tools/templates/iosBootstrap.js.map +1 -1
  74. package/dist/tools/util/manifest.d.ts +0 -61
  75. package/dist/tools/util/manifest.d.ts.map +1 -1
  76. package/dist/tools/util/manifest.js +14 -112
  77. package/dist/tools/util/manifest.js.map +1 -1
  78. package/dist/tools/util/permissions.d.ts +4 -7
  79. package/dist/tools/util/permissions.d.ts.map +1 -1
  80. package/dist/tools/util/permissions.js +151 -161
  81. package/dist/tools/util/permissions.js.map +1 -1
  82. package/package.json +1 -6
@@ -154,590 +154,66 @@ export const DOC_INDEX = [
154
154
  {
155
155
  topic: "getting_started",
156
156
  title: "Getting Started with Extentos",
157
- content: "Extentos lets agents build Meta Ray-Ban apps from a structured spec. The library carries all hardware logic developer code is a spec file plus thin generated glue (bootstrap + callback dispatch + optional stream consumer) plus handler bodies. " +
158
- "Agents plan composition themselves; there is no planning tool. Read the catalog topics below to see what primitives exist, then assemble the smallest spec your goal needs. " +
159
- "Catalog topics: trigger_types, action_types, block_types, stream_types, spec_format, template_syntax, toggles, app_callback_guide, library_api, voice_ux_guide, permissions, connection_ui_placement, host_app_scaffold. Each has inline minimal examples sufficient for most specs. " +
160
- "Constraint topics: constraints_and_limitations, audio_video_coexistence, spec_validation_rules. " +
161
- "**Prerequisite: host app must already exist.** generateConnectionModule scaffolds Extentos INTO an existing Compose / SwiftUI host app it does not create the host app itself. CLI agents starting from scratch should fetch `searchDocs(topic: 'host_app_scaffold')` first for a complete copy-pasteable Compose / SwiftUI project template; Android Studio / Xcode users can use the New Project wizard instead (Empty Compose Activity for Android, App for iOS). F-R4-5. " +
162
- "Mutation tool sequence (deterministic, in order): " +
163
- "(1) getPlatformInfo one call to fetch capability surface (compact: names + tiers, ~2KB) and library version. Default response is compact; pass expand: ['capabilities.full'] for per-primitive params/payload/requires/constraints, or expand: ['schema'] for the raw JSON Schema (rarely needed validateSpec handles validation). " +
164
- "(2) generateConnectionModule one-shot scaffold (bootstrap + dependencies + empty spec). Surfaces a developerInstructions.Step 1 prompt to ask the dev about ExtentosConnectionPage placement; do not skip it. " +
165
- "(3) initSpec populate the first real spec from primitives you composed. " +
166
- "(4) generateConsumer(kind: 'callback') — stub app_callback handlers if your spec uses any. Use kind: 'stream' for stream consumers. " +
167
- "(5) validateIntegration — pre-test correctness gate. Re-run after every mutation. " +
168
- "(6) createSimulatorSession — provision a browser simulator session to test end-to-end. " +
169
- "(7) getEventLog — primary debug tool when a flow misbehaves. Returns the structured event trace for a session: trigger fires, block completions, callback invocations, template-resolution warnings, protocol violations. Filter by 'errors' to surface only warn/error-severity rows; filter by 'triggers' to follow flow execution. If you ever observe 'nothing happens when I fire the trigger' or a callback being called with empty input, ALWAYS call getEventLog before guessing — it's almost always one of: the block timed out (transport.protocol_violation), a template didn't resolve (TemplateUnresolved), or the handler returned an error (callback.completed errorCode). " +
170
- "For changes after step 3: inspectIntegration updateSpec generateConsumer (if handlers/streams added) validateIntegration. " +
171
- "**Iteration loop — DO NOT mint a new sim per change.** `createSimulatorSession` is get-or-create: it returns the saved sim for this project (status:'resumed') after the first call, not a new one. Three rates of change with different requirements: (a) SPEC edits → updateSpec live-pushes to the running app via spec_updated frame, no rebuild, no remint, response shows liveSessionUpdate.appNotified:true; (b) APP-CODE edits (Kotlin/Swift handlers, custom UI) → rebuild + reinstall, the library reattaches to the same saved sim, no remint; (c) FORCE-FRESH (clean-slate ID rotation) → createSimulatorSession({ resetFresh: true }) OR click 'Reset' on the dashboard at extentos.com/s — backend archives the existing sim, mints a replacement, pushes session_moved to live device sockets. URL-bake apps rebuild once after force-fresh. For agents that can rebuild app code themselves: do it inline. For agents that can't: hand the dev rebuild + reinstall commands; same auto-attach behavior on the other end. Full mechanics in searchDocs(topic: 'simulator_browser_mode') § Iteration model. " +
172
- "**Multi-platform projects (Android + iOS = ONE project).** When a developer has both Android and iOS versions of the same app, the dashboard groups them under one project IF the Android `applicationId` matches the iOS bundle identifier (the standard reverse-DNS convention — `com.example.myapp` on both platforms). When they do, calling createSimulatorSession from each project yields the same `projectInstallId`, so the dashboard auto-groups them. When they don't match (mismatched reverse-DNS, or one project predates the manifest), two separate projects appear — the developer can resolve via the dashboard's Danger Zone → Merge action, which moves all sessions from one project into another. Full guidance: searchDocs(topic: 'multi_platform_projects'). " +
173
- "Reference library: getExampleSpec returns 8 worked patterns (voice_assistant, live_translation, etc.). On-demand only read it when studying a complex composition. The catalog topics cover the simple cases without it.",
174
- keywords: ["setup", "start", "first", "begin", "overview", "walkthrough", "tool order", "workflow", "catalog", "debug", "iteration", "iteration loop", "rebuild", "live-push", "remint", "dev loop"],
175
- relatedTools: ["getPlatformInfo", "generateConnectionModule", "initSpec", "searchDocs", "validateIntegration", "getEventLog", "updateSpec", "createSimulatorSession"],
176
- },
177
- {
178
- topic: "block_types",
179
- title: "Block Types",
180
- content: "**Blocks are discrete, bounded media-capture operations.** A block is *declared* in spec.blocks[] (with config) and *invoked* from a trigger's actions[] via a `block_call` action. The result is bound to a save_as variable for use in subsequent actions. 3 block types:\n\n" +
181
- "─── capture_photo ───\n" +
182
- "One-shot still photo from the glasses camera. Tier: dat_native (high confidence).\n\n" +
183
- "When to use: voice-driven photo capture ('take a photo', 'describe what you see'), or capture_button-driven photography.\n" +
184
- "When NOT to use: continuous frames for ML — use video_frames stream at low fps. Burst capture — call capture_photo multiple times from a flow with delay between, OR use video_frames at 7-15 fps and consume manually.\n\n" +
185
- "Required: type, id\n" +
186
- "Optional params: resolution ('LOW' | 'MEDIUM' | 'HIGH'), format ('jpeg' | 'heic')\n" +
187
- "Returns: { uri, width, height, format }\n\n" +
188
- "Field rules:\n" +
189
- "- resolution: HIGH adds ~400ms latency on Gen 1 hardware vs MEDIUM. Use LOW for ML inputs that downscale anyway. MEDIUM is the right default.\n" +
190
- "- format: 'heic' is ~50% smaller but Android Bitmap pipelines need conversion. 'jpeg' is the universal default.\n\n" +
191
- "Constraints:\n" +
192
- "- Requires GlassesState == Active. Calling while not Active fails with NotConnected.\n" +
193
- "- Camera exclusivity: blocks while outgoing_video stream is active (unless audio_video_coexistence_policy = prefer_video AND resolution = LOW).\n\n" +
194
- "Displaying the photo on-device:\n" +
195
- "- The `uri` shape varies by transport (`data:image/...;base64,...` in BrowserSim, `file://...` in RealMeta + LocalSim). Don't pass it directly to Coil's `AsyncImage` / SwiftUI's `AsyncImage` — those URL pipelines silently fail on inlined data URIs from BrowserSim. Use the library helper instead: `photo.loadBitmap()` (Android, suspend → `Bitmap?`) or `await photo.loadImage()` (iOS, async → `UIImage?` / `NSImage?`). The helper branches on scheme internally and runs decode off the main thread.\n\n" +
196
- "Consuming the photo from an `app_callback` handler (the AI-vision path):\n" +
197
- "- The spec convention `input: { image: \"{{photo.uri}}\" }` passes the URI as a **String** to your handler — `call.input.image` is the URI text, not a `Photo` object. The `photo.loadBitmap()` extension is unreachable on this path because there's no String → Photo conversion API. (Same shape applies to capture_video — see capture_video below for `Videos.copyToFile(uri, dst)` etc.)\n" +
198
- "- Use the URI-string helpers on `Photos` (top-level object, library 1.1.13-pair+) instead:\n" +
199
- " - `Photos.loadBase64(uri): String?` — bare base64 payload, ready to drop into Anthropic Claude Vision / OpenAI GPT-4V request bodies. Has a fast path for `data:` URIs that skips the decode + re-encode round trip.\n" +
200
- " - `Photos.loadBytes(uri): ByteArray?` — raw image bytes for multipart upload or custom encoders.\n" +
201
- " - `Photos.loadBitmap(uri): Bitmap?` — same Bitmap shape as `photo.loadBitmap()` but takes a URI string.\n" +
202
- " - `Photos.mediaTypeFromUri(uri): String?` — `\"image/jpeg\"` / `\"image/png\"` / `\"image/webp\"` / etc. Best-effort: reads the embedded MIME type from `data:` URIs, infers from the file extension otherwise. Most AI vision APIs require both base64 + media type.\n" +
203
- " - All loaders are suspend, run decode on `Dispatchers.IO`, and return null on missing file / decode failure / OOM. They never throw.\n" +
204
- " - Anthropic Claude Vision shape: `{ \"type\": \"image\", \"source\": { \"type\": \"base64\", \"media_type\": Photos.mediaTypeFromUri(uri), \"data\": Photos.loadBase64(uri) } }`.\n\n" +
205
- "Common mistakes:\n" +
206
- "- Calling capture_photo in a tight loop expecting frame-rate semantics — use video_frames stream instead. capture_photo has shutter overhead on every call.\n" +
207
- "- Setting resolution = HIGH for ML inputs that you'll downscale — wastes capture time and battery.\n" +
208
- "- Writing your own `data:` / `file://` URI decoder in the handler before checking for `Photos.loadBase64(uri)` / `Photos.loadBytes(uri)` — the helpers exist exactly to avoid this branch in every AI-vision handler.\n" +
209
- "- Reaching for `photo.loadBitmap()` from an `app_callback` handler — the handler receives a String URI, not a Photo. Use `Photos.loadBitmap(uri)` (top-level) instead.\n\n" +
210
- "Minimal:\n" +
211
- ` { "type": "capture_photo", "id": "main_camera", "params": { "resolution": "MEDIUM", "format": "jpeg" } }\n` +
212
- "Then in a trigger:\n" +
213
- ` { "type": "block_call", "block_id": "main_camera", "save_as": "photo" } → result available as {{photo}} or {{photo.uri}}\n\n` +
214
- "─── capture_video ───\n" +
215
- "Bounded video clip with explicit duration. Tier: phone_side_workaround (degraded confidence).\n\n" +
216
- "When to use: short demo / share clips up to 60s. Always-bounded — there's no live-stream video capture in Meta DAT, so this is clip-only.\n" +
217
- "When NOT to use: continuous video for AR / ML — use video_frames stream. Live broadcasting — not supported on Meta Ray-Ban.\n\n" +
218
- "Required: type, id\n" +
219
- "Optional params: resolution ('LOW' | 'MEDIUM' | 'HIGH'), max_duration_seconds (max 60, default 10), format ('mp4_hevc' | 'mp4_h264'), include_audio (default false), stop_conditions (array)\n" +
220
- "Returns: { uri, duration_ms, format, width, height } (snake_case — matches the runtime template variable keys; `{{clip.duration_ms}}`, NOT `clip.durationMs`)\n\n" +
221
- "Field rules:\n" +
222
- "- format: 'mp4_hevc' is HEVC passthrough — fast, no thermal load. 'mp4_h264' triggers MediaCodec re-encode — drops to <5 fps on Gen 1 hardware (issue #44). Use HEVC unless your downstream consumer can't decode it.\n" +
223
- "- include_audio = true: forces HFP audio route, killing high-quality A2DP playback for the duration of capture. Default off.\n" +
224
- "- max_duration_seconds: do NOT set higher than 30 on battery-constrained sessions; thermal_warning typically fires at moderate severity around 30s of HEVC. Renamed from `duration_seconds` in 1.1.15-pair to match `record_audio.params.max_duration_seconds`. Old `duration_seconds` is no longer accepted; validateSpec emits an additionalProperty error naming the unknown key.\n" +
225
- "- stop_conditions: array of `{ type: 'voice_command', phrase: '...' }` entries that end the capture early. The capture returns the partial clip captured up to the stop. Phase 6 only enforces voice_command at runtime; capture_button / tap / double_tap / ai_vision are accepted by the spec parser but no-ops on real hardware (validateIntegration warns). Combine with max_duration_seconds: whichever fires first wins.\n\n" +
226
- "Common mistakes:\n" +
227
- "- Choosing h264 because 'it's universal' — the encoder choice causes the FPS cliff. Default to hevc.\n" +
228
- "- Long clips (60s) without a thermal_warning trigger to downgrade — expect dropped frames and degraded quality past ~30s.\n" +
229
- "- Using `duration_seconds` (the pre-1.1.15-pair name) — the schema now requires `max_duration_seconds`. validateSpec's additionalProperty error names the unknown key.\n\n" +
230
- "Minimal:\n" +
231
- ` { "type": "capture_video", "id": "main_clip", "params": { "max_duration_seconds": 5, "resolution": "MEDIUM", "format": "mp4_hevc" } }\n\n` +
232
- "Voice-stoppable (canonical for 'start recording / stop recording' UX):\n" +
233
- ` { "type": "capture_video", "id": "main_clip", "params": { "max_duration_seconds": 60, "resolution": "MEDIUM", "format": "mp4_hevc", "include_audio": true, "stop_conditions": [{ "type": "voice_command", "phrase": "stop recording" }] } }\n\n` +
234
- "Consuming the video from an `app_callback` handler (the canonical 'save to gallery' path):\n" +
235
- "- Same gotcha as capture_photo: spec convention `input: { uri: \"{{clip.uri}}\" }` passes the URI as a **String** to your handler — `call.input.uri` is the URI text, not a typed VideoClip object.\n" +
236
- "- Use the `Videos` URI-string helpers (top-level object, library 1.1.14-pair+):\n" +
237
- " - `Videos.copyToFile(uri: String, dst: File): Boolean` — the canonical save-to-gallery path. Streams from `file://` and bare-path sources via `File.copyTo` (memory-bounded regardless of clip length); decodes `data:` URIs in-memory (BrowserSim caps the payload server-side). Creates parent dirs, overwrites existing dst. Returns true on success, false on any failure (no throw).\n" +
238
- " - `Videos.loadBytes(uri: String): ByteArray?` — raw bytes, for AI APIs that take inline video (Gemini 1.5 Pro vision). Loads the whole video into memory — prefer `copyToFile` for >50 MB clips.\n" +
239
- " - `Videos.loadBase64(uri: String): String?` — base64 payload with the same `data:` URI fast path as `Photos.loadBase64`.\n" +
240
- " - `Videos.mediaTypeFromUri(uri: String): String?` — best-effort MIME from `data:` prefix or file extension. Recognized: `.mp4` (`video/mp4`), `.m4v` (`video/x-m4v`), `.mov` (`video/quicktime`), `.webm` (`video/webm`), `.mkv` (`video/x-matroska`), `.avi` (`video/x-msvideo`), `.3gp` (`video/3gpp`).\n" +
241
- " - All loaders are suspend (run on `Dispatchers.IO`), return null/false on missing file / decode failure / OOM. Never throw.\n" +
242
- "- Common shape: `val ok = Videos.copyToFile(call.input.jsonObject[\"uri\"]?.jsonPrimitive?.contentOrNull ?: return Error(...), File(galleryDir, \"\\${System.currentTimeMillis()}.mp4\"))`.\n\n" +
243
- "─── record_audio ───\n" +
244
- "Bounded mic capture from the glasses, auto-transcribed on completion. Tier: phone_side_workaround (degraded confidence).\n\n" +
245
- "When to use: voice-note flows ('take a note'), short dictation. STT runs automatically using the platform's on-device recognizer.\n" +
246
- "When NOT to use: continuous transcription — use the transcription_incremental stream. Audio-only recording without STT — not exposed; this block always transcribes.\n\n" +
247
- "Required: type, id\n" +
248
- "Optional params: max_duration_seconds (max 60, default 15), silence_timeout_seconds (max 10, default 2), quality ('standard' | 'high'), stop_conditions (array)\n" +
249
- "Returns: { transcript, audio_duration_ms, raw_audio_uri } (snake_case — matches the runtime template variable keys; `{{note.audio_duration_ms}}`, NOT `note.audioDurationMs`)\n\n" +
250
- "Field rules:\n" +
251
- "- silence_timeout_seconds: the recording stops automatically when this much silence is detected. Lower for snappy UX (1-2s); raise to 4-5s if users tend to pause.\n" +
252
- "- quality: 'high' is misleading — Meta DAT routes the mic over HFP mono 8 kHz. 'high' is effectively the same as 'standard' until A2DP-grade mic capture lands. Treat them as equivalent for now.\n" +
253
- "- stop_conditions: array of `{ type: 'voice_command', phrase: '...' }` entries that end the recording early. Whichever fires first wins (max_duration_seconds, silence_timeout_seconds, or any matching phrase). Returns the partial recording captured up to the stop. Phase 6 only enforces voice_command at runtime; other types are accepted by the parser but no-op on real hardware (validateIntegration warns).\n" +
254
- "- STT runs on-device via Android SpeechRecognizer / iOS SFSpeechRecognizer — no cloud cost, no developer config required.\n\n" +
255
- "Common mistakes:\n" +
256
- "- Expecting raw audio for custom STT — the block always returns a transcript. To do custom STT, use the audio_chunks stream + your own ASR.\n" +
257
- "- Setting max_duration_seconds = 60 without thermal awareness — works fine, but if the spec also captures video, you'll hit thermal limits faster.\n\n" +
258
- "Minimal:\n" +
259
- ` { "type": "record_audio", "id": "note_recorder", "params": { "max_duration_seconds": 30, "silence_timeout_seconds": 3, "quality": "standard" } }\n\n` +
260
- "── Block result usage ──\n" +
261
- "After a block_call action with save_as: 'photo' (or any name), the result is bound to that variable. Reference fields via dot notation in subsequent actions: {{photo.uri}}, {{photo.width}}, {{note.transcript}}, {{clip.duration_ms}}. **Field names are snake_case** to match the runtime payload (`duration_ms`, `audio_duration_ms`, `raw_audio_uri`) — NOT camelCase. validateSpec rule R13 catches camelCase typos like `{{clip.durationMs}}` at validation time. The bare {{photo}} resolves to the URI for capture_photo and capture_video. See template_syntax topic.",
262
- keywords: ["block", "capture_photo", "capture_video", "record_audio", "photo", "video", "audio", "capture", "hevc", "h264", "stt"],
263
- relatedTools: ["getPlatformInfo", "initSpec", "updateSpec"],
264
- },
265
- {
266
- topic: "stream_types",
267
- title: "Stream Types",
268
- content: "**Streams are continuous flows of media or events.** A stream is *declared* in spec.streams[] (with config), *gated by a toggle* at runtime, and *consumed by developer code* in ExtentosStreams.{kt,swift} (generated by `generateConsumer(kind: 'stream')`). Direction matters: video_frames / audio_chunks / transcription_incremental are OUTBOUND from glasses; outgoing_video / outgoing_audio are phone→glasses.\n\n" +
269
- "5 stream types. All require `generateConsumer(kind: 'stream')` to scaffold the consumer file. Lifecycle is tied to app foreground; on Android 14+ you need FOREGROUND_SERVICE permission and a foregroundServiceType declaration in AndroidManifest.xml.\n\n" +
270
- "─── video_frames ───\n" +
271
- "Outbound stream of camera frames from the glasses. Tier: dat_native.\n\n" +
272
- "When to use: continuous on-device ML (object detection, scene classification). Live preview UI. Anything needing frames > 1/second.\n" +
273
- "When NOT to use: one-shot still capture (use capture_photo block). Bounded clips (use capture_video block). Streaming to a remote server (use outgoing_video_stream — different direction).\n\n" +
274
- "Required: type, id, config\n" +
275
- "Config: resolution ('LOW' | 'MEDIUM' | 'HIGH'), frame_rate (2 | 7 | 15 | 24 | 30), codec ('hvc1' | 'h264'), backpressure ('drop_oldest' | 'drop_newest' | 'suspend')\n\n" +
276
- "Field rules:\n" +
277
- "- frame_rate × battery: 2 fps for ML workloads (negligible drain). 7-15 fps for preview UI (~1 hr active). 24-30 fps drains battery fast (~30 min); ALWAYS pair with a thermal_throttle preset trigger to downgrade on warning.\n" +
278
- "- resolution × ML: LOW is sufficient for most on-device models that downscale anyway. Don't ask for HIGH if you're going to crop and downscale.\n" +
279
- "- codec: hvc1 (HEVC) bypasses the MediaCodec FPS cliff — use it. h264 forces re-encode and drops to <5 fps in practice.\n" +
280
- "- backpressure: 'drop_oldest' is correct for preview / latest-frame ML. 'drop_newest' for analyses where the historical frame matters more. 'suspend' blocks the producer until consumer drains — only use if your consumer is guaranteed fast.\n\n" +
281
- "Common mistakes:\n" +
282
- "- 30 fps + HIGH for ML — wastes battery; downscale to 2 fps + LOW and throttle aggressively.\n" +
283
- "- No thermal_throttle trigger — 24-30 fps will hit thermal limits within 5 minutes; without a trigger to downgrade, the user just sees a dead session.\n" +
284
- "- Forgetting the FOREGROUND_SERVICE manifest entry on Android 14+ — `validateIntegration` flags this; `getPermissions` returns the manifest snippet.\n\n" +
285
- "Minimal:\n" +
286
- ` { "type": "video_frames", "id": "frames", "config": { "resolution": "LOW", "frame_rate": 2, "codec": "hvc1", "backpressure": "drop_oldest" } }\n\n` +
287
- "─── audio_chunks ───\n" +
288
- "Outbound stream of raw audio chunks from the glasses mic. Tier: dat_native.\n\n" +
289
- "When to use: custom on-device or cloud STT (your own model, not Meta's). Audio analysis (silence detection, voice activity, custom keyword spotting).\n" +
290
- "When NOT to use: phrase recognition (use voice_command trigger — Meta's STT is cheaper). Continuous transcription (use transcription_incremental — already STT'd). Voice notes (use record_audio block — bounded + auto-STT).\n\n" +
291
- "Required: type, id, config\n" +
292
- "Config: chunk_millis (default 20), backpressure ('drop_oldest' | 'drop_newest' | 'suspend')\n\n" +
293
- "Field rules:\n" +
294
- "- chunk_millis: 20ms is the canonical opus/CELT frame size. Don't change unless your downstream consumer expects something else.\n" +
295
- "- 16-chunk buffer cap; slow consumers will drop. Profile your consumer — anything > ~50ms per chunk processing on average will fall behind.\n" +
296
- "- Activating audio_chunks routes audio to HFP, dropping any active A2DP playback to mono 8 kHz. Coexistence is governed by audio_video_coexistence_policy toggle.\n\n" +
297
- "Minimal:\n" +
298
- ` { "type": "audio_chunks", "id": "mic", "config": { "chunk_millis": 20, "backpressure": "drop_oldest" } }\n\n` +
299
- "─── transcription_incremental ───\n" +
300
- "STT events from the platform recognizer (partial + final transcripts). Tier: phone_side_workaround.\n\n" +
301
- "When to use: continuous live transcription (translation app, dictation). The standard 'live captions' shape.\n" +
302
- "When NOT to use: one-shot voice-to-text (use record_audio block). Phrase activation (use voice_command trigger). Custom STT model (use audio_chunks stream + your own engine).\n\n" +
303
- "Required: type, id, config\n" +
304
- "Config: language (BCP-47 tag, e.g., 'en-US'), min_partial_confidence (default 0.4)\n" +
305
- "Gated by toggle: transcription_enabled (default false). Set via set_toggle action to start. Fires Transcript.Final and Transcript.Partial events on the consumer Flow.\n\n" +
306
- "Field rules:\n" +
307
- "- min_partial_confidence: 0.4 is the platform-recommended floor. Partial events are chatty — debounce in your UI consumer if rendering each one to a Composable / SwiftUI view.\n" +
308
- "- language: must be a BCP-47 tag. 'en' alone won't work; use 'en-US' or 'en-GB'.\n" +
309
- "- Activating transcription_incremental routes audio to HFP same as audio_chunks — A2DP playback drops to mono.\n" +
310
- "- Lifecycle: the library auto-restarts the recognizer after each Final transcript (300ms gap on iOS, 100ms on Android) to keep the stream continuous. Developer-visible behavior: an always-on Flow / AsyncStream that never naturally ends until you cancel it. Each Transcript carries `startMs` / `endMs` relative to the current recognition task — they reset to ~0 on each restart, so don't rely on monotonic-across-restarts timing; anchor against your own clock if you need wall-clock alignment.\n" +
311
- "- Engine source per transport: phone STT via SCO mic — Android `SpeechRecognizer` (Google STT online) on RealMeta + LocalSim; iOS `SFSpeechRecognizer` (Apple STT online) on RealMeta + LocalSim; Web Speech API → Whisper fallback in BrowserSim. Surface visible in `getEventLog(filter: \"transport\")` as `transport.transcript_emitted.source` ∈ `apple_stt` / `google_stt` / `web_speech_api` / `whisper_browser`.\n\n" +
312
- "Minimal:\n" +
313
- ` { "type": "transcription_incremental", "id": "live_transcript", "config": { "language": "en-US" } }\n\n` +
314
- "─── outgoing_audio_stream ───\n" +
315
- "Phone→glasses audio (TTS, voice call audio, music). Tier: dat_native.\n\n" +
316
- "When to use: video-call audio. Custom TTS bypassing platform speak. Streaming music or generated audio.\n" +
317
- "When NOT to use: simple TTS prompts (use speak_text action — it handles routing). Bounded earcons (use earcon_beep action).\n\n" +
318
- "Config: chunk_millis (default 20), codec ('opus' | 'pcm_s16le')\n" +
319
- "Field rules:\n" +
320
- "- codec: opus for network-sourced audio (compressed). pcm_s16le only for local on-device generation where compression overhead matters.\n\n" +
321
- "─── outgoing_video_stream ───\n" +
322
- "Phone→glasses video. Tier: phone_side_workaround.\n\n" +
323
- "When to use: video-call rendering on the glasses display. Custom AR overlays (limited — no AR display surface on Ray-Ban Gen 1).\n" +
324
- "Config: resolution, frame_rate (2|7|15|24|30), codec ('hvc1' | 'h264')\n" +
325
- "Field rules:\n" +
326
- "- codec: hvc1 (HEVC) passthrough is mandatory for sustained streaming. h264 requires re-encode; thermal_warning will fire moderate severity within 5 minutes.\n\n" +
327
- "── Stream consumption ──\n" +
328
- "Stream consumers live in `ExtentosStreams.kt` / `ExtentosStreams.swift`, generated by `generateConsumer({ kind: 'stream', streams: [...] })`. The generated file uses Kotlin `Flow.collect {}` (Android) or Swift `for await … in` (iOS). Lifecycle: scope to `lifecycle` (recommended — auto-cancels on app teardown) or `connection` (only collects while glasses are Active). Toggles gate the actual data flow; set the toggle from a manual_launch trigger or from app code via `glasses.toggles.put(...)`.",
329
- keywords: ["stream", "video_frames", "audio_chunks", "transcription", "outgoing_audio", "outgoing_video", "flow", "asyncstream", "backpressure", "continuous", "hevc", "battery"],
330
- relatedTools: ["getPlatformInfo", "generateConsumer", "getPermissions"],
331
- },
332
- {
333
- topic: "trigger_types",
334
- title: "Trigger Types",
335
- content: "**Triggers fire flows.** A spec contains 0+ triggers; each describes a stimulus + an `actions[]` flow that runs on every fire. 13 trigger types grouped by stimulus class:\n\n" +
336
- "- VOICE: voice_command, wake_word, push_to_talk, fallback\n" +
337
- "- HARDWARE: hinges_closed, thermal_warning, audio_route_changed\n" +
338
- "- LIFECYCLE: manual_launch, connection_state_changed, app_lifecycle_changed\n" +
339
- "- PHONE: phone_notification_forwarded, incoming_call_detected, location_updated\n\n" +
340
- "**Future trigger types — not yet in the schema:** `capture_button`, `tap`, `double_tap` (the glasses' physical capture button + touchpad). Today these exist only as block-level stop conditions (e.g. `stop_conditions: [{ type: \"capture_button\" }]` to end a recording on button press). Validating a spec with `{ \"type\": \"capture_button\", \"actions\": [...] }` as a standalone trigger fails `validateSpec R6` (trigger type not supported). When Meta DAT exposes them as third-party trigger surfaces, Extentos will add them as standalone triggers without spec migration.\n\n" +
341
- "Per-primitive reference below. Each entry: when to use → required → optional → field rules → common mistakes → minimal example.\n\n" +
342
- "─── voice_command ───\n" +
343
- "Fire on a phrase match in STT output. The most common trigger for host-app callbacks ('delete all notes', 'take a photo', 'translate this').\n\n" +
344
- "When to use: phone-app integration via a memorable phrase. Most third-party integrations of glasses use this trigger to call back into the host app's existing logic via app_callback.\n" +
345
- "When NOT to use: single-word triggers ('start', 'go') — use wake_word with a custom Picovoice model. Continuous transcription — use stream type transcription_incremental fed by a manual_launch.\n\n" +
346
- "Required: type, phrase, actions\n" +
347
- "Optional: id, match_mode (default 'contains'), min_confidence (default 0.5), mode (default 'single')\n\n" +
348
- "Field rules:\n" +
349
- "- phrase: 3+ syllables for STT reliability. Avoid digits ('say two' — STT rarely round-trips '2'), homophones (two/to/too, four/for, here/hear, no/know, sea/see, right/write), and Meta wake words ('hey meta', 'ok meta', 'hey facebook' — reserved for Meta AI). Named captures via {{name}} must be the FINAL token in the phrase ('translate {{text}}' not '{{text}} please'). **Apostrophes, trailing punctuation, case, and whitespace runs are normalized automatically** — both `whats my move` and `what's my move` (and the STT output `What's my move?`) collapse to the same key and match identically. Write whichever spec phrase reads naturally; the matcher lowercases, drops anything that is not a letter/digit/whitespace, and collapses whitespace before comparison.\n" +
350
- "- match_mode: 'contains' (default) tolerates STT filler words ('um', 'okay') prepended/appended — almost always the right choice. 'exact' requires verbatim match — use only for short single-token phrases. 'starts_with' is brittle because STT prepends filler — prefer 'contains'.\n" +
351
- "- min_confidence: 0.5 works for clean speech. Drop to 0.3 in noisy environments (more false positives). Raise to 0.7 for destructive flows ('delete everything').\n" +
352
- "- mode: 'single' (default) ignores fires while a flow runs. 'restart' cancels in-flight on new fire (use for 'stop listening'-style toggles). 'queued' buffers up to 3 (use for 'save note' where users may rapid-fire).\n" +
353
- "- Recognition source: phone STT via SCO mic, never the glasses DSP. Android SpeechRecognizer (Google STT online); iOS SFSpeechRecognizer (Apple STT online); BrowserSim uses Web Speech API → Whisper fallback. Confidence calibration varies between engines, so a phrase that fires reliably at 0.5 in BrowserSim may need a different threshold on RealMeta — check `getEventLog(filter: 'transport')` for `transport.transcript_emitted` events that show what each recognizer produced before the matcher saw it.\n\n" +
354
- "Common mistakes:\n" +
355
- "- Single-word phrase ('delete') — STT will mishear; require 3+ syllables.\n" +
356
- "- Numeric phrase ('set timer to 5') — write 'set timer to five' and parse the spelled number in your handler.\n" +
357
- "- Two triggers whose phrases are homophones ('note' / 'no') — both will fire on either utterance.\n\n" +
358
- "Minimal:\n" +
359
- ` {\n` +
360
- ` "type": "voice_command",\n` +
361
- ` "id": "t_my_phrase",\n` +
362
- ` "phrase": "do the thing",\n` +
363
- ` "match_mode": "contains",\n` +
364
- ` "actions": [\n` +
365
- ` { "type": "app_callback", "handler": "do_the_thing", "input": {}, "save_as": "result" },\n` +
366
- ` { "type": "speak_text", "text": "Done." }\n` +
367
- ` ]\n` +
368
- ` }\n\n` +
369
- "─── wake_word ───\n" +
370
- "Fire on a custom on-device wake word (always-listening, low-power).\n\n" +
371
- "When to use: short single-word activator that has to work without prior phrase ('hey assistant', 'computer'). Continuous always-listening UX where voice_command's STT cost is too high.\n" +
372
- "When NOT to use: phrase-based triggers (use voice_command). Anything Meta's reserved wake words ('hey meta', 'hey facebook', 'ok meta') — those are not exposed to third-party apps.\n\n" +
373
- "Required: type, phrase (built-in or path to a custom Picovoice model), actions\n" +
374
- "Requires: microphone permission + Picovoice access key for custom models.\n\n" +
375
- "─── push_to_talk ───\n" +
376
- "Fire when the user holds a host-app button while speaking; commits on release.\n\n" +
377
- "When to use: explicit voice capture without keyword cost. The host app provides only the BUTTON; audio source is glasses-side, STT runs at the transport layer (browser-side in BrowserSim, glasses mic in RealMeta). The host app never handles raw audio.\n" +
378
- "Required: type, actions. Payload exposes transcript and rawUtterance.\n\n" +
379
- "Wiring — recommended path uses `audio.startPushToTalk()`, which wraps the subscribe / accumulate Final / cancel-and-flush lifecycle so the host app only handles press / release:\n\n" +
380
- "Android:\n" +
381
- ` // onPress (e.g., a Compose pointerInput onPress)\n` +
382
- ` val session = glasses.audio.startPushToTalk(scope)\n` +
383
- `\n` +
384
- ` // onRelease (suspend context — coroutine on the same scope)\n` +
385
- ` val text = session.stopAndFlush()\n` +
386
- ` glasses.runtime.fireTrigger(\n` +
387
- ` triggerId = "<your_trigger_id>",\n` +
388
- ` payload = mapOf(\n` +
389
- ` "transcript" to JsonPrimitive(text),\n` +
390
- ` "rawUtterance" to JsonPrimitive(text),\n` +
391
- ` ),\n` +
392
- ` )\n\n` +
393
- "iOS:\n" +
394
- ` // onPress\n` +
395
- ` let session = glasses.audio.startPushToTalk()\n` +
396
- `\n` +
397
- ` // onRelease (Task)\n` +
398
- ` let text = await session.stopAndFlush()\n` +
399
- ` await glasses.runtime.fireTrigger(\n` +
400
- ` triggerId: "<your_trigger_id>",\n` +
401
- ` payload: .object([\n` +
402
- ` "transcript": .string(text),\n` +
403
- ` "rawUtterance": .string(text),\n` +
404
- ` ])\n` +
405
- ` )\n\n` +
406
- "Under the hood the helper subscribes to `audio.transcriptions()` (the same stream the rest of the library uses) and accumulates `Transcript.Final` segments; `stopAndFlush()` cancels the subscription and returns the joined text. Audio source is glasses-side on both platforms — no phone mic permission, no `SpeechRecognizer` / `SFSpeechRecognizer` involvement, works identically on emulator and real hardware. The trigger's `actions[]` flow then runs with `{{trigger_payload.transcript}}` available for app_callback inputs.\n\n" +
407
- "Common mistakes:\n" +
408
- "- Reaching for Android `SpeechRecognizer` or iOS `SFSpeechRecognizer` directly — bypasses the library, doesn't work in BrowserSim (the emulator mic doesn't deliver audio to platform STT), and rebuilds what `audio.startPushToTalk()` already wraps.\n" +
409
- "- Confusing this with the `record_audio` BLOCK — that's silence-timeout-committed (good for record-then-pause), not press-and-hold UX.\n" +
410
- "- Manually wiring `audio.transcriptions().collect { ... }` + `cancelAndJoin` + a buffer when the helper exists — the ad-hoc version was the canonical pattern before `startPushToTalk()` shipped, but it's now boilerplate.\n\n" +
411
- "Minimal:\n" +
412
- ` { "type": "push_to_talk", "id": "t_record_note", "actions": [{ "type": "app_callback", "handler": "save_voice_note", "input": { "transcript": "{{trigger_payload.transcript}}" }, "save_as": "result" }, { "type": "earcon_beep", "sound": "confirmation" }] }\n\n` +
413
- "─── fallback ───\n" +
414
- "Catches any unrecognized voice utterance that didn't match a voice_command. Max 1 per spec.\n\n" +
415
- "When to use: graceful 'sorry, I didn't catch that' UX. Always include if the spec has any voice_command.\n" +
416
- "Required: type, actions. No phrase — it matches by exclusion.\n\n" +
417
- "Minimal:\n" +
418
- ` { "type": "fallback", "id": "t_fallback", "actions": [{ "type": "speak_text", "text": "Sorry, I didn't catch that." }] }\n\n` +
419
- "─── manual_launch ───\n" +
420
- "Fires when the host app calls `glasses.runtime.fireTrigger(\"<your_trigger_id>\")` — typically from a button or other host-side affordance.\n\n" +
421
- "When to use: explicit start of a session-bound flow ('start translation mode', 'begin recording'). Often paired with a set_toggle action to flip transcription_enabled on.\n" +
422
- "Required: type, actions. No payload required at the spec level — payload comes from the host app's `fireTrigger` call (optional Map<String, JSONValue>).\n\n" +
423
- "Wiring: the host app reaches its ExtentosGlasses instance (same one created in ExtentosBootstrap) and calls `glasses.runtime.fireTrigger(\"t_start\")` from a coroutine context. The actions[] flow runs immediately. Identical API on iOS.\n\n" +
424
- "Common mistakes:\n" +
425
- "- Looking for `runtime.invokeManualLaunch()` — that method does NOT exist. The unified API for `manual_launch` is `runtime.fireTrigger(triggerId, payload)`. (For `push_to_talk` the recommended path is `audio.startPushToTalk()` + `session.stopAndFlush()`, which wraps the transcript collection then calls `fireTrigger` for you — see the push_to_talk section above.)\n\n" +
426
- "Minimal:\n" +
427
- ` { "type": "manual_launch", "id": "t_start", "actions": [{ "type": "set_toggle", "key": "transcription_enabled", "value": true }] }\n\n` +
428
- "─── hinges_closed ───\n" +
429
- "Fires when the glasses' hinges close (user takes them off).\n" +
430
- "When to use: pause-on-removal UX, save-state on close. Pair with set_toggle to disable streams.\n\n" +
431
- "─── thermal_warning ───\n" +
432
- "Fires on device thermal-state escalation. Severity: 'light' | 'moderate' | 'severe' | 'critical'.\n\n" +
433
- "When to use: protect battery + extend session — listen for severity 'moderate' or higher and downgrade quality. Most camera/streaming specs include a thermal_warning trigger that flips battery_save_mode on. The `thermal_throttle` preset on initSpec auto-injects this.\n" +
434
- "Required: type, severity, actions.\n\n" +
435
- "─── connection_state_changed ───\n" +
436
- "Fires on glasses-connection-state transitions. Use on_transition.from / on_transition.to to gate on specific transitions.\n\n" +
437
- "When to use: announce connect/disconnect, persist state across reconnects. The `disconnect_announce` preset on initSpec auto-injects a sensible default.\n" +
438
- "Payload: from, to, cause (all GlassesState bucket strings).\n\n" +
439
- "─── audio_route_changed ───\n" +
440
- "Fires when audio routing changes (BT pairing/unpairing, A2DP↔HFP swap).\n" +
441
- "Payload: from, to. Useful for graceful audio-mode handoff.\n\n" +
442
- "─── phone_notification_forwarded ───\n" +
443
- "Fires when a phone notification matching the app's filter is forwarded to glasses TTS.\n\n" +
444
- "Payload: packageName, title, body.\n" +
445
- "Requires: Android BIND_NOTIFICATION_LISTENER_SERVICE (user-granted at runtime); iOS UNUserNotificationCenter delegate (system notifications only — third-party app banners not accessible).\n\n" +
446
- "─── incoming_call_detected ───\n" +
447
- "Fires when an incoming phone call is detected.\n" +
448
- "Payload: callerNumber, callerName.\n" +
449
- "Requires: Android READ_PHONE_STATE permission; iOS CallKit observer.\n\n" +
450
- "─── location_updated ───\n" +
451
- "Fires periodically with current GPS coordinates. Throttle via min_interval_millis + min_displacement_meters to control battery cost.\n\n" +
452
- "Required: type, actions.\n" +
453
- "Optional: min_interval_millis (default 5000), min_displacement_meters (default 10).\n" +
454
- "Payload: latitude, longitude, accuracyMeters.\n" +
455
- "Requires: ACCESS_FINE_LOCATION (Android) or NSLocationWhenInUseUsageDescription (iOS).\n\n" +
456
- "─── app_lifecycle_changed ───\n" +
457
- "Fires when the host app's lifecycle state changes ('foreground' / 'background' / 'destroyed').\n" +
458
- "Use to pause streaming or save state when backgrounded.\n\n" +
459
- "── Presets ──\n" +
460
- "Three canned trigger shapes can be auto-injected via initSpec.presetTriggers: 'fallback_default' (a fallback trigger with sensible 'sorry' speech), 'thermal_throttle' (thermal_warning trigger flipping battery_save_mode), 'disconnect_announce' (connection_state_changed announcer). Pass the preset names to initSpec; do not duplicate the trigger shape manually if you use the preset.",
461
- keywords: ["trigger", "voice_command", "tap", "capture_button", "wake_word", "fallback", "notification", "thermal", "preset", "phrase", "match_mode", "homophone"],
462
- relatedTools: ["getPlatformInfo", "getVoiceCommandGuidance", "updateSpec"],
463
- },
464
- {
465
- topic: "action_types",
466
- title: "Action Types",
467
- content: "**Actions are steps inside a trigger's flow.** Each trigger's actions[] runs sequentially when the trigger fires. Actions can be flat or nested via control-flow actions (branch, when_connected, retry_with_backoff). 15 action types grouped by purpose:\n\n" +
468
- "- HOST-APP BRIDGE: app_callback (the most important — calls into developer code)\n" +
469
- "- MEDIA: block_call, speak_text, earcon_beep\n" +
470
- "- STATE: set_toggle, set_variable\n" +
471
- "- CONTROL FLOW: branch, when_connected, retry_with_backoff, buffer_and_replay, delay, abort, ask_user\n" +
472
- "- DIAGNOSTICS: log\n\n" +
473
- "─── app_callback ───\n" +
474
- "**The single most important action.** Routes flow control from the spec into the developer's native code. This is how 'voice phrase → my app does the thing' works.\n\n" +
475
- "When to use: ANY time the spec needs to invoke domain logic that lives in the host app — DB writes, AI calls, repository ops, navigation, file IO, network requests, anything that's not a stock primitive. Always wrap arbitrary host-app behavior in an app_callback rather than trying to express it as composed primitives.\n" +
476
- "When NOT to use: pure spec-side state changes (use set_toggle / set_variable). Stock TTS (use speak_text). Capturing media (use block_call).\n\n" +
477
- "Required: type, handler (string — the handler name registered in ExtentosCallbacks)\n" +
478
- "Optional: input (object — payload passed to the handler; values support {{template}} substitution from prior save_as variables), save_as (variable name to capture the handler's return value)\n\n" +
479
- "Field rules:\n" +
480
- "- handler: must match a name passed to generateConsumer(kind: 'callback'). Generated stub lives in `ExtentosCallbacks.kt`/`.swift` with USER-CODE markers; the developer fills in the body.\n" +
481
- "- input: keys are arbitrary, values can be literals OR {{template}} references to prior save_as outputs. Example: `{ image: '{{photo.uri}}', prompt: 'Describe this scene.' }`.\n" +
482
- "- save_as: the handler's `Success` payload becomes a variable accessible as `{{result}}` (or `{{result.field}}`). The handler's `Error` short-circuits the flow.\n" +
483
- "- Timeouts surface as 'callback.timeout' events on the runtime event log.\n\n" +
484
- "Common mistakes:\n" +
485
- "- Forgetting to call `generateConsumer({ kind: 'callback', handlers: [...] })` after adding a new app_callback — the spec validates fine, but at runtime the handler is missing and the flow errors.\n" +
486
- "- Passing `suggestedProvider` to generateConsumer for non-AI handlers — the generated comment will say 'wire your AI provider client here', misleading. Only pass suggestedProvider when the handler actually wraps an AI API.\n" +
487
- "- Trying to express sequential domain logic as multiple app_callback calls when one handler with branching logic would be cleaner. The spec is for hardware orchestration; complex business logic belongs inside the handler.\n\n" +
488
- "Minimal:\n" +
489
- ` { "type": "app_callback", "handler": "clear_notes", "input": {}, "save_as": "result" }\n\n` +
490
- "With templated input from a prior block_call:\n" +
491
- ` { "type": "app_callback", "handler": "describe_image", "input": { "image": "{{photo.uri}}" }, "save_as": "description" }\n\n` +
492
- "─── block_call ───\n" +
493
- "Invoke a declared block (capture_photo, capture_video, record_audio).\n\n" +
494
- "Required: type, block_id (must reference an id from spec.blocks[])\n" +
495
- "Optional: save_as (capture the block's return value as a variable)\n\n" +
496
- "Minimal:\n" +
497
- ` { "type": "block_call", "block_id": "main_camera", "save_as": "photo" }\n\n` +
498
- "─── speak_text ───\n" +
499
- "TTS via the platform speech synthesizer. Routes audio output appropriately based on coexistence policy.\n\n" +
500
- "Required: type, text (string — supports {{template}} substitution)\n\n" +
501
- "Field rules:\n" +
502
- "- text: keep under ~140 characters for snappy UX. Long TTS is interrupting and battery-expensive.\n" +
503
- "- Calling speak_text while transcription_enabled is on routes audio to HFP — the user briefly loses A2DP music if any.\n\n" +
504
- "Minimal:\n" +
505
- ` { "type": "speak_text", "text": "Done." }\n` +
506
- ` { "type": "speak_text", "text": "Hello {{user.name}}." }\n\n` +
507
- "─── earcon_beep ───\n" +
508
- "Short audio cue (canned sounds, no synthesis). Cheaper and faster than speak_text for confirmations.\n\n" +
509
- "Required: type, sound ('confirmation' | 'error' | 'notification' | 'start' | 'stop')\n" +
510
- "Optional: volume (0.0-1.0, default 0.6)\n\n" +
511
- "─── set_toggle ───\n" +
512
- "Mutate a runtime toggle (transcription_enabled, camera_streaming_enabled, privacy_mode, etc.). See `toggles` topic for the 8 toggle keys.\n\n" +
513
- "Required: type, key, value\n\n" +
514
- "Common mistakes:\n" +
515
- "- Toggling transcription_enabled without first declaring a transcription_incremental stream — the toggle has no effect.\n" +
516
- "- Setting privacy_mode true and forgetting to unset it later — privacy_mode suppresses ALL captures until cleared.\n\n" +
517
- "Minimal:\n" +
518
- ` { "type": "set_toggle", "key": "transcription_enabled", "value": true }\n\n` +
519
- "─── set_variable ───\n" +
520
- "Write a flow-scoped variable. Use for synthesized values (counters, computed flags) that subsequent actions reference.\n\n" +
521
- "Required: type, name, value (literal or {{template}})\n" +
522
- "Variables are flow-scoped — they live for the duration of a single trigger fire and don't persist across fires (use set_toggle for cross-fire state).\n\n" +
523
- "─── branch ───\n" +
524
- "Conditional execution. The single control-flow primitive — there is no `if`/`else` outside branch.\n\n" +
525
- "Required: type, condition, then[]\n" +
526
- "Optional: else[]\n\n" +
527
- "Condition shape: `{ variable: '<dotted.path>', operator: '<op>', value?: <literal> }`\n" +
528
- "Operators: 'equals', 'not_equals', 'is_empty', 'not_empty', 'greater_than', 'less_than'\n\n" +
529
- "Field rules:\n" +
530
- "- variable: dotted path to a save_as'd value or a {{trigger_payload.x}} field. Examples: 'result.description', 'note.transcript', 'toggles.privacy_mode', 'trigger_payload.captures.query'.\n" +
531
- "- Branch depth: max 1 — `then` and `else` arms cannot contain another `branch`. validateSpec rejects nested branches with `branch is nested inside another branch; max depth is 1.` (R9 enforcement at validateSpec.ts:336). If you need deeper branching, the logic should move into an app_callback handler that returns a discriminator + a single branch on the discriminator.\n\n" +
532
- "Common mistakes:\n" +
533
- "- Comparing a typed value with a string literal: `{ variable: 'count', operator: 'equals', value: '5' }` when count is a number — use the numeric literal `value: 5`.\n" +
534
- "- Nesting branches more than 1 deep — refactor into an app_callback handler that returns a discriminator + a single branch on the discriminator.\n\n" +
535
- "Minimal:\n" +
536
- ` {\n` +
537
- ` "type": "branch",\n` +
538
- ` "condition": { "variable": "result.description", "operator": "is_empty" },\n` +
539
- ` "then": [{ "type": "speak_text", "text": "No result." }],\n` +
540
- ` "else": [{ "type": "speak_text", "text": "{{result.description}}" }]\n` +
541
- ` }\n\n` +
542
- "─── when_connected ───\n" +
543
- "Wraps an action that should only run while glasses are Active. If not connected, optionally waits with timeout.\n\n" +
544
- "Required: type, action (the wrapped action)\n" +
545
- "Optional: timeout_seconds\n\n" +
546
- "Use when: a flow can be fired by phone-side logic before the glasses are connected (e.g., manual_launch from a phone UI button) and you want graceful deferral.\n\n" +
547
- "─── retry_with_backoff ───\n" +
548
- "Retries an action (typically app_callback) on failure with exponential backoff.\n\n" +
549
- "Required: type, action\n" +
550
- "Optional: max_attempts (default 3), initial_delay_seconds (default 1)\n\n" +
551
- "Field rules:\n" +
552
- "- Succeeds when the wrapped action populates its save_as. Failure paths bubble up to flow termination unless caught by a branch.\n" +
553
- "- Use sparingly — most flows should fail fast; only retry idempotent ops (network reads, etc.).\n\n" +
554
- "─── buffer_and_replay ───\n" +
555
- "Delays action execution until glasses connect, replaying buffered actions in order.\n" +
556
- "Niche — use when_connected for the common case.\n\n" +
557
- "─── delay ───\n" +
558
- "Pause flow execution for N seconds.\n\n" +
559
- "Required: type, seconds (number > 0 and ≤ 3600, literal only — not templated)\n\n" +
560
- "Two legitimate patterns, with very different sane ranges:\n" +
561
- "1. **Response delay** — short pause between actions in a single user-facing flow (e.g., between two speak_text actions). Keep ≤ 5s; users expect glasses to feel instant.\n" +
562
- "2. **Session timeout / window** — bound a longer-running mode like auto-stop transcription after 30s, daily reminder window, etc. Up to 3600s is fine here. Common shape: `[set_toggle on, delay 30, branch on toggle state, set_toggle off]` so a stop-trigger can short-circuit by flipping the toggle off mid-window.\n\n" +
563
- "Field rules:\n" +
564
- "- seconds must be a number literal — `{{some_var}}` is rejected. If you need dynamic delay, compute it in an app_callback handler.\n" +
565
- "- A delay action blocks the trigger's flow until it elapses. With trigger.mode='single' (default), other fires of the SAME trigger are ignored during the delay. To allow re-firing mid-window, use mode='restart' (cancels the in-flight flow on re-fire).\n" +
566
- "- delay cannot be cancelled by another trigger's flow. If you need a 'stop' trigger to abort an in-flight delay, the canonical pattern is the toggle-gated branch shown above — the long flow's later actions check the toggle state and no-op if the stop trigger flipped it off.\n\n" +
567
- "─── abort ───\n" +
568
- "Terminates the flow immediately. Use for early-exit on error conditions inside a branch's then[].\n\n" +
569
- "─── ask_user ───\n" +
570
- "Voice Q&A: speaks the `question` via TTS, captures the user's spoken response into `save_as`. **NOT for LLM calls** — the action's name says what it does (ask the user). For LLM/AI calls, use `app_callback` with a developer-implemented handler (the `prompt` field of that handler's input is the natural-sense LLM prompt).\n" +
571
- "Required: type, question, save_as\n" +
572
- "Optional: timeout_seconds (default 5, max 30), min_confidence (default 0.4)\n\n" +
573
- "─── log ───\n" +
574
- "Emit a structured log event into the runtime event log (visible in getEventLog).\n\n" +
575
- "Required: type, level ('debug' | 'info' | 'warning' | 'error'), message (supports {{template}})\n" +
576
- "Use to mark flow milestones for diagnostics — particularly useful inside branch arms to trace which path executed.\n\n" +
577
- "── Composition pattern ──\n" +
578
- "The canonical 'voice phrase → host app → response' shape:\n" +
579
- ` { "type": "app_callback", "handler": "my_handler", "input": { ... }, "save_as": "result" },\n` +
580
- ` { "type": "branch",\n` +
581
- ` "condition": { "variable": "result.ok", "operator": "equals", "value": true },\n` +
582
- ` "then": [{ "type": "speak_text", "text": "{{result.message}}" }],\n` +
583
- ` "else": [{ "type": "speak_text", "text": "Sorry, that failed." }]\n` +
584
- ` }\n\n` +
585
- "The canonical 'photo + AI → speak result' shape:\n" +
586
- ` { "type": "block_call", "block_id": "main_camera", "save_as": "photo" },\n` +
587
- ` { "type": "app_callback", "handler": "describe_image", "input": { "image": "{{photo.uri}}" }, "save_as": "description" },\n` +
588
- ` { "type": "speak_text", "text": "{{description.text}}" }\n`,
589
- keywords: ["action", "block_call", "app_callback", "branch", "retry", "speak", "set_toggle", "set_variable", "delay", "log", "earcon", "ask_user", "prompt", "host-app", "callback"],
590
- relatedTools: ["updateSpec", "generateConsumer"],
157
+ content: "Extentos is a pure Kotlin/Swift SDK for adding Meta Ray-Ban glasses features to an existing mobile app. The customer writes normal app code that calls `glasses.audio.X`, `glasses.camera.X`, etc. directly — no spec language, no DSL, no triggers/actions/blocks, no callback dispatch. The library bridges hardware; the customer's code is the application logic.\n\n" +
158
+ "**Prerequisite: host app must already exist.** `generateConnectionModule` scaffolds Extentos INTO an existing Compose / SwiftUI host app it does not create the host app itself. CLI agents starting from scratch should fetch `searchDocs(topic: 'host_app_scaffold')` first for a copy-pasteable Compose / SwiftUI project template; Android Studio / Xcode users can use the New Project wizard.\n\n" +
159
+ "**Tool sequence (deterministic, in order):**\n\n" +
160
+ "(1) `getPlatformInfo` — fetch capability surface (compact: feature names + categories + tiers, ~2KB) and library version. The `features` list is the SDK's vocabulary: `capture_photo`, `record_audio`, `transcription_incremental`, `voice_command`, etc. Use these names in subsequent tool calls.\n\n" +
161
+ "(2) `generateConnectionModule`one-shot scaffold (bootstrap + dependencies + connection-page wiring). Surfaces a developerInstructions Step 1 prompt to ask the dev about `ExtentosConnectionPage` placement; do not skip it.\n\n" +
162
+ "(3) Write the handler code in Kotlin/Swift directly. Two tools help you compose against the SDK correctly:\n" +
163
+ " - `getCodeExample(pattern)`full canonical compositions in both languages (voice_qa_assistant for the multi-turn wake+question+LLM+speak flow, barge_in_speak for cancel-TTS-on-interrupt, photo_describe_voice for voice-activated vision, live_transcription_ui for transcripts into UI state, voice_notes for wake+record+save, connection_page_setup for bootstrap wiring). Peel from these; don't copy whole.\n" +
164
+ " - `getCapabilityGuide(feature)`per-feature minimal usage + gotchas. Pair with getPlatformInfo: features tells you what exists, getCapabilityGuide tells you how to call it.\n" +
165
+ " Or read `searchDocs(topic: 'custom_handlers')` for the conceptual framing (you write a class that subscribes to SDK Flows and calls SDK methods — no spec runtime, no callback dispatch).\n\n" +
166
+ "(4) `getPermissions({ capabilities: [...], platform })`derive Android permissions / iOS Info.plist keys / Meta DAT scopes from the features the app uses. Apply manifestEntries to AndroidManifest.xml or plistKeys to Info.plist. Run again whenever the capability set changes.\n\n" +
167
+ "(5) `validateIntegration` — pre-test correctness gate. Checks manifest, generated files, dependency, permissions vs declared capabilities, bootstrap wiring, toolchain versions. Re-run after every mutation.\n\n" +
168
+ "(6) `createSimulatorSession` — provision a browser simulator session to test end-to-end. Get-or-create: same sim resumes on subsequent calls.\n\n" +
169
+ "(7) `getEventLog` — primary debug tool. Returns the structured event trace for a session: stream lifecycle, toggle changes, connection state transitions, transport errors. Filter by 'errors' for warn/error rows; filter by 'transport' for connection/protocol layer.\n\n" +
170
+ "**Iteration loop DO NOT mint a new sim per change.** `createSimulatorSession` is get-or-create: it returns the saved sim for this project (`status:'resumed'`) after the first call. Two rates of change:\n" +
171
+ "(a) APP-CODE edits (Kotlin/Swift) → rebuild + reinstall, the library reattaches to the same saved sim, no remint.\n" +
172
+ "(b) FORCE-FRESH (clean-slate ID rotation) `createSimulatorSession({ resetFresh: true })` OR click 'Reset' on the dashboard at extentos.com/s.\n" +
173
+ "URL-bake apps rebuild once after force-fresh. For agents that can rebuild app code themselves: do it inline. Otherwise hand the dev rebuild + reinstall commands; same auto-attach behavior on the other end. Full mechanics in `searchDocs(topic: 'simulator_browser_mode')`.\n\n" +
174
+ "**Multi-platform projects (Android + iOS = ONE project).** When a developer has both Android and iOS versions of the same app, the dashboard groups them under one project IF the Android `applicationId` matches the iOS bundle identifier (standard reverse-DNS convention). When they don't match, two separate projects appear — resolve via dashboard's Danger Zone → Merge. Full guidance: `searchDocs(topic: 'multi_platform_projects')`.\n\n" +
175
+ "**Production:** `getProductionChecklist({ capabilities, handlers, platform })` for a personalized readiness gate. `getCredentialGuide({ services, platform })` for BYOK provider setup (Anthropic / OpenAI / Gemini / etc.) plus Meta DAT registration.",
176
+ keywords: ["setup", "start", "first", "begin", "overview", "walkthrough", "tool order", "workflow", "catalog", "debug", "iteration", "rebuild", "remint", "dev loop", "sdk", "pure-sdk"],
177
+ relatedTools: ["getPlatformInfo", "generateConnectionModule", "searchDocs", "validateIntegration", "getEventLog", "createSimulatorSession", "getPermissions", "getProductionChecklist", "getCredentialGuide"],
591
178
  },
592
179
  {
593
180
  topic: "voice_ux_guide",
594
181
  title: "Voice Command UX",
595
- content: "Phrases should be 3+ syllables, free of homophones, free of digits (STT rarely round-trips them). Named captures {{query}} must be the final token. Provide 3+ alternates for natural paraphrase. Avoid Meta wake words (hey meta, hey facebook, ok meta) — they conflict with on-device Meta AI. Default match_mode \"contains\" tolerates paraphrase; if you set match_mode: \"exact\", add 3+ alternates.\n\n" +
596
- "**Apostrophes, punctuation, case, and whitespace are normalized automatically.** The matcher lowercases both the spec phrase and the STT transcript, drops anything that is not a letter / digit / whitespace (apostrophes, periods, question marks, commas, etc.), and collapses whitespace runs before comparison. So `whats my move`, `what's my move`, and the STT output `What's my move?` all collapse to `whats my move` and match identically. Write whichever spec phrase reads naturally; both forms work, and you don't need to add alternates just to cover the apostrophe form.\n\n" +
597
- "Run candidate phrases through `getVoiceCommandGuidance` to surface UX issues (homophones, syllable count, captures-not-at-end, etc.) before adding them.",
598
- keywords: ["voice", "phrase", "command", "homophone", "capture", "stt", "disambiguation", "alternate", "contraction", "apostrophe", "normalization", "punctuation"],
599
- relatedTools: ["getVoiceCommandGuidance", "updateSpec"],
600
- },
601
- {
602
- topic: "spec_format",
603
- title: "Spec Format",
604
- content: "Top-level: extentos_version ('1.0'), target.vendor ('meta_rayban'), blocks[], streams[], triggers[]. Each block/stream/trigger has a unique id. Triggers declare a type, optional id, and an actions[] array. Spec file lives at app/src/main/assets/extentos.spec.json (Android) or Resources/extentos.spec.json (iOS), byte-identical across platforms.\n\n" +
605
- "Minimal valid spec — voice phrase that calls back into the host app (the smallest useful shape):\n" +
606
- ` {\n` +
607
- ` "$schema": "extentos://schema/v1",\n` +
608
- ` "extentos_version": "1.0",\n` +
609
- ` "target": { "vendor": "meta_rayban" },\n` +
610
- ` "blocks": [],\n` +
611
- ` "streams": [],\n` +
612
- ` "triggers": [\n` +
613
- ` {\n` +
614
- ` "type": "voice_command",\n` +
615
- ` "id": "t_my_phrase",\n` +
616
- ` "phrase": "do the thing",\n` +
617
- ` "match_mode": "contains",\n` +
618
- ` "actions": [\n` +
619
- ` { "type": "app_callback", "handler": "do_the_thing", "input": {}, "save_as": "result" },\n` +
620
- ` { "type": "speak_text", "text": "Done." }\n` +
621
- ` ]\n` +
622
- ` }\n` +
623
- ` ]\n` +
624
- ` }\n\n` +
625
- "Add blocks[] entries when capturing photo/video/audio. Add streams[] entries for continuous flows (transcription, video frames). See block_types, stream_types, trigger_types, action_types for inline examples of each. To MUTATE an existing spec via updateSpec, see spec_operations for the 13 patch operation shapes.",
626
- keywords: ["spec", "format", "structure", "extentos_version", "target", "schema", "top-level"],
627
- relatedTools: ["validateSpec", "initSpec", "updateSpec", "inspectIntegration"],
628
- },
629
- {
630
- topic: "spec_operations",
631
- title: "Spec Operations (updateSpec patch types)",
632
- content: "**updateSpec applies an ordered list of structured patch operations to an existing spec.** Each operation is one mutation; failed ops are reported in `skipped[]` and the rest still apply. **13 operation types** organized by target:\n\n" +
633
- "BLOCKS: addBlock, removeBlock, updateBlock\n" +
634
- "STREAMS: addStream, removeStream, updateStream\n" +
635
- "TRIGGERS: addTrigger, removeTrigger, updateTrigger\n" +
636
- "ACTIONS (inside a trigger's flow): addAction, removeAction, updateAction\n" +
637
- "METADATA: updateMeta\n\n" +
638
- "Common shape: `{ \"op\": \"<name>\", ...fields }`. Per-op field rules + canonical examples below.\n\n" +
639
- "─── addBlock / addStream / addTrigger ───\n" +
640
- "Append a fully-formed entity to spec.blocks / spec.streams / spec.triggers.\n\n" +
641
- "Required: op, value (the full entity object — must include `id` to be addressable later)\n" +
642
- "Errors (op skipped): \"id already exists in <collection>\" if value.id collides with an existing entry.\n\n" +
643
- "Minimal:\n" +
644
- ` { "op": "addTrigger", "value": { "type": "voice_command", "id": "t_save", "phrase": "save it", "match_mode": "contains", "actions": [{ "type": "speak_text", "text": "Saved." }] } }\n` +
645
- ` { "op": "addStream", "value": { "type": "transcription_incremental", "id": "live_transcription", "config": { "language": "en-US" } } }\n` +
646
- ` { "op": "addBlock", "value": { "type": "capture_photo", "id": "main_camera" } }\n\n` +
647
- "─── removeBlock / removeStream / removeTrigger ───\n" +
648
- "Remove the entity by id.\n\n" +
649
- "Required: op, id (or value.id — both accepted)\n" +
650
- "Skipped (not error): \"<collection> id not found\" — safe to retry idempotently.\n\n" +
651
- "Minimal:\n" +
652
- ` { "op": "removeTrigger", "id": "t_save" }\n\n` +
653
- "─── updateBlock / updateStream / updateTrigger ───\n" +
654
- "Shallow-merge `value` into the existing entity. The id field is preserved (cannot be renamed via update — to rename, remove + add).\n\n" +
655
- "Required: op, id, value (the partial patch — only the fields to change)\n" +
656
- "Skipped: id not found.\n\n" +
657
- "Minimal — change a phrase:\n" +
658
- ` { "op": "updateTrigger", "id": "t_save", "value": { "phrase": "save the note" } }\n\n` +
659
- "─── addAction ───\n" +
660
- "Insert an action into a trigger's actions[] flow.\n\n" +
661
- "Required: op, **triggerId** (NOT id — addresses the parent trigger), value (the action object)\n" +
662
- "Optional: actionIndex (0-based insert position; default = end of array)\n" +
663
- "Skipped: triggerId not found.\n\n" +
664
- "Minimal — append to end:\n" +
665
- ` { "op": "addAction", "triggerId": "t_save", "value": { "type": "speak_text", "text": "Saved." } }\n\n` +
666
- "Insert at index 1:\n" +
667
- ` { "op": "addAction", "triggerId": "t_save", "actionIndex": 1, "value": { "type": "delay", "seconds": 1 } }\n\n` +
668
- "─── removeAction ───\n" +
669
- "Remove an action by index.\n\n" +
670
- "Required: op, triggerId, actionIndex\n" +
671
- "Skipped: trigger not found, OR actionIndex out of range.\n\n" +
672
- "Minimal:\n" +
673
- ` { "op": "removeAction", "triggerId": "t_save", "actionIndex": 1 }\n\n` +
674
- "─── updateAction ───\n" +
675
- "Shallow-merge `value` into the action at actionIndex.\n\n" +
676
- "Required: op, triggerId, actionIndex, value\n" +
677
- "Skipped: trigger not found, OR actionIndex out of range.\n\n" +
678
- "Minimal — change TTS text without changing action type:\n" +
679
- ` { "op": "updateAction", "triggerId": "t_save", "actionIndex": 1, "value": { "text": "Saved successfully." } }\n\n` +
680
- "─── updateMeta ───\n" +
681
- "Patch top-level spec fields OTHER than blocks/streams/triggers (e.g., `target.vendor`, `extentos_version`).\n\n" +
682
- "Required: op, value (top-level patch)\n" +
683
- "Skipped: value contains `blocks`, `streams`, or `triggers` (use the dedicated ops for those).\n\n" +
684
- "Minimal:\n" +
685
- ` { "op": "updateMeta", "value": { "extentos_version": "1.0" } }\n\n` +
686
- "── Common patterns ──\n" +
687
- "- **Add a trigger and chain its actions in one updateSpec call**: pass [addTrigger, addAction, addAction, ...] in order. Operations run sequentially within a single call — the addTrigger lands first, then subsequent addActions can target it by id.\n" +
688
- "- **Idempotent reorder**: removeAction by index, then addAction with the desired actionIndex.\n" +
689
- "- **Partial-apply semantics**: each op runs independently; a single bad op fills `skipped[]` while the rest still apply. Use `applied[]` and `skipped[]` from the response to confirm what landed.\n\n" +
690
- "── Common mistakes ──\n" +
691
- "- Using `id` for action ops where `triggerId` is required — actions are addressed by their parent trigger + array index, never by their own id.\n" +
692
- "- Trying to rename via update*: id is preserved through update, never overwritten. To rename, removeBlock/Stream/Trigger + addBlock/Stream/Trigger.\n" +
693
- "- updateMeta with blocks/streams/triggers in value — these have dedicated ops; updateMeta refuses.\n" +
694
- "- Passing `{op:\"add\", path:\"/triggers/-\", value:...}` (JSON Patch RFC 6902 shape) — NOT supported. Use the domain ops above.",
695
- keywords: [
696
- "operations",
697
- "patch",
698
- "addTrigger",
699
- "addAction",
700
- "addStream",
701
- "addBlock",
702
- "removeTrigger",
703
- "updateTrigger",
704
- "updateAction",
705
- "triggerId",
706
- "actionIndex",
707
- "updateMeta",
708
- "mutate",
709
- "edit spec",
710
- ],
711
- relatedTools: ["updateSpec", "inspectIntegration", "validateSpec"],
712
- },
713
- {
714
- topic: "spec_validation_rules",
715
- title: "Spec Validation Rules",
716
- content: "11 rules: R1 extentos_version, R2 target.vendor, R3 block type supported, R4 stream type supported, R5 id pattern ^[a-z][a-z0-9_]{0,31}$ and uniqueness, R6 trigger type supported, R7 action type supported, R8 app_callback handler name, R9 branch depth ≤ 1 (then/else arms cannot contain another branch — refactor deeper logic into an app_callback handler), R10 template variable resolvable, R11 toggle key in allowed set.",
717
- keywords: ["validation", "rule", "error", "lint", "r1", "r5", "branch depth", "id regex"],
718
- relatedTools: ["validateSpec", "validateIntegration"],
719
- },
720
- {
721
- topic: "template_syntax",
722
- title: "Template Variable Syntax",
723
- content: "Curly-brace interpolation: {{key}} resolves against trigger payload or variables. Dot paths: {{trigger_payload.captures.query}}, {{toggles.listening_mode}}, {{hardware_state.thermal}}. Special namespaces: trigger_payload (per-trigger fields), toggles (runtime state), hardware_state (thermal/battery/audio route/connected). Unresolvable paths fail R10.",
724
- keywords: ["template", "variable", "substitution", "capture", "interpolation", "curly", "{{"],
725
- relatedTools: ["validateSpec", "updateSpec"],
182
+ content: "Voice phrases are plain strings post-pivot — your handler subscribes to `glasses.audio.transcriptions()`, gets a `Transcript` (sealed: Partial / Final), and decides what counts as a match. There is no DSL matcher, no `{{query}}` captures, no `match_mode: \"exact\"` vs `\"contains\"`. The matching policy is yours.\n\n" +
183
+ "── Phrase-design rules (UX, not matcher behaviour) ──\n\n" +
184
+ "These are what `getVoiceCommandGuidance` flags. They're UX rules, not parser rules — you'll hit them no matter what string-comparison code you write.\n\n" +
185
+ "- **3+ syllables.** Single-syllable phrases mis-trigger on background speech.\n" +
186
+ "- **No homophones with common speech.** \"Right\" matches \"write\" / \"rite\"; STT can't disambiguate from context.\n" +
187
+ "- **No digits.** STT round-trips digits inconsistently — \"two\" vs \"2\" vs \"to\" vs \"too\". Spell them, or use ordinals (\"first / second / third\").\n" +
188
+ "- **Avoid Meta hardware wake words** — \"hey meta\", \"hey facebook\", \"ok meta\". These ALSO trigger Meta's on-device assistant, which can intercept the mic before your handler sees the transcript.\n" +
189
+ "- **3+ alternates per intent.** Natural speech varies; if you only match \"start recording\" you'll miss \"begin recording\" / \"record this\". Iterate from real dogfood transcripts.\n\n" +
190
+ "── Apostrophe / punctuation handling ──\n\n" +
191
+ "STT engines emit different casings, punctuations, and apostrophe shapes for the same utterance. The library does NOT normalize for you — `transcript.text` is what the recognizer returned, verbatim. Normalize on the read side in your handler:\n\n" +
192
+ " Kotlin:\n" +
193
+ " fun normalize(s: String) = s.lowercase()\n" +
194
+ " .replace(Regex(\"[^a-z0-9\\\\s]\"), \"\")\n" +
195
+ " .replace(Regex(\"\\\\s+\"), \" \").trim()\n\n" +
196
+ " Swift:\n" +
197
+ " func normalize(_ s: String) -> String {\n" +
198
+ " s.lowercased()\n" +
199
+ " .components(separatedBy: CharacterSet.alphanumerics.union(.whitespaces).inverted).joined()\n" +
200
+ " .components(separatedBy: .whitespaces).filter { !$0.isEmpty }.joined(separator: \" \")\n" +
201
+ " }\n\n" +
202
+ "Match `normalize(transcript.text).contains(normalize(phrase))` to cover apostrophe / punctuation / case variance with one rule.\n\n" +
203
+ "── When to call getVoiceCommandGuidance ──\n\n" +
204
+ "Pass the phrases you're about to wire into your handler (one MCP call per batch). The tool surfaces homophones, length issues, digit-usage, Meta wake-word collisions, and ambiguity against existing phrases (pass `existingPhrases` so it can compare). Run it BEFORE shipping the phrase strings — runtime debugging tells you it didn't match, but never why.",
205
+ keywords: ["voice", "phrase", "command", "homophone", "stt", "disambiguation", "alternate", "contraction", "apostrophe", "normalization", "punctuation", "transcripts", "wake phrase"],
206
+ relatedTools: ["getVoiceCommandGuidance"],
726
207
  },
727
208
  {
728
209
  topic: "toggles",
729
210
  title: "Runtime Toggles",
730
- content: "**Toggles are persistent runtime flags that GATE hardware behavior when implemented.** Unlike spec-side variables (flow-scoped), toggles persist across trigger fires and are written by both spec actions (`set_toggle`) and connection-page UI controls. 8 toggles, each with a defined type / default / *intended* behavior.\n\n" +
731
- "**⚠️ Implementation status (1.1.25-pair):** ENFORCED toggles: `transcription_enabled` (Bundle 9), `camera_streaming_enabled` (Bundle 12), `audio_capture_enabled` (Bundle 12), `listening_mode` (Bundle 13 — `off` value enforced library-side), `privacy_mode` (Bundles 14 + 20 — super-toggle gates capture + audio + STT + notification forwarding), `battery_save_mode` (Bundle 15 — video_frames clamp to LOW+2fps), `voice_confirmations` (Bundle 19 — auto-earcons around voice triggers). Still NOT enforced (1 toggle): `audio_video_coexistence_policy`. F-R5-13 (coexistence) tracks the remaining toggle gap; F-R5-13b residual now down to UI-indicator-only (cosmetic — no functional gap); F-R5-16b tracks battery_save_mode's residual gaps (outgoing_video_stream disable, ask_user timeout shortening, speak_text filler suppression). Until they're closed, treat the unimplemented toggle as cooperative state that YOUR app code can read (via `glasses.toggles.get(...)`) but that the library does NOT honor automatically. Each toggle's section below begins with its current status.\n\n" +
732
- "Three ways flows interact with toggles:\n" +
733
- "1. **WRITE**: `{ \"type\": \"set_toggle\", \"key\": \"<name>\", \"value\": <literal> }` — mutates the toggle. Validation rule R11 rejects unknown keys.\n" +
734
- "2. **READ in branch**: `{ \"variable\": \"toggles.<name>\", \"operator\": \"equals\", \"value\": <literal> }` — branch reads the live toggle value at the decision point (never cached).\n" +
735
- "3. **READ via template**: `{{toggles.<name>}}` resolves to the current value; usable inside any string field that supports template substitution.\n\n" +
736
- "─── listening_mode ───\n**Status (1.1.21-pair): ✅ IMPLEMENTED for `off`; partial for the other enum values.** Library's `DefaultAudioClient.transcriptions(...)` gates the recognizer flow on `listening_mode != \"off\"`. When `off`, no transcripts arrive (transport-side recognizer not started, SCO mic released). The other values (`always_on`, `on_demand`, `wake_word`) all let transcription through library-side — `on_demand`'s 'only while push_to_talk / ask_user is active' semantics require runtime trigger-context integration that's deferred; `wake_word` requires a Meta API the DAT doesn't expose (treat as future-only).\n\nDefault: unset is treated as listening-on for backward compat with apps written before Bundle 13. The connection-page UI's EscapeHatch renders the toggle as OFF when unset, but the library doesn't infer that — set explicit `listening_mode = \"off\"` to actually gate STT.\n\nComposes with `audio_capture_enabled` (Bundle 12) and `transcription_enabled` (Bundle 9). All three must be 'on' for transcripts to flow:\n- `audio_capture_enabled = false` → no audio anywhere (raw chunks, recordings, transcripts).\n- `listening_mode = \"off\"` → audio chunks + recordings still work, but no STT.\n- `transcription_enabled = false` → spec-driven `glasses.streams.transcriptionIncremental(...)` emits nothing; the direct `glasses.audio.transcriptions(config)` API still fires (manual lifecycle).\n\nType: enum\nValues: `off`, `wake_word`, `on_demand`, `always_on`\n- `off`: STT disabled entirely. ✅ ENFORCED.\n- `wake_word`: listen for wake word, switch to always-on after detection. **Not yet wired on Meta Ray-Ban** (DAT 0.5 doesn't expose Meta's wake-word API; library treats as listening-on for now).\n- `on_demand`: intended to enable STT only while push_to_talk or ask_user is active. **Library currently treats as listening-on** (the trigger-context gate is deferred).\n- `always_on`: continuous transcription. 
✅ Same effect as wake_word/on_demand at library level.\n\nWhen to use set_toggle for this: usually you don't — it's user-controlled via the connection-page UI. Programmatic flips are valid but consider whether the user actually wanted listening on.\n\nMinimal:\n { \"type\": \"set_toggle\", \"key\": \"listening_mode\", \"value\": \"always_on\" }\n\n─── camera_streaming_enabled ───\n**Status (1.1.20-pair): ✅ IMPLEMENTED.** Library's `DefaultCameraClient` checks the toggle on every camera primitive: `capture_photo` and `capture_video` fail-fast with `CaptureError.DisabledByUser(\"camera_streaming_enabled\")` when false; `video_frames` stream emits nothing while false (transport-side stream is cancelled, camera released). Default true; toggle unset → camera works. The error surfaces in `getEventLog` as `BlockCompleted result:\"failed:disabled_by_user:camera_streaming_enabled\"` so flows can branch on it via the standard `is_empty` check on the saved photo/video URI.\n\nGlobal kill switch for ALL camera primitives: video_frames stream, capture_photo block, capture_video block, outgoing_video_stream.\n\nType: boolean\nDefault: true\n\nMinimal:\n { \"type\": \"set_toggle\", \"key\": \"camera_streaming_enabled\", \"value\": false }\n\n─── audio_capture_enabled ───\n**Status (1.1.20-pair): ✅ IMPLEMENTED.** Library's `DefaultAudioClient` gates `recordDiscrete`/`audioChunks`/`transcriptions` on this toggle. record_audio block fails fast with `AudioError.DisabledByUser(\"audio_capture_enabled\")` when false; audio_chunks + transcription_incremental streams emit nothing while false. 
Composes with `transcription_enabled` from Bundle 9: setting audio_capture_enabled=false while transcription_enabled=true correctly produces no transcripts (audio capture is killed upstream of STT).\n\nGlobal kill switch for ALL audio-capture primitives: audio_chunks stream, record_audio block, outgoing_audio_stream, transcription_incremental.\n\nType: boolean\nDefault: true\n\nDistinct from `transcription_enabled`: this gates raw audio acquisition; transcription_enabled gates STT processing on top of it. Both toggles compose — true on both = transcripts; false on either = no transcripts.\n\n─── transcription_enabled ───\n**Status (1.1.19-pair): ✅ IMPLEMENTED.** The library's spec-driven stream API (`glasses.streams.transcriptionIncremental(streamId)`, added in Bundle 9) observes this toggle and gates the underlying recognizer flow accordingly. Toggle false → no transcripts; toggle true → recognizer running.\n\n*Note:* the gating only applies to consumers using `glasses.streams.transcriptionIncremental(...)` — the direct `glasses.audio.transcriptions(config)` API is intentionally ungated for manual-lifecycle use. `generateConsumer(kind: \"stream\")` emits the gated path by default.\n\nDistinct from `audio_capture_enabled` — you may want raw audio without STT (for custom on-device models via audio_chunks).\n\nType: boolean\nDefault: false\n\n**Required to start a transcription_incremental stream.** Without this true, the stream is declared but produces no data. 
Common pattern: a `start taking notes` voice trigger flips this true; a `stop taking notes` trigger flips it false.\n\nSide effect: while true, audio routes to HFP — the user briefly loses high-quality A2DP playback if any.\n\nMinimal:\n { \"type\": \"set_toggle\", \"key\": \"transcription_enabled\", \"value\": true }\n\n─── battery_save_mode ───\n**Status (1.1.23-pair): ✅ IMPLEMENTED for video_frames clamp; partial.** When true, `glasses.camera.videoFrames(...)` is forced to LOW resolution + max 2 fps regardless of the requested config. Apps that called `videoFrames(VideoFrameConfig(frameRate = 30, resolution = HIGH))` get LOW + 2 silently. Battery_save_mode flipping mid-stream restarts the inner transport flow with the clamped config (camera renegotiates capture rate).\n\n**Still NOT implemented (F-R5-16b):** outgoing_video_stream disable, ask_user timeout shortening, speak_text progressive-response-filler suppression. The `thermal_throttle` preset trigger flips this toggle on critical thermal — the video_frames clamp now takes effect, but the other intended behaviors are deferred.\n\n*Intended (full) behavior:* video_frames clamps to LOW resolution + 2 fps (✅ shipped Bundle 15); outgoing_video_stream is disabled (deferred F-R5-16b); ask_user timeouts shorten (deferred F-R5-16b); speak_text suppresses progressive-response filler (deferred F-R5-16b).\n\nType: boolean\nDefault: false\n\nCanonical use: a `thermal_warning` trigger at severity moderate+ flips this true. Pair with the `thermal_throttle` initSpec preset for the canonical shape.\n\nMinimal:\n { \"type\": \"set_toggle\", \"key\": \"battery_save_mode\", \"value\": true }\n\n─── voice_confirmations ───\n**Status (1.1.24-pair): ✅ IMPLEMENTED.** Library's Interpreter plays an auto-earcon pair around voice triggers (VOICE_COMMAND, WAKE_WORD, PUSH_TO_TALK): `EarconSound.START` BEFORE walking the trigger's actions, `EarconSound.COMPLETE` after successful completion. 
When `voice_confirmations = false`, both earcons are skipped. Other trigger types (manual_launch, hardware events, fallback) NEVER auto-earcon — they fire silently regardless of the toggle. Aborts and unexpected failures play START but skip COMPLETE — no auto-error earcon (apps that want explicit failure feedback should call `earcon_beep` with `sound: \"error\"` inside their abort branch).\n\nWhen false, voice triggers fire silently. Useful for ambient assistants or accessibility flows where the user prefers quiet operation. The dev still controls per-action TTS via speak_text and per-action beeps via earcon_beep; this toggle only affects the AUTO-played pair around voice trigger flows.\n\nType: boolean\nDefault: true\n\nMalformed (wrong-typed) values default to true (defensive) — a misconfigured spec doesn't accidentally silence voice acks.\n\nMinimal:\n { \"type\": \"set_toggle\", \"key\": \"voice_confirmations\", \"value\": false }\n\n─── audio_video_coexistence_policy ───\n**Status (1.1.19-pair): NOT IMPLEMENTED.** Value persists but the library does NOT enforce coexistence — concurrent audio + video primitives currently surface platform errors (not policy decisions). F-R5-13.\n\n*Intended behavior (when implemented):* Controls what happens when a flow tries to open an audio primitive while a video primitive is active (or vice versa). DAT 0.5 routes audio over HFP, which suspends A2DP — the lib should enforce this policy to prevent overlapping primitives that conflict.\n\nType: enum\nDefault: `prefer_video`\nValues: `prefer_audio`, `prefer_video`, `strict_reject`\n- `prefer_audio`: pause the active video stream while audio runs; resume video on completion.\n- `prefer_video`: reject the new audio primitive; flow continues without it.\n- `strict_reject`: abort the flow with an error.\n\nSee `audio_video_coexistence` topic for the full coexistence model.\n\n─── privacy_mode ───\n**Status (1.1.22-pair): ✅ IMPLEMENTED for capture + audio + STT. 
Extended in 1.1.25-pair to also suppress notification forwarding (Bundle 20).** Library's `DefaultCameraClient` and `DefaultAudioClient` check `privacy_mode` FIRST on every primitive — when true, gates camera_streaming and audio_capture regardless of those individual toggles' values. capture_photo / capture_video / record_audio fail-fast with `DisabledByUser(\"privacy_mode\")` (note: error reports `privacy_mode` rather than the underlying gate, signaling the more general user intent). video_frames / audio_chunks / transcriptions emit nothing while privacy_mode is true. **As of 1.1.25-pair (Bundle 20): `phone_notification_forwarded` triggers are also suppressed** at the `HardwareEventRouter` dispatch layer — the trigger never fires while privacy_mode is true. Other hardware alerts (thermal, hinges, audio route, calls, lifecycle) still fire — privacy_mode is specifically about user-data exposure (camera/mic/STT/notifications), not all hardware events.\n\n**Still NOT implemented:** connection-page UI privacy indicator. F-R5-13b residual: the EscapeHatch UI doesn't yet render a clear \"privacy mode active\" badge. Cosmetic-only — no functional gap remaining.\n\n**The one super-toggle.** When true, forces camera + audio capture off, disables STT, and suppresses notification forwarding. Composes with all other gates: ANY of (privacy_mode true, camera_streaming false, audio_capture false, listening_mode \"off\") closes the relevant primitive.\n\nType: boolean\nDefault: false (opt-in — must be flipped on to engage)\n\nLayered semantics: every other toggle is FLAT (effective value = stored value). privacy_mode is the only LAYERED toggle — the library now computes effective gates at the camera/audio decision points. 
Concretely: when privacy_mode is true, camera + audio + STT all return DisabledByUser(\"privacy_mode\") regardless of their individual toggle values; flipping privacy_mode back to false restores the underlying toggles' effective state.\n\nMalformed (wrong-type) values default to false — a misconfigured spec doesn't silently privacy-mode the app.\n\nCommon pattern: a `set_toggle privacy_mode true` action is the sledgehammer for end-of-session cleanup. Always pair with a means to flip it back off (UI toggle on the connection page is the primary path).\n\nMinimal:\n { \"type\": \"set_toggle\", \"key\": \"privacy_mode\", \"value\": true }\n\n── Reading toggle state from flows ──\nBranch example — abort a photo flow if privacy is on (works today since the BRANCH read is implemented; the camera kill-switch part is NOT, so this branch is the recommended workaround until F-R5-13 lands):\n { \"type\": \"branch\",\n \"condition\": { \"variable\": \"toggles.privacy_mode\", \"operator\": \"equals\", \"value\": true },\n \"then\": [{ \"type\": \"abort\", \"reason\": \"privacy_mode_active\" }]\n }\n\nTemplate example — speak the current battery-save state:\n { \"type\": \"speak_text\", \"text\": \"Battery save is {{toggles.battery_save_mode}}.\" }\n\nWhen-connected predicate (preferred over branch for connection gating):\n { \"type\": \"when_connected\", \"action\": { ... }, \"predicate\": \"toggle_on:transcription_enabled\" }\n\n── Common mistakes ──\n- **Assuming a toggle's documented gating behavior is enforced by the library.** As of 1.1.24-pair, ENFORCED: `transcription_enabled` (Bundle 9), `camera_streaming_enabled` + `audio_capture_enabled` (Bundle 12), `listening_mode` (Bundle 13 — `off` value), `privacy_mode` (Bundle 14 — super-toggle gates capture + audio + STT), `battery_save_mode` (Bundle 15 — video_frames LOW+2fps clamp), `voice_confirmations` (Bundle 19 — auto-earcons around voice triggers). NOT YET ENFORCED: `audio_video_coexistence_policy`. 
F-R5-13 (coexistence) tracks the remaining gap; F-R5-13b tracks privacy_mode's residual gaps (notification suppression); F-R5-16b tracks battery_save_mode's residual gaps.\n- **Expecting `voice_confirmations = false` to also silence per-action `earcon_beep` calls or `speak_text`.** It does NOT — only the AUTO-played pair (START before voice trigger walk + COMPLETE after success) is gated. Spec actions remain in your control. To go fully silent, also avoid `earcon_beep` and `speak_text` actions inside voice triggers.\n- **Expecting `battery_save_mode = true` to disable outgoing_video_stream / shorten ask_user timeouts / suppress speak_text filler.** The library currently only clamps video_frames (LOW + 2 fps). The other intended behaviors (F-R5-16b) are deferred. Apps that need them must check `glasses.toggles.get(\"battery_save_mode\")` themselves and adapt.\n- **Expecting `privacy_mode = true` to ALSO render a UI privacy indicator on the connection page.** It does NOT yet — Bundle 20 closed the functional notification-suppression gap, but the EscapeHatch UI doesn't render a \"privacy mode active\" badge yet (F-R5-13b residual; cosmetic only). Apps that need a UI indicator can build their own using `glasses.toggles.state` flow.\n- **Setting `transcription_enabled: true` without declaring a `transcription_incremental` stream** — toggle has no effect; the lib has nothing to gate.\n- **Setting `listening_mode = \"on_demand\"` and expecting STT to only run during push_to_talk / ask_user.** The library currently treats `on_demand` as listening-on (same as `always_on`) — the trigger-context gate is deferred. Use `audio_capture_enabled = false` outside the active window or program your push_to_talk flow to flip `listening_mode` between `always_on` and `off` directly.\n- **Comparing toggle values with the wrong type in branch**: `transcription_enabled` is boolean — use `value: true`, not `value: \"true\"` (string). 
Validator may not catch type mismatches at spec-load; expect silent always-false branches.\n- **Trying to set toggles not in the 8-key allowlist** — validation rule R11 rejects with the list of valid keys.",
211
+ content: "Toggles are persistent runtime flags that gate hardware behaviour inside the library. They survive across app restarts (persisted to the user's account by the connection page UI) and are observable from your handler via `glasses.toggles.state` (Android `StateFlow<Toggles>`, iOS `ObservableState<Toggles>`). The user owns the toggle values — they flip them from the connection page; your handler reads them. Programmatic writes are possible (`glasses.toggles.update { it.copy(values = ...) }` / `.put(...)`) but rarely needed in customer code.\n\n" +
212
+ "**8 toggles.** Each has a defined type / default / library-enforced behaviour. Seven are enforced inside the library — your handler does not need to honor them manually. One is cooperative state.\n\n" +
213
+ "── listening_mode ── (enforced for `off`)\nType: enum (`off`, `wake_word`, `on_demand`, `always_on`). Default: unset, treated as listening-on.\n\n`DefaultAudioClient.transcriptions(...)` gates the recognizer flow on `listening_mode != \"off\"`. When `off`, no transcripts arrive — transport-side recognizer is not started, SCO mic released. `wake_word` requires a Meta API DAT 0.5 doesn't expose (treated as listening-on); `on_demand`'s intended 'only-during-push-to-talk' semantics are deferred (also treated as listening-on for now).\n\nComposes with `audio_capture_enabled` and `transcription_enabled` — ALL three must be on for transcripts to flow. The user-facing UI on the connection page exposes this as the master mic toggle.\n\n── camera_streaming_enabled ── (enforced)\nType: boolean. Default: true.\n\nGlobal kill switch for every camera primitive: `videoFrames` stream emits nothing while false (transport-side stream cancelled, camera released); `capturePhoto` and `captureVideo` fail-fast with `CaptureError.DisabledByUser(\"camera_streaming_enabled\")`. Surfaces in `getEventLog` as a `camera.capture_failed` event with the toggle name as the reason.\n\n── audio_capture_enabled ── (enforced)\nType: boolean. Default: true.\n\nGlobal kill switch for every audio-capture primitive: `recordDiscrete` fails with `AudioError.DisabledByUser`; `audioChunks` and `transcriptions` emit nothing while false. Composes with `transcription_enabled` — gating the toggle here kills audio upstream of STT, so transcripts disappear too.\n\nDistinct from `transcription_enabled`: this gates raw audio acquisition; `transcription_enabled` gates STT processing on top of it.\n\n── transcription_enabled ── (enforced)\nType: boolean. Default: false.\n\nWhen false, `glasses.audio.transcriptions(...)` returns an empty stream. Common pattern: a handler subscribing for wake-word matching flips this on at startup; a 'pause listening' user-toggle flips it off. 
The toggle gates only the recognizer — `audio.audioChunks(...)` (raw audio) still flows.\n\nSide effect: while true, audio routes over HFP, which suspends A2DP — the user briefly loses high-quality stereo playback. Flipping this on while music is playing is the most-common UX surprise.\n\n── battery_save_mode ── (enforced for video_frames clamp; partial)\nType: boolean. Default: false.\n\nWhen true, `glasses.camera.videoFrames(...)` is clamped to LOW resolution + 2 fps regardless of the requested config; the camera renegotiates capture rate when the toggle flips mid-stream. Other intended behaviours (disable outgoing_video_stream, shorten ask_user timeouts, suppress speak filler) are deferred — apps that need them must read the toggle directly via `glasses.toggles.state` and adapt their handler.\n\nThermal-warning paths typically flip this on automatically when the device reports critical thermal state.\n\n── voice_confirmations ── (enforced)\nType: boolean. Default: true.\n\nWhen true, the library plays an auto-earcon pair around any handler that the user invokes through the connection page's voice surface: `EarconSound.START` before the handler runs, `EarconSound.COMPLETE` after success. Aborts skip the COMPLETE earcon (no auto-error earcon — call `earcon` directly with `sound: \"error\"` from your handler if you want explicit failure feedback). When false, voice-initiated handlers run silently.\n\nUseful for ambient assistants or accessibility flows where the user prefers quiet operation. Your per-handler `speak()` and `earcon()` calls are NOT affected — only the auto-played pair around voice-initiated handlers.\n\n── audio_video_coexistence_policy ── (NOT enforced — cooperative)\nType: enum (`prefer_audio`, `prefer_video`, `strict_reject`). Default: `prefer_video`.\n\nValue persists but the library does NOT yet enforce coexistence — concurrent audio + video primitives currently surface platform errors rather than policy decisions. 
Treat as cooperative state your handler can read (`glasses.toggles.state.value.audioVideoCoexistencePolicy`) and respect manually if you care about the ordering. See `audio_video_coexistence` topic.\n\n── privacy_mode ── (enforced — the super-toggle)\nType: boolean. Default: false (opt-in).\n\nThe one LAYERED toggle. When true, gates ALL camera + audio + STT primitives regardless of the underlying per-feature toggles' values: `capturePhoto` / `captureVideo` / `recordDiscrete` fail-fast with `DisabledByUser(\"privacy_mode\")`; `videoFrames` / `audioChunks` / `transcriptions` emit nothing. As of library 1.1.25-pair, also suppresses phone-notification forwarding at the dispatch layer.\n\nOther hardware events (thermal, hinges, audio route, calls, lifecycle) still fire — privacy_mode is specifically about user-data exposure (camera/mic/STT/notifications), not all hardware events.\n\nCanonical use: a 'panic privacy' UI control that flips this true. Always pair with a means to flip it back off — the connection page UI is the primary path.\n\n── Reading toggle state from your handler ──\n\nKotlin (StateFlow):\n glasses.toggles.state.collect { toggles ->\n if (toggles.privacyMode) { skipExpensiveWork(); return@collect }\n if (!toggles.transcriptionEnabled) showStartHint()\n }\n\nSwift (ObservableState):\n for await toggles in glasses.toggles.state.stream {\n if toggles.privacyMode { continue }\n if !toggles.transcriptionEnabled { showStartHint() }\n }\n\nOne-shot read:\n val v = glasses.toggles.state.value\n let v = await glasses.toggles.state.current\n\n── Writing toggles programmatically ──\n\nThe user owns toggle values via the connection page; programmatic writes are valid but uncommon. 
The closure form is the canonical API:\n\n Kotlin: glasses.toggles.update { it.copy(values = it.values + (\"privacy_mode\" to JsonPrimitive(true))) }\n Swift: await glasses.toggles.update { old in Toggles(values: old.values.merging([\"privacy_mode\": .bool(true)]) { _, new in new }) }\n\nAndroid additionally exposes a `put(key, value)` convenience method. Source tagging: pass `ToggleSource.UI` for user-initiated writes, `ToggleSource.HOST_APP` for handler-initiated writes — surfaces in `getEventLog` so you can tell who flipped what.\n\n── Common mistakes ──\n\n- **Subscribing to `glasses.audio.transcriptions()` and seeing nothing.** Check `transcription_enabled` AND `audio_capture_enabled` AND `privacy_mode == false` AND `listening_mode != \"off\"`. ANY of those four can silence the stream.\n- **Setting `listening_mode = \"on_demand\"` and expecting STT to only run during push-to-talk.** The library currently treats `on_demand` as listening-on (same as `always_on`). Your handler can implement the gating itself by flipping `audio_capture_enabled` around the active window.\n- **Comparing toggle values with the wrong type.** The values are typed JSON — booleans are `JsonPrimitive(true)`, not `JsonPrimitive(\"true\")`. Pattern-match the JSONValue or use the typed accessors on Toggles.\n- **Trying to set a key not in the 8-toggle allowlist.** Library rejects unknown keys; the canonical list is enforced server-side too (the connection page UI only exposes these 8).\n- **Flipping `battery_save_mode` and assuming the full intended behaviour is in effect.** Only the videoFrames LOW+2fps clamp ships today. Other intended effects (outgoing_video_stream off, ask_user shorter timeouts) are deferred; if your handler depends on them, read the toggle and implement the behaviour yourself.",
737
214
  keywords: [
738
215
  "toggle",
739
216
  "privacy_mode",
740
- "set_toggle",
741
217
  "listening_mode",
742
218
  "transcription_enabled",
743
219
  "camera_streaming_enabled",
@@ -748,10 +224,11 @@ export const DOC_INDEX = [
748
224
  "state",
749
225
  "runtime",
750
226
  "super",
751
- "branch",
752
- "template",
227
+ "togglesClient",
228
+ "update",
229
+ "put",
753
230
  ],
754
- relatedTools: ["updateSpec", "initSpec", "getVoiceCommandGuidance"],
231
+ relatedTools: ["getCapabilityGuide"],
755
232
  },
756
233
  {
757
234
  topic: "connection_state_model",
@@ -763,168 +240,188 @@ export const DOC_INDEX = [
763
240
  {
764
241
  topic: "audio_video_coexistence",
765
242
  title: "Audio / Video Coexistence",
766
- content: "Bluetooth A2DP (stereo media) and HFP (mic + mono audio) cannot run simultaneously on most Meta Ray-Ban pairings. audio_video_coexistence_policy toggle controls behavior: 'mic_priority' downgrades playback to mono when mic is active; 'stereo_priority' disables mic during stereo playback. Events surface 'degraded' markers when fallback occurs.",
767
- keywords: ["audio", "video", "coexistence", "a2dp", "hfp", "bluetooth", "degraded", "priority"],
768
- relatedTools: ["updateSpec"],
243
+ content: "Bluetooth A2DP (stereo media playback) and HFP (mic + mono audio capture) cannot run simultaneously on most Meta Ray-Ban pairings the link can serve either profile at a time. The `audio_video_coexistence_policy` toggle declares the policy your app prefers when handlers try to use both at once.\n\n" +
244
+ "**Values** (enum):\n" +
245
+ "- `prefer_audio` — pause the active video stream while mic capture runs; resume video when the audio operation completes.\n" +
246
+ "- `prefer_video` (default) — reject the audio operation while video is active; your handler sees `AudioError.Conflict` (or equivalent) and must decide how to proceed.\n" +
247
+ "- `strict_reject` — abort either operation with an error if the other is already running.\n\n" +
248
+ "**Enforcement status:** the toggle value persists, but the library does NOT yet enforce the policy automatically — concurrent audio + video primitives currently surface platform-level errors rather than policy-driven decisions. F-R5-13 tracks closing this gap. Until then, treat the policy as cooperative state your handler reads via `glasses.toggles.state` and respects manually.\n\n" +
249
+ "**Diagnostics:** when the conflict actually fires (A2DP suspending under HFP, or vice versa), the event log surfaces a `transport.audio_route_degraded` event so you can see which profile won. Useful when a user reports 'my music dropped to mono when the glasses started transcribing' — that's the expected HFP pre-emption, not a bug.",
250
+ keywords: ["audio", "video", "coexistence", "a2dp", "hfp", "bluetooth", "degraded", "policy", "prefer_audio", "prefer_video", "strict_reject"],
251
+ relatedTools: ["getCapabilityGuide"],
769
252
  },
770
253
  {
771
254
  topic: "library_api",
772
255
  title: "Extentos Library API",
773
- content: "ExtentosGlasses is the top-level object, carrying 8 consumer-facing sub-clients (identical surface on Android and iOS). Creation: `ExtentosGlasses.create(config)` on Android / `Extentos.create(config)` on iOS. ExtentosConfig controls transport override, debug, telemetryConsent, applicationContext (Android), and metaCredentials.\n\n" +
774
- "**Lifecycle / Result types**: every suspending op returns `ExtentosResult.Ok(value)` or `ExtentosResult.Err(error)` pattern-match both. Do NOT wrap in Kotlin `runCatching` (which only catches thrown exceptions; library ops use the Result channel for failures and never throw).\n\n" +
775
- "── glasses.connection ── (`ConnectionClient`)\n" +
776
- "- `val state: StateFlow<GlassesState>` — observe Connecting / SessionActive / Disconnected / etc.\n" +
777
- "- `val simulatorHint: StateFlow<SimulatorHint?>` — non-null when transport is simulator (BrowserSim/LocalSim) so UI can surface a hint badge.\n" +
778
- "- `suspend fun connect(deviceId: DeviceId? = null): ExtentosResult<Unit, ConnectError>` initiate connection. Bootstrap calls this in onCreate.\n" +
779
- "- `suspend fun disconnect()` graceful tear-down.\n\n" +
780
- "── glasses.camera ── (`CameraClient`)\n" +
781
- "- `suspend fun capturePhoto(config: PhotoConfig = PhotoConfig()): ExtentosResult<Photo, CaptureError>` — one-shot still. Same path the `capture_photo` block uses.\n" +
782
- "- `suspend fun captureVideo(config: VideoConfig = VideoConfig()): ExtentosResult<VideoClip, CaptureError>` — bounded clip.\n" +
783
- "- `fun videoFrames(config: VideoFrameConfig = VideoFrameConfig()): Flow<VideoFrame>` frame stream. Same path the `video_frames` stream uses.\n\n" +
784
- " Photo display helper (extension on `Photo`for code holding a `Photo` object directly, e.g. from `glasses.camera.capturePhoto()`):\n" +
785
- " - Android: `suspend fun Photo.loadBitmap(): Bitmap?` runs on `Dispatchers.IO`. Decodes the photo regardless of which transport produced the URI; handles `data:` (BrowserSim base64-inline), `file://` (RealMeta + LocalSim), and bare absolute paths uniformly. Returns null on null URI, unrecognized scheme, missing file, or decode failure.\n" +
786
- " - iOS: `func loadImage() async -> PlatformImage?` (`PlatformImage = UIImage` on iOS targets, `NSImage` on macOS). Same scheme dispatch (`file:` and `data:`); decode runs on a detached high-priority Task. Returns nil on unrecognized scheme, missing file, or decode failure.\n" +
787
- " - Use this instead of writing your own URI decoder Coil's `AsyncImage` and SwiftUI's `AsyncImage` silently fail on the BrowserSim `data:` shape; this helper exists exactly to close that leaky abstraction.\n\n" +
788
- " Photo URI-string helpers (`object Photos` top-level, for `app_callback` handlers that receive `call.input.image: String`, library 1.1.13-pair+):\n" +
789
- " - Android: `suspend fun Photos.loadBase64(uri: String): String?`bare base64 payload, ready for Anthropic Claude Vision / OpenAI GPT-4V request bodies. Fast path: `data:` URIs return the substring after the comma directly (no decode + re-encode round trip).\n" +
790
- " - Android: `suspend fun Photos.loadBytes(uri: String): ByteArray?` — raw image bytes (multipart upload, custom encoders).\n" +
791
- " - Android: `suspend fun Photos.loadBitmap(uri: String): Bitmap?` — same Bitmap result as `photo.loadBitmap()` but takes the URI string the handler actually receives.\n" +
792
- " - Android: `fun Photos.mediaTypeFromUri(uri: String): String?` — `\"image/jpeg\"` / `\"image/png\"` / `\"image/webp\"` / etc. Reads the embedded MIME from `data:` URIs, infers from file extension otherwise. Most AI vision APIs need both base64 + media type.\n" +
793
- " - All loaders run on `Dispatchers.IO`, return null on missing file / decode failure / OOM, never throw. Use `java.util.Base64` (JVM-standard) so the helpers are JVM-unit-testable.\n" +
794
- " - iOS: equivalent `Photos.loadBase64(uri:)`, `Photos.loadBytes(uri:)`, `Photos.loadImage(uri:)`, `Photos.mediaType(forUri:)` are landing in a follow-up library cut. Until then iOS handlers can take the existing `loadImage()` extension if they hold a `Photo` object, or do the URI decode inline.\n\n" +
795
- " Video URI-string helpers (`object Videos` top-level, library 1.1.14-pair+, mirrors Photos for `capture_video.uri`):\n" +
796
- " - Android: `suspend fun Videos.copyToFile(uri: String, dst: File): Boolean` — the canonical save-to-gallery path. Streams from `file://` / bare-path sources via `File.copyTo`; decodes `data:` URIs in-memory. Creates parent dirs, overwrites dst, returns true/false (no throw).\n" +
797
- " - Android: `suspend fun Videos.loadBytes(uri: String): ByteArray?`raw bytes (for inline-video AI APIs like Gemini 1.5 Pro). Loads the whole clip in memory — prefer `copyToFile` for clips >50 MB.\n" +
798
- " - Android: `suspend fun Videos.loadBase64(uri: String): String?` — base64 with the same `data:` URI fast path.\n" +
799
- " - Android: `fun Videos.mediaTypeFromUri(uri: String): String?` — `\"video/mp4\"`, `\"video/quicktime\"`, `\"video/webm\"`, `\"video/x-matroska\"`, `\"video/x-msvideo\"`, `\"video/x-m4v\"`, `\"video/3gpp\"`. Reads `data:video/<type>;...` MIME directly; falls back to extension match.\n" +
800
- " - All loaders run on `Dispatchers.IO`, never throw.\n" +
801
- " - iOS parity is queued (handoff at `shared-context/ios-videos-helpers-handoff.md`); until that lands, iOS handlers should hand-roll the `data:` / `file://` branch.\n\n" +
802
- "── glasses.audio ── (`AudioClient`)\n" +
803
- "- `suspend fun recordDiscrete(config: AudioRecordConfig = AudioRecordConfig()): ExtentosResult<AudioRecording, AudioError>` bounded clip with auto-STT. Same path the `record_audio` block uses.\n" +
804
- "- `fun audioChunks(config: AudioChunkConfig = AudioChunkConfig()): Flow<AudioChunk>` raw audio chunk stream.\n" +
805
- "- `fun transcriptions(config: TranscriptionConfig = TranscriptionConfig()): Flow<Transcript>` live STT stream (Partial + Final). The same stream the `transcription_incremental` stream type and the `voice_command` matcher consume; also the underlying source for the push-to-talk helper below.\n" +
806
- "- `suspend fun speak(text: String, config: SpeakConfig = SpeakConfig()): ExtentosResult<Unit, AudioError>` TTS.\n" +
807
- "- `suspend fun earcon(sound: EarconSound, volume: Double = 1.0)` — short canned tone.\n\n" +
808
- " Push-to-talk helper (extension on `AudioClient`, returns a `PushToTalkSession`):\n" +
809
- " - Android: `fun AudioClient.startPushToTalk(scope: CoroutineScope, config: TranscriptionConfig = TranscriptionConfig(language = \"en-US\", partial = false)): PushToTalkSession` — call on press; subscribes to `transcriptions(config)` in `scope` and accumulates `Transcript.Final` segments. Cancelling `scope` also stops the session.\n" +
810
- " - iOS: `func startPushToTalk(config: TranscriptionConfig = TranscriptionConfig(language: \"en-US\", partial: false)) -> PushToTalkSession` — same shape; uses an internal unstructured Task so no scope parameter.\n" +
811
- " - `suspend fun PushToTalkSession.stopAndFlush(): String` (Android) / `func stopAndFlush() async -> String` (iOS) — call on release; cancels the underlying subscription and returns the joined text of every Final transcript received during the session. Idempotent.\n" +
812
- " - Standard usage: on press, `val session = audio.startPushToTalk(scope)` (or `let session = audio.startPushToTalk()`); on release, `val text = session.stopAndFlush()` then `runtime.fireTrigger(triggerId, mapOf(\"transcript\" to JsonPrimitive(text), \"rawUtterance\" to JsonPrimitive(text)))`. See `searchDocs(topic: 'trigger_types')` push_to_talk section for the full both-platforms snippet.\n\n" +
813
- "── glasses.runtime ── (`RuntimeClient`)\n" +
814
- "- `val events: Flow<RuntimeEvent>` every runtime event (TriggerFired, BlockStarted, AppCallbackInvoked, FlowCompleted, Log, HandlerFailure, etc.). Tap from on-device diagnostics or to mirror to your own logger.\n" +
815
- "- `suspend fun loadSpec(jsonString: String): ExtentosResult<LoadedSpecInfo, List<SpecValidationError>>`load/replace the spec. Bootstrap calls this from assets at startup.\n" +
816
- "- `fun currentSpec(): ExtentosSpec?` — currently-loaded spec (null until loadSpec succeeds).\n" +
817
- "- `suspend fun fireTrigger(triggerId: String, payload: Map<String, JSONValue> = emptyMap()): FlowEndCause` — programmatically fire a trigger. The unified API for host-side trigger firing. There is no `runtime.invokeManualLaunch()`; for `manual_launch`, call `fireTrigger` directly. For `push_to_talk` the recommended path is `audio.startPushToTalk()` + `session.stopAndFlush()` (above), which wraps `transcriptions()` collection and ultimately calls `fireTrigger` for you.\n" +
818
- "- `suspend fun snapshotEvents(): List<RuntimeEvent>`drain the on-device 512-entry ring buffer for export.\n\n" +
819
- "── glasses.toggles ── (`ToggleClient`)\n" +
820
- "- `val state: StateFlow<Map<String, JSONValue>>`observe all 8 toggles' current values.\n" +
821
- "- `fun put(key: String, value: JSONValue, source: ToggleSource = ToggleSource.UI)` — set a toggle. Same surface a `set_toggle` action drives.\n" +
822
- "- `fun get(key: String): JSONValue?` read one toggle.\n\n" +
823
- "── glasses.extensions ── (`ExtensionsClient`)\n" +
824
- "- `fun registerAppCallbackHandler(handler: AppCallbackHandler)` — wire the dispatch lambda generated by `generateConsumer(kind: \"callback\")`. Bootstrap registers this once; all `app_callback` actions route through it.\n" +
825
- "- `fun registerCustomBlock(type, handler)` — `@ExperimentalExtentosApi`. Define your own block type; the spec's `block_call` resolves it.\n" +
826
- "- `fun registerCustomTrigger(type, factory)` — `@ExperimentalExtentosApi`. Same idea for triggers.\n\n" +
827
- "── glasses.debug ── (`DebugClient`)\n" +
828
- "- `suspend fun injectVoiceCommand(transcript, confidence)` — bypass STT, inject a transcript directly. Useful for unit tests + the bypass-button path in the simulator.\n" +
829
- "- `suspend fun injectHardwareAlert(alert)` — fake a thermal/hinges/audio-route event.\n" +
830
- "- `suspend fun setConnectionState(state)` — fake a transport state for testing.\n" +
831
- "- `suspend fun mockAppCallback(handler, result)` — return canned data from a handler without running its body.\n" +
832
- "- `suspend fun clearAllMocks()` reset all debug overrides.\n" +
833
- "- `suspend fun setSimulatorSession(url)` programmatically swap simulator URL at runtime.\n\n" +
834
- "── glasses.telemetry ── (`TelemetryClient`)\n" +
835
- "- `val consent: Boolean` read current telemetry consent state.\n" +
836
- "- `fun setUserSegment(segment)` — tag the install for cohort analysis.\n" +
837
- "- `fun trackEvent(name, properties)` emit a custom telemetry event (subject to consent).\n\n" +
838
- "── Type guide (commonly-needed) ──\n" +
839
- "- `JSONValue` is `kotlinx.serialization.json.JsonElement` (Android) / equivalent on iOS. Build with `JsonPrimitive(...)` / `buildJsonObject { put(\"k\", v) }`.\n" +
840
- "- `AppCall` exposes `handler: String`, `input: JSONValue` (always a JsonObject), `flowContext: FlowContext`, and on Android `applicationContext: android.content.Context?` (1.1.16-pair+) — non-null when the bootstrap was created with `ExtentosConfig.applicationContext = this`. Read inputs via `call.input.jsonObject[\"key\"]?.jsonPrimitive?.contentOrNull`. iOS handlers use `FileManager.default` / `UserDefaults.standard` / `Bundle.main` for app-level access — there's no iOS analog field on AppCall.\n" +
841
- "- `AppCallbackResult.Success(JsonElement)` populates the flow's `save_as`; `AppCallbackResult.Error(code, message)` short-circuits the flow.",
842
- keywords: ["library", "api", "extentosglasses", "config", "subclient", "create", "register", "runtime", "loadSpec", "ExtentosResult", "fireTrigger", "transcriptions", "audioClient", "cameraClient", "extensionsClient", "togglesClient", "debugClient", "manual_launch", "push_to_talk", "loadBitmap", "loadImage", "loadBase64", "loadBytes", "Photos", "mediaTypeFromUri", "startPushToTalk", "PushToTalkSession", "Photo", "Bitmap", "PlatformImage", "UIImage", "vision", "anthropic", "openai"],
843
- relatedTools: ["generateConnectionModule", "generateConsumer"],
844
- },
845
- {
846
- topic: "app_callback_guide",
847
- title: "App Callback Handlers",
848
- content: "app_callback routes flow control from the spec into the developer's native code — for AI calls, domain logic (DB writes, file IO, repository ops), or external service access. Register via glasses.extensions.registerAppCallbackHandler(handler). AppCall carries the handler name, the input map declared in the spec's app_callback action, and caller context (triggerId, flowId). Returning AppCallbackResult.Success(value) populates the flow's save_as variable; returning Error short-circuits the flow. Timeouts surface as 'callback.timeout' events.\n\n" +
849
- "── Handler stub generation ──\n" +
850
- "Call generateConsumer(kind: 'callback', handlers: [...]) → emits ExtentosCallbacks.{kt,swift} with marker-bounded dispatch and a USER-CODE block per handler for your implementation. The bootstrap dispatch region gets a replace_region patch that wires registration. Pass `suggestedProvider: 'anthropic' | 'openai' | ...` to get an AI-provider-specific TODO comment in the stub body; omit it for a generic implementation TODO suitable for non-AI handlers.\n\n" +
851
- "Android handler shape (Kotlin):\n" +
852
- " private suspend fun handleSaveClip(call: AppCall): AppCallbackResult {\n" +
853
- " // USER-CODE-START:save_clip\n" +
854
- " val uri = call.input.jsonObject[\"uri\"]?.jsonPrimitive?.contentOrNull\n" +
855
- " ?: return AppCallbackResult.Error(code = \"missing_uri\", message = \"input.uri was null\")\n" +
856
- " val bytes = Photos.loadBytes(uri)\n" +
857
- " ?: return AppCallbackResult.Error(code = \"decode_failed\", message = \"could not read \\$uri\")\n" +
858
- " val savedAt = MyClipRepository.persist(bytes)\n" +
859
- " return AppCallbackResult.Success(buildJsonObject { put(\"savedAt\", savedAt) })\n" +
860
- " // USER-CODE-END:save_clip\n" +
256
+ content: "ExtentosGlasses is the top-level handle. Identical-shape sub-clients on Android (Kotlin) and iOS (Swift). Construction:\n\n" +
257
+ " Android: `val glasses = ExtentosGlasses.create(ExtentosConfig(applicationContext = this, debug = BuildConfig.DEBUG))`\n" +
258
+ " iOS: `let glasses = Extentos.create(config: ExtentosConfig(debug: isDebugBuild, telemetryConsent: true))`\n\n" +
259
+ "**Result types.** Android suspending ops return `ExtentosResult<T, E>` (sealed `Ok` / `Err`). Pattern-match — do NOT wrap in Kotlin `runCatching`, which only catches thrown exceptions and would miss `Err` variants. iOS uses the same `ExtentosResult<T, E>` enum (`.success(T)` / `.failure(E)`), so `try? await glasses.audio.speak(...)` is wrong use `guard case .success = await glasses.audio.speak(...) else { ... }`. The capability guides (`getCapabilityGuide(feature: ...)`) show the per-feature unwrap pattern for every op.\n\n" +
260
+ "── glasses.connection ── (ConnectionClient)\n\n" +
261
+ "- `state` `StateFlow<GlassesState>` (Android) / `ObservableState<GlassesState>` (iOS, iterate via `.stream`). 6 buckets: `notRegistered`, `registered`, `deviceDiscovered(DeviceId)`, `connecting(DeviceId)`, `active(ActiveState)`, `disconnected(DisconnectCause)`.\n" +
262
+ "- `simulatorHint` `StateFlow<SimulatorHint?>` / `ObservableState<SimulatorHint?>`. Non-null when the active transport is BrowserSim or LocalSim; UI surfaces a hint badge so users know they're not on real hardware.\n" +
263
+ "- `connect(deviceId: DeviceId? = null)` — opens the persistent transport. Bootstrap calls this in `onCreate` / on init.\n" +
264
+ "- `disconnect()` graceful tear-down. Reverses `connect`; safe to call repeatedly.\n\n" +
265
+ "── glasses.camera ── (CameraClient)\n\n" +
266
+ "- `capturePhoto(config: PhotoConfig = ...)`one-shot still. Returns a `Photo` with `uri` (Android: String) / `url` (iOS: URL), width, height, format.\n" +
267
+ "- `captureVideo(config: VideoConfig = ...)` — bounded clip (silence-VAD or max duration). Returns a `VideoClip`.\n" +
268
+ "- `videoFrames(config: VideoFrameConfig = ...)`continuous frame stream. Kotlin `Flow<VideoFrame>`, iOS `AsyncStream<VideoFrame>`.\n\n" +
269
+ "**Photo display helpers (extension on `Photo`):**\n" +
270
+ " Android: `suspend fun Photo.loadBitmap(): Bitmap?` runs on `Dispatchers.IO`; handles `data:` (BrowserSim base64), `file://` (RealMeta + LocalSim), and bare paths uniformly. Returns null on missing file / decode failure / unrecognized scheme. Use this instead of Coil's `AsyncImage`, which silently fails on `data:` URIs.\n" +
271
+ " iOS: `func Photo.loadImage() async -> PlatformImage?` (`UIImage` on iOS, `NSImage` on macOS). Same scheme dispatch; decode runs on a detached high-priority Task.\n\n" +
272
+ "**Photo URI-string helpers (`object Photos`, Android only — for handler code that already has a URI String):**\n" +
273
+ " - `suspend fun Photos.loadBase64(uri: String): String?` — bare base64 ready for Claude Vision / GPT-4V bodies. Fast path on `data:` URIs (no decode + re-encode).\n" +
274
+ " - `suspend fun Photos.loadBytes(uri: String): ByteArray?` — raw bytes (multipart upload, custom encoders).\n" +
275
+ " - `suspend fun Photos.loadBitmap(uri: String): Bitmap?` — same result as `photo.loadBitmap()` from a URI String.\n" +
276
+ " - `fun Photos.mediaTypeFromUri(uri: String): String?` `image/jpeg` / `image/png` / `image/webp` / etc.\n" +
277
+ " - All loaders run on `Dispatchers.IO`, return null on failure, never throw.\n" +
278
+ " - iOS parity (`Photos` namespace) is queued; current iOS handlers use `Photo.loadImage()` if they hold a Photo, or hand-roll the `data:` / `file://` branch from a URL.\n\n" +
279
+ "**Video URI-string helpers (`object Videos`, Android only):**\n" +
280
+ " - `suspend fun Videos.copyToFile(uri: String, dst: File): Boolean`the canonical save-to-gallery path. Streams `file://` / bare-path; decodes `data:` URIs in-memory.\n" +
281
+ " - `suspend fun Videos.loadBytes(uri: String): ByteArray?` — raw bytes (inline-video AI APIs like Gemini 1.5 Pro). Prefer `copyToFile` for clips >50 MB.\n" +
282
+ " - `suspend fun Videos.loadBase64(uri: String): String?` / `fun Videos.mediaTypeFromUri(uri: String): String?` mirrors Photos.\n" +
283
+ " - iOS parity queued.\n\n" +
284
+ "── glasses.audio ── (AudioClient)\n\n" +
285
+ "- `recordDiscrete(config: AudioRecordConfig = ...)` — bounded clip with built-in silence-VAD and auto-transcription. Returns `AudioRecording` with `transcript`, `audioDurationMs`, `rawAudioUri`. The canonical free-form-question-capture primitive.\n" +
286
+ "- `audioChunks(config: AudioChunkConfig = ...)`raw audio chunk stream for custom on-device STT / passthrough.\n" +
287
+ "- `transcriptions(config: TranscriptionConfig = ...)`continuous Partial + Final transcript stream. The wake-phrase primitive (subscribe + match strings) AND the live-captions primitive.\n" +
288
+ "- `speak(text: String, config: SpeakConfig = ...)`TTS via the platform engine (Android TextToSpeech, iOS AVSpeechSynthesizer). Audio bytes route over HFP to the glasses speaker. Blocks until done by default; for parallel listen-and-speak run it in its own coroutine/Task.\n" +
289
+ "- `cancelSpeak()` interrupt the currently-speaking utterance immediately (the barge-in primitive). Fire-and-forget; idempotent. See the `barge_in_speak` getCodeExample for the canonical TaskGroup / structured concurrency pattern.\n" +
290
+ "- `earcon(sound: EarconSound, volume: Double = 1.0)` — short canned tone (START / COMPLETE / ERROR / NOTIFY).\n\n" +
291
+ "**Push-to-talk helper (extension on `AudioClient`):**\n" +
292
+ " Android: `fun AudioClient.startPushToTalk(scope: CoroutineScope, config: TranscriptionConfig = TranscriptionConfig(language = \"en-US\", partial = false)): PushToTalkSession`\n" +
293
+ " iOS: `func startPushToTalk(config: TranscriptionConfig = ...) -> PushToTalkSession`\n" +
294
+ " Call on press; subscribes to `transcriptions(config)` and accumulates `Transcript.Final` segments. Call `stopAndFlush()` on release returns the joined text of every Final transcript received during the session. Idempotent; cancellation-safe.\n\n" +
295
+ "── glasses.runtime ── (RuntimeClient)\n\n" +
296
+ "- `events` `Flow<RuntimeEvent>` (Android) / `AsyncStream<RuntimeEvent>` (iOS). Capability-layer events (toggle changed, connection state changed, capability stream lifecycle, log entries, unrecognized utterances). Tap for in-app diagnostics or to mirror to your own logger / crash reporter.\n" +
297
+ "- `snapshotEvents()`drain the on-device 512-entry ring buffer (suspend / async). Useful for one-shot export at app exit.\n\n" +
298
+ "*Note: pre-pivot RuntimeClient also exposed `loadSpec`, `currentSpec`, `fireTrigger`, and rich `RuntimeEvent` variants like `TriggerFired`/`BlockStarted`/`AppCallbackInvoked`/`FlowCompleted`. Those are gone the library no longer interprets a spec, and trigger/block/callback events are no longer emitted. The remaining `RuntimeEvent` variants are `ToggleChanged`, `CoexistenceWarning`, `Log`, `UnrecognizedUtterance` plus capability-stream lifecycle events.*\n\n" +
299
+ "── glasses.toggles ── (ToggleClient)\n\n" +
300
+ "- `state` `StateFlow<Toggles>` (Android) / `ObservableState<Toggles>` (iOS). Observe all 8 toggles' current values; see `searchDocs(topic: 'toggles')` for the full list.\n" +
301
+ "- `update { transform }`closure form for setting toggles atomically. The canonical API on both platforms.\n" +
302
+ "- `put(key: String, value: JSONValue, source: ToggleSource = ToggleSource.UI)` Android only convenience; rarely needed in customer code (the user owns toggle values via the connection page UI).\n" +
303
+ "- `get(key: String): JSONValue?`read one toggle without subscribing.\n\n" +
304
+ "── glasses.debug ── (DebugClient)\n\n" +
305
+ "- `injectVoiceCommand(transcript, confidence)`bypass STT, inject a transcript directly. Useful for unit tests + the simulator's bypass-button path.\n" +
306
+ "- `injectHardwareAlert(alert)` fake a thermal / hinges / audio-route event.\n" +
307
+ "- `setConnectionState(state)` — fake a transport state for testing.\n" +
308
+ "- `clearAllMocks()` — reset all debug overrides.\n" +
309
+ "- `setSimulatorSession(url)` — programmatically swap simulator URL at runtime.\n\n" +
310
+ "── glasses.telemetry ── (TelemetryClient)\n\n" +
311
+ "- `consent: Boolean` — read current consent state.\n" +
312
+ "- `setUserSegment(segment)` — tag the install for cohort analysis.\n" +
313
+ "- `trackEvent(name, properties)` — emit a custom telemetry event (subject to consent).\n\n" +
314
+ "── Type guide ──\n\n" +
315
+ "- `JSONValue` is `kotlinx.serialization.json.JsonElement` (Android) / sealed enum (iOS). Build via `JsonPrimitive(...)` / `buildJsonObject { put(\"k\", v) }` on Android; `.string(...)` / `.bool(...)` / `.object_(...)` on iOS. Pattern-match to read on both.\n" +
316
+ "- `Transcript` is a sealed type — Kotlin uses `data class Partial(...)` / `Final(...)`; iOS uses `case partial(text:, confidence:)` / `final(text:, startMs:, endMs:, confidence:)`. Pattern-match to extract the text + finality.\n" +
317
+ "- `Photo.uri` (Android, String) vs `Photo.url` (iOS, URL) — note the platform difference.\n" +
318
+ "- `GlassesState.active(ActiveState)` carries an associated value on iOS; pattern-bind to read it. Kotlin uses `is GlassesState.Active` with property access.\n\n" +
319
+ "── Customer-side handler structure ──\n\n" +
320
+ "Your code subscribes to these primitives from a Handler class (your own naming) instantiated in your Application / @main App. There is no library-side registration step; nothing to `register`. `searchDocs(topic: 'custom_handlers')` shows the canonical handler shape; `getCodeExample(pattern)` returns full compositions for the 6 common voice-glasses use cases.",
321
+ keywords: ["library", "api", "extentosglasses", "config", "subclient", "create", "ExtentosResult", "transcriptions", "audioClient", "cameraClient", "togglesClient", "debugClient", "telemetryClient", "connectionClient", "loadBitmap", "loadImage", "loadBase64", "loadBytes", "Photos", "Videos", "mediaTypeFromUri", "startPushToTalk", "PushToTalkSession", "Photo", "Bitmap", "PlatformImage", "UIImage", "vision", "anthropic", "openai", "cancelSpeak", "barge_in"],
322
+ relatedTools: ["generateConnectionModule", "getCapabilityGuide", "getCodeExample"],
323
+ },
324
+ {
325
+ topic: "custom_handlers",
326
+ title: "Custom Handlers",
327
+ content: "Post pure-SDK, there's no spec runtime and no `app_callback` action — handlers are just normal Kotlin/Swift code you write. You subscribe to SDK Flows (transcripts, video frames, hardware events) and call SDK methods (`glasses.audio.speak`, `glasses.camera.capturePhoto`, `glasses.audio.recordDiscrete`) from your handler. No special registration, no dispatch table, no AppCall envelope — your class IS the handler.\n\n" +
328
+ "── Canonical handler shape (Android, Kotlin) ──\n\n" +
329
+ "Write a class that takes the `ExtentosGlasses` reference and your domain dependencies. Start it from your Application or a ViewModel:\n\n" +
330
+ " class CoachHandler(\n" +
331
+ " private val glasses: ExtentosGlasses,\n" +
332
+ " private val anthropic: AnthropicClient,\n" +
333
+ " private val repo: WorkoutRepository,\n" +
334
+ " ) {\n" +
335
+ " private val scope = CoroutineScope(SupervisorJob() + Dispatchers.Default)\n\n" +
336
+ " fun start() = scope.launch {\n" +
337
+ " // Wake phrase: subscribe to transcripts, match strings yourself.\n" +
338
+ " glasses.audio.transcriptions().collect { t ->\n" +
339
+ " if (t.isFinal && \"ask my coach\" in t.text.lowercase()) {\n" +
340
+ " glasses.audio.speak(\"What would you like to know?\")\n" +
341
+ " val q = glasses.audio.recordDiscrete(\n" +
342
+ " AudioRecordConfig(silenceTimeoutSeconds = 3.0)\n" +
343
+ " ).getOrNull() ?: return@collect\n" +
344
+ " val answer = anthropic.ask(q.transcript, repo.history())\n" +
345
+ " glasses.audio.speak(answer)\n" +
346
+ " }\n" +
347
+ " }\n" +
348
+ " }\n\n" +
349
+ " fun stop() = scope.cancel()\n" +
861
350
  " }\n\n" +
862
- "iOS handler shape (Swift):\n" +
863
- " private func handleSaveClip(_ call: AppCall) async -> AppCallbackResult {\n" +
864
- " // USER-CODE-START:save_clip\n" +
865
- " guard let uri = call.input.string(at: \"uri\") else {\n" +
866
- " return .error(code: \"missing_uri\", message: \"input.uri was null\")\n" +
867
- " }\n" +
868
- " ...\n" +
869
- " // USER-CODE-END:save_clip\n" +
351
+ "── Canonical handler shape (iOS, Swift) ──\n\n" +
352
+ " actor CoachHandler {\n" +
353
+ " private let glasses: any ExtentosGlasses\n" +
354
+ " private let anthropic: AnthropicClient\n" +
355
+ " private let repo: WorkoutRepository\n" +
356
+ " private var task: Task<Void, Never>?\n\n" +
357
+ " func start() {\n" +
358
+ " task = Task {\n" +
359
+ " for await t in glasses.audio.transcriptions() {\n" +
360
+ " guard t.isFinal,\n" +
361
+ " t.text.lowercased().contains(\"ask my coach\")\n" +
362
+ " else { continue }\n" +
363
+ " try? await glasses.audio.speak(\"What would you like to know?\")\n" +
364
+ " let q = try? await glasses.audio.recordDiscrete(\n" +
365
+ " AudioRecordConfig(silenceTimeoutSeconds: 3.0)\n" +
366
+ " )\n" +
367
+ " guard let q = q else { continue }\n" +
368
+ " let answer = try? await anthropic.ask(q.transcript, history: repo.history())\n" +
369
+ " if let answer { try? await glasses.audio.speak(answer) }\n" +
370
+ " }\n" +
371
+ " }\n" +
372
+ " }\n\n" +
373
+ " func stop() { task?.cancel() }\n" +
870
374
  " }\n\n" +
871
- "── Reading structured input ──\n" +
872
- "`call.input` is always a JsonObject (or its iOS equivalent). Read fields with kotlinx.serialization's accessors:\n" +
873
- " - `call.input.jsonObject[\"key\"]?.jsonPrimitive?.contentOrNull` String (null if missing or non-string)\n" +
874
- " - `call.input.jsonObject[\"key\"]?.jsonPrimitive?.intOrNull`Int (works on number primitives)\n" +
875
- " - `call.input.jsonObject[\"key\"]?.jsonPrimitive?.booleanOrNull` — Bool\n" +
876
- " - `call.input.jsonObject[\"key\"]?.jsonObject` nested object (unwrap recursively)\n" +
877
- " - `call.input.jsonObject[\"key\"]?.jsonArray` array\n" +
878
- "iOS exposes the same shape via `call.input.string(at:)`, `call.input.int(at:)`, etc. Template substitution from the spec (`input: { uri: \"{{photo.uri}}\" }`) resolves before the handler is invoked, so the handler only sees concrete values — never `{{...}}` literals.\n\n" +
879
- "── Reading Application Context (Android) ──\n" +
880
- "**Android handlers can access the host's Application `Context` via `call.applicationContext` (library 1.1.16-pair+).** Non-null whenever the host app passed `applicationContext = this` from `Application.onCreate` (the canonical bootstrap flow that `generateConnectionModule` emits). Use it for any handler that needs Android system access:\n" +
881
- " - `call.applicationContext?.filesDir` — save-to-gallery / persist-to-disk paths. Canonical for the `voice_command capture_video app_callback save_clip` flow that uses `Videos.copyToFile(uri, dst)`.\n" +
882
- " - `call.applicationContext?.contentResolver` — MediaStore inserts so saved clips appear in the system gallery app.\n" +
883
- " - `call.applicationContext?.getSystemService(...)` system services (PowerManager, NotificationManager, etc.).\n" +
884
- " - `androidx.preference.PreferenceManager.getDefaultSharedPreferences(call.applicationContext)` shared preferences.\n" +
885
- "Always null-check: a few minority configurations build the library with `applicationContext = null` (browser-sim-only test setups, headless harnesses). Return `AppCallbackResult.Error(\"no_context\", \"...\")` if you need it but it's null.\n" +
886
- "iOS doesn't have an equivalent Apple's runtime exposes app-level singletons globally (`FileManager.default`, `UserDefaults.standard`, `Bundle.main`, `NSPersistentContainer.viewContext`), so iOS handlers don't need a Context-bridge field on `AppCall`. Use those singletons directly.\n\n" +
887
- "── Returning results ──\n" +
888
- "**AppCallbackResult.Success(value)** populates the flow's `save_as` variable with `value` (a JsonElement). Subsequent actions can reference it via `{{save_as}}` or `{{save_as.field}}`. Use this for normal completion AND for handled-error-as-data cases (e.g. `Success(buildJsonObject { put(\"ok\", false); put(\"reason\", \"empty\") })`) when the spec branches on the result.\n\n" +
889
- "**AppCallbackResult.Error(code, message)** short-circuits the flow. The trigger's `on_failure` policy decides whether subsequent actions run (`continue`) or the entire flow aborts (`abort`, default for app_callback errors). Use this for unhandled errors where the rest of the flow doesn't make sense (e.g. network down, auth failed, decode_failed).\n\n" +
890
- "Rule of thumb: if a downstream `branch` action would inspect the result's `ok` field, return `Success({ok: false, ...})`. If the flow has no useful path forward without this handler succeeding, return `Error(code, message)`.\n\n" +
891
- "── Timeouts ──\n" +
892
- "Handlers have a default 10-second timeout per call. Override per action via `timeout_ms` (e.g. `{ type: \"app_callback\", handler: \"slow_ai\", timeout_ms: 30000, input: {...} }`). On timeout the runtime emits `callback.timeout` to the event log and the trigger's `on_failure` policy fires. Long-running work (AI generation, file IO over a slow network) should set `timeout_ms` generously this is a hard kill, not a graceful cancel.\n\n" +
893
- "── Performance + threading ──\n" +
894
- "**Android:** handlers are invoked from a coroutine on `Dispatchers.Default`. Suspend functions can call `withContext(Dispatchers.IO)` for blocking IO the library does NOT pin the handler to the main thread.\n" +
895
- "**iOS:** handlers are `async` functions invoked from a Task at `.userInitiated` priority. Use `Task.detached(priority: .utility)` for CPU-heavy work.\n\n" +
896
- "Heavy workloads (LLM calls, large file decodes) run inline in the handler — that's fine, the runtime expects it. Don't `launch` background work and return immediately; the result will be ignored. If you need fire-and-forget, do it after returning Success.\n\n" +
897
- "── Multi-handler dispatch ──\n" +
898
- "Each spec handler name (the `handler` field of an `app_callback` action) gets its own private function in ExtentosCallbacks.kt / .swift. The generated `handle(call:)` entry point dispatches by `call.handler`:\n" +
899
- " override suspend fun handle(call: AppCall): AppCallbackResult = when (call.handler) {\n" +
900
- " \"save_clip\" -> handleSaveClip(call)\n" +
901
- " \"describe_scene\" -> handleDescribeScene(call)\n" +
902
- " \"send_to_slack\" -> handleSendToSlack(call)\n" +
903
- " else -> AppCallbackResult.Error(code = \"unknown_handler\", message = \"no handler for \\${call.handler}\")\n" +
904
- " }\n" +
905
- "Adding a new handler: add the action to the spec via updateSpec, then re-run generateConsumer to emit the new dispatch arm + USER-CODE block. Existing USER-CODE bodies are preserved (server-side merge if you pass projectPath, agent-side otherwise — see file_actions topic).\n\n" +
906
- "── Common mistakes ──\n" +
907
- "- Returning `Success(JsonNull)` to indicate failure — confuses downstream branches. Use `Error(code, message)` or `Success({ok: false})` consistently.\n" +
908
- "- Reading `call.input` as a Map<String, String> directly — JSON values are JsonElement; cast or unwrap explicitly.\n" +
909
- "- Doing async work via `launch { ... }` and returning Success immediately — the launched work outlives the handler and its result is discarded. Do all work synchronously (within the suspend function) before returning.\n" +
910
- "- Setting `timeout_ms: 1000` for an LLM call — anthropic / openai p95 is 3-8s; pick `timeout_ms: 15000` or higher for streaming, more for non-streaming.\n" +
911
- "- Forgetting to register the handler — generateConsumer emits the registration call inside the bootstrap's EXTENTOS-GENERATED-START:dispatch region; if you copy-paste handlers without re-running generateConsumer, registration drifts and handlers never fire.",
912
- keywords: ["app_callback", "callback", "handler", "register", "appcall", "timeout", "byok", "user-code", "dispatch", "appcallbackresult", "input", "json", "threading", "dispatcher"],
913
- relatedTools: ["generateConsumer", "getCredentialGuide", "updateSpec"],
914
- },
915
- {
916
- topic: "voice_proxy",
917
- title: "Simulator Voice Proxy",
918
- content: "Simulator-time STT + TTS runs on Extentos infrastructure — automatic inside BrowserSimTransport, no developer code. Rides on the simulator event meter (anonymous or account tier). Production voice is phone-native: Android SpeechRecognizer + TextToSpeech; iOS SFSpeechRecognizer + AVSpeechSynthesizer + AVAudioSession. Zero Extentos runtime path in production.",
919
- keywords: ["voice_proxy", "stt", "tts", "simulator", "meter", "proxy", "production voice"],
375
+ "── Lifecycle ──\n\n" +
376
+ "Start the handler from your Application onCreate (Android) or AppDelegate / SceneDelegate (iOS), passing in the same `glasses` instance you used for `ExtentosConnectionPage`. Cancel on app teardown. Multiple handlers can coexist — each owns its own scope/task and listens to its own subset of SDK flows.\n\n" +
377
+ "── Reading Application Context (Android) ──\n\n" +
378
+ "Your handler class can hold whatever it needs at construction — including the host's `Context`:\n\n" +
379
+ " class SavePhotoHandler(\n" +
380
+ " private val glasses: ExtentosGlasses,\n" +
381
+ " private val context: Context,\n" +
382
+ " ) { ... }\n\n" +
383
+ "Use `context.filesDir` for save-to-gallery paths, `context.contentResolver` for MediaStore inserts, `context.getSystemService(...)` for system services. No special bridge field — it's regular Android code.\n\n" +
384
+ "── Returning results ──\n\n" +
385
+ "There's nothing to \"return\" to — your handler runs in your scope; it does what it wants. The library doesn't poll your handler for results. If you need to surface state to your UI, use a `StateFlow` / `@Published` property or your existing state management; if you need to chain SDK operations, call them sequentially in the handler.\n\n" +
386
+ "── Errors ──\n\n" +
387
+ "SDK methods return `ExtentosResult<T, E>` (Kotlin) or throw (Swift). Handle them with normal language patterns:\n\n" +
388
+ " Kotlin: `val photo = glasses.camera.capturePhoto().getOrNull() ?: return`\n" +
389
+ " Swift: `guard let photo = try? await glasses.camera.capturePhoto() else { return }`\n\n" +
390
+ "No timeout system — wrap calls in `withTimeoutOrNull` / `Task.withTimeout` if your domain needs a deadline. No global retry policy — write your own. Everything that was implicit in the spec runtime (timeouts, branch-on-result, on_failure) is now explicit in your code.\n\n" +
391
+ "── Threading ──\n\n" +
392
+ "**Android:** start handlers on a `CoroutineScope(SupervisorJob() + Dispatchers.Default)`. Use `withContext(Dispatchers.IO)` for blocking IO. SDK methods are `suspend` and safe to call from any dispatcher.\n\n" +
393
+ "**iOS:** start handlers in a `Task` or actor. SDK methods are `async`; the runtime threads them onto the cooperative pool.\n\n" +
394
+ "── Common mistakes ──\n\n" +
395
+ "- Subscribing to `glasses.audio.transcriptions()` multiple times from different scopes — each subscription is independent and they don't share state. Subscribe once per use case.\n" +
396
+ "- Forgetting to call `scope.cancel()` / `task?.cancel()` on teardown — the handler keeps running forever, holding `glasses` alive. Tie lifecycle to your Application / scene.\n" +
397
+ "- Doing fire-and-forget background work inside a `.collect { ... }` block without launching a new coroutine — blocks the collector. Wrap in `launch { ... }` if you want concurrent processing of incoming events.\n" +
398
+ "- Importing the BuildConfig pattern for API keys — kotlinc inlines `BuildConfig.MY_KEY` at compile time and stale keys silently survive rotations. Use `resValue` + `context.getString(R.string.my_key)` instead. See getCredentialGuide for the canonical pattern.",
399
+ keywords: ["handler", "custom_handlers", "kotlin", "swift", "sdk", "subscribe", "flow", "scope", "lifecycle", "byok"],
400
+ relatedTools: ["generateConnectionModule", "getCredentialGuide"],
401
+ },
402
+ {
403
+ topic: "voice_integration",
404
+ title: "Voice Integration",
405
+ content: "Voice (STT + TTS) is bridged through the SDK: `glasses.audio.transcriptions()` for continuous transcripts, `glasses.audio.recordDiscrete()` for one-shot capture-with-silence-VAD, `glasses.audio.speak()` for TTS. Same API in browser-sim and on real hardware — only the transport underneath differs.\n\n" +
406
+ "**Simulator transport** routes audio through the browser tab's Web Speech API → Whisper fallback (STT) and Web SpeechSynthesisUtterance (TTS). Runs on Extentos infrastructure; no developer code beyond calling the SDK. Rides the simulator event meter (anonymous or account tier).\n\n" +
407
+ "**Production transport** routes audio through the phone's native engines: Android `SpeechRecognizer` + `TextToSpeech`; iOS `SFSpeechRecognizer` + `AVSpeechSynthesizer` + `AVAudioSession`. Zero Extentos runtime path — the library opens the phone APIs and surfaces results as the same `Transcript` / `AudioRecording` types your handler already consumes.\n\n" +
408
+ "Customer doesn't write transport-switching code — the SDK handles routing based on the active connection.",
409
+ keywords: ["voice", "stt", "tts", "transcriptions", "speak", "recordDiscrete", "voice_integration", "simulator", "proxy", "production voice"],
920
410
  relatedTools: ["createSimulatorSession", "getCredentialGuide"],
921
411
  },
922
412
  {
923
413
  topic: "custom_extensions",
924
- title: "Custom Extensions",
925
- content: "Escape hatches for capabilities beyond the spec: glasses.extensions.registerCustomBlock(name, impl) adds a block type; glasses.extensions.registerCustomTrigger(name, impl) adds a trigger type. Custom registrations live outside the generated /extentos directory in developer-owned code. Spec references the custom name like any built-in type.",
926
- keywords: ["custom", "extension", "registerCustomBlock", "registerCustomTrigger", "escape hatch"],
927
- relatedTools: ["generateConsumer"],
414
+ title: "Going Beyond Built-in Capabilities",
415
+ content: "Post pure-SDK, there is no `registerCustomBlock` / `registerCustomTrigger` escape hatch — there is no spec runtime that would dispatch to them. The library exposes the device's capabilities directly; anything you want to do beyond what `getPlatformInfo` lists, you build in your own handler code using whatever phone / cloud APIs you already use.\n\n" +
416
+ "**Examples of 'custom' extensions, post-pivot:**\n" +
417
+ "- Custom on-device STT — subscribe to `glasses.audio.audioChunks(...)` and feed the bytes into your model (e.g. Whisper.cpp, on-device CoreML). The library's built-in transcriptions stream is one consumer; nothing stops you from being another.\n" +
418
+ "- Custom vision model — subscribe to `glasses.camera.videoFrames(...)` or call `glasses.camera.capturePhoto()` and ship the bytes to your inference path. Nothing about the SDK is opinionated about where vision happens.\n" +
419
+ "- Custom TTS provider (e.g. ElevenLabs) — fetch the audio bytes yourself, then play them through the phone speaker OR fall back to `glasses.audio.speak(...)` for plain text. Direct routing of arbitrary audio bytes to the glasses speaker is a future SDK feature; for now, premium-voice TTS plays from the phone.\n" +
420
+ "- New hardware-event listeners — the library surfaces a fixed set of hardware alerts (thermal, hinges, audio route, call state, lifecycle). Anything not on the list isn't reachable from the SDK; ask for it in the Extentos repo / feedback channel.\n\n" +
421
+ "The SDK keeps a tight, vendor-validated surface intentionally — every primitive is something we can wire to multiple glasses vendors. Adding a primitive here means we commit to maintaining it across the line; custom one-off integrations belong in your handler, not the library.\n\n" +
422
+ "If you find yourself wishing for a missing primitive, see the feedback flow in `searchDocs(topic: 'getting_started')` — that's the path to getting it into the library.",
423
+ keywords: ["custom", "extension", "byo", "escape hatch", "missing capability", "stt", "tts", "vision", "audio chunks", "video frames"],
424
+ relatedTools: ["getPlatformInfo"],
928
425
  },
929
426
  {
930
427
  topic: "simulator_local_mode",
@@ -936,69 +433,84 @@ export const DOC_INDEX = [
936
433
  {
937
434
  topic: "simulator_browser_mode",
938
435
  title: "Simulator Browser Mode",
939
- content: "Browser mode provisions a web UI via createSimulatorSession — agent receives sessionUrl, MCP attempts OS-native browser auto-open (best-effort), browser loads the spec and drives simulated hardware. WebSocket hub coordinates app ↔ browser ↔ optional observer. Supports injection (forced events) and replay of recorded sessions. Response includes `sessionUrl` + `autoOpenAttempted: boolean` + `autoOpenPlatform`. When `autoOpenAttempted: true`, the URL was sent to the developer's default browser — surface as a confirmation message (e.g., 'the simulator should be open in your browser'). When `autoOpenAttempted: false` (EXTENTOS_NO_AUTO_OPEN=1, headless/SSH/sandboxed environment, per-call `autoOpenBrowser: false`, or no compatible OS launcher), explicitly print the `sessionUrl` to the developer with copy-paste instructions. The URL is the guaranteed delivery channel — auto-open is a best-effort optimization.\n\n" +
436
+ content: "Browser mode provisions a web UI via createSimulatorSession — agent receives `sessionUrl`, MCP attempts OS-native browser auto-open (best-effort), the browser loads the simulator and drives a Meta-DAT-shaped transport stub. WebSocket hub coordinates app ↔ browser ↔ optional observer. Supports injecting events (transcripts, photo captures, hardware alerts) and replaying recorded sessions.\n\n" +
437
+ "Response includes `sessionUrl` + `autoOpenAttempted: boolean` + `autoOpenPlatform`. When `autoOpenAttempted: true`, the URL was sent to the dev's default browser — surface as a confirmation ('the simulator should be open in your browser'). When `autoOpenAttempted: false` (EXTENTOS_NO_AUTO_OPEN=1, headless/SSH/sandboxed environment, per-call `autoOpenBrowser: false`, or no compatible OS launcher), print the `sessionUrl` for the dev to copy. The URL is the guaranteed delivery channel — auto-open is a best-effort optimization.\n\n" +
940
438
  "── Persistent-sim model (get-or-create) ──\n\n" +
941
439
  "**`createSimulatorSession` is get-or-create, NOT mint-every-time.** A saved sim is keyed on `(account, projectInstallId, platform)` where `projectInstallId` derives from `extentos.manifest.json`'s `appPackage` (with `settings.gradle.kts` `rootProject.name` as fallback for unlabeled Android projects). Two response shapes:\n" +
942
440
  "- `status: \"active\"` — first call from this project; new sim minted.\n" +
943
- "- `status: \"resumed\"` — saved sim already existed; same `sessionUrl`, no rebuild needed for URL-bake users. Spec is live-pushed if it changed since last resume.\n\n" +
441
+ "- `status: \"resumed\"` — saved sim already existed; same `sessionUrl`, no rebuild needed for URL-bake users.\n\n" +
944
442
  "Saved sims live on the user's dashboard at extentos.com/s. They have NO 24-hour absolute lifetime. After 30 minutes of inactivity (no role connected) a sim flips to `idle` — still listed on the dashboard, still resumable; reconnecting any role transitions it back to `waiting`/`active`. The 90-day cleanup sweep eventually purges abandoned `idle` rows (with a 7-day undo grace via `deleted` state).\n\n" +
945
443
  "**Multi-platform note:** Android and iOS versions of the same app share ONE project on the dashboard when the Android `applicationId` matches the iOS bundle identifier (the standard reverse-DNS convention). The platform-keyed slot still keeps them as two separate saved sims (one per platform) — they just appear under one card. When identifiers diverge, the dashboard's Settings → Danger Zone → Merge consolidates them. Full details: searchDocs(topic: 'multi_platform_projects').\n\n" +
946
- "── Iteration model what changes when ──\n\n" +
947
- "Three things change independently during a typical dev loop, each at a different rate.\n\n" +
444
+ "── Iteration model: two things change at different rates ──\n\n" +
948
445
  "**1. THE SESSION** (the saved sim row + WebSocket hub)\n" +
949
446
  "- Persists indefinitely on the user's account. One per `(account, project, platform)`. No mint-per-iteration.\n" +
950
447
  "- Calling `createSimulatorSession` again with the same project returns the saved sim (`status: \"resumed\"`) — no new sessionId, no rebuild, no URL change.\n" +
951
448
  "- Need a clean ID/state? Call `createSimulatorSession({ resetFresh: true })` OR click \"Reset\" on the dashboard card. The previous sim is archived; a new sim is minted with the same project key. Live device sockets receive `session_moved` and re-bind automatically; URL-bake apps need one rebuild after force-fresh.\n" +
952
449
  "- Need to throw a sim away entirely? Click \"Delete\" on the dashboard card. 7-day undo grace.\n\n" +
953
- "**2. THE SPEC** (`extentos.spec.json` content — blocks, streams, triggers, actions)\n" +
954
- "- Live-pushed automatically. Every successful `updateSpec` call pushes the new spec over the existing WebSocket to the running app via the `spec_updated` frame. The library's `runtime.loadSpec()` applies it in-place; the app immediately knows about new triggers/blocks without a restart.\n" +
955
- "- Response includes `liveSessionUpdate: { status: \"applied\", appNotified: true }` when this fires. **No rebuild, no reinstall, no remint.** Confirm via `getEventLog` (look for `runtime:SpecReloaded`).\n" +
956
- "- Works for: changing trigger phrases, adding/removing/updating triggers, changing action sequences, swapping handler names anything spec-shaped. The dev keeps testing immediately after the call returns.\n\n" +
957
- "**3. APP CODE** (Kotlin/Swift handler implementations, custom UI, host-side logic)\n" +
958
- "- Requires rebuild + reinstall. New bytecode → new APK → adb install. Spec hot-push doesn't apply — this is binary code, not data.\n" +
959
- "- The library re-attaches to the saved sim via auto-bind on cold launch (or BuildConfig URL bake), so the SAME sim picks up where it left off. **No remint needed for code changes either.**\n" +
960
- "- If you (the agent) can rebuild + reinstall the app yourself: do it, the dev never sees a pause. If you can't: hand the rebuild + reinstall step to the developer with explicit commands (e.g., `./gradlew :app:assembleDebug && adb install -r app/build/outputs/apk/debug/app-debug.apk && adb shell am start -n <pkg>/.MainActivity`). Same auto-attach behavior on the other end — the dev doesn't need to mint a new session, just rebuild.\n\n" +
450
+ "**2. APP CODE** (Kotlin/Swift handler implementations, custom UI, host-side logic)\n" +
451
+ "- Requires rebuild + reinstall. New bytecode new APK adb install this is binary code, not data.\n" +
452
+ "- The library re-attaches to the saved sim via auto-bind on cold launch (or via the baked BuildConfig URL), so the SAME sim picks up where it left off. **No remint needed for code changes.**\n" +
453
+ "- If you (the agent) can rebuild + reinstall the app yourself: do it, the dev never sees a pause. If you can't: hand the rebuild + reinstall step to the developer with explicit commands (e.g., `./gradlew :app:assembleDebug && adb install -r app/build/outputs/apk/debug/app-debug.apk && adb shell am start -n <pkg>/.MainActivity`). Same auto-attach behaviour on the other end — the dev doesn't need to mint a new session, just rebuild.\n\n" +
454
+ "**There is no third \"spec\" axis post-pivot.** Pre-pivot the library interpreted a spec/DSL that could be hot-pushed over the WebSocket as a separate iteration loop. Post-pivot the handler code IS the only authoring surface; behaviour changes mean rebuild + reinstall. The simulator picks up the new APK / IPA automatically thanks to auto-bind, but there is no live-push of behaviour.\n\n" +
961
455
  "── How the app finds the session URL on launch ──\n\n" +
962
456
  "Three mechanisms in priority order:\n\n" +
963
- "1. **Auto-bind via local MCP probe** (preferred — zero URL handling). At startup, the library probes its host MCP server's local-bridge endpoint and learns the `mcpInstallId`. It opens `/ws/pending` announcing that ID + the device's `device_install_id`. When `createSimulatorSession` runs (or when the saved sim is resumed), the backend matches by `(mcpInstallId, deviceInstallId)` and binds the pending socket to the right saved sim. The URL never touches your binary, and multi-project devs route correctly.\n" +
964
- "2. **URL-bake fallback** (`BuildConfig.EXTENTOS_SESSION_URL`). Set when the MCP probe path doesn't reach (cellular phone, cloud-hosted agent, sandboxed environments where the emulator can't reach the host MCP). `createSimulatorSession` returns `androidBuildConfigPatch` with the snippet to paste into `app/build.gradle.kts`; rebuild the app once after pasting. The baked URL is stable across resumes — only force-fresh requires another rebuild.\n" +
457
+ "1. **Auto-bind via local MCP probe** (preferred — zero URL handling). At startup the library probes its host MCP server's local-bridge endpoint (`http://localhost:31337/whoami` via `10.0.2.2` from the Android emulator, or `adb reverse` from USB devices) and learns the `mcpInstallId`. It opens `/ws/pending` announcing that ID + the device's `device_install_id`. When `createSimulatorSession` runs (or when the saved sim is resumed), the backend matches by `(mcpInstallId, deviceInstallId)` and binds the pending socket to the right saved sim. The URL never touches your binary, and multi-project devs route correctly.\n" +
458
+ "2. **URL-bake fallback** (`BuildConfig.EXTENTOS_SESSION_URL` on Android, `extentos.session.plist` on iOS). Set when the MCP probe path doesn't reach (cellular phone, cloud-hosted agent, sandboxed environments where the emulator can't reach the host MCP). `createSimulatorSession` returns the snippet to paste; rebuild once after pasting. The baked URL is stable across resumes — only force-fresh requires another rebuild.\n" +
965
459
  "3. **Typed pairing code** (manual). User types a 5-char code in the simulator UI from another machine. Last resort.\n\n" +
966
460
  "── Practical recipes ──\n\n" +
967
- "- **Spec-only change** (renamed phrase, added trigger, tweaked action): one `updateSpec` call. Done. No mint, no rebuild.\n" +
968
- "- **Handler logic change** (new business logic in your callback): rebuild + reinstall the app. Same saved sim continues.\n" +
969
- "- **New handler added to spec**: one `updateSpec` (live-pushes the spec) + one rebuild + reinstall (to ship the new dispatch entry). Same saved sim continues.\n" +
461
+ "- **Handler logic change** (new business logic in your handler): rebuild + reinstall the app. Same saved sim continues.\n" +
970
462
  "- **App force-stopped or emulator restarted**: just relaunch the app. The library re-runs the MCP probe and the backend re-binds it to the saved sim. No mint, no agent intervention.\n" +
971
463
  "- **Resuming work the next morning** (sim went `idle` overnight): just launch the app. Auto-bind reconnects, sim transitions `idle → waiting → active` on its own. Dev never thinks about it.\n" +
972
464
  "- **Want a clean state slate / rotate the sessionId**: `createSimulatorSession({ resetFresh: true })` OR click \"Reset\" on the dashboard. Backend archives the existing sim, mints a replacement with the same project key, pushes `session_moved` to live sockets. URL-bake apps rebuild once after this.\n" +
973
- "- **Backend restarted while app was offline**: next reconnect to the baked URL works — Phase 0 backend recovery clears orphan-roles instead of expiring the row, and the device's normal `session_init` handshake re-attaches as the `app` role on the alive saved sim. No `session_unknown`, no manual remint.",
974
- keywords: ["browser", "session", "websocket", "injection", "replay", "simulator", "url", "auto-open", "autoOpenAttempted", "iteration", "rebuild", "live-push", "spec_updated", "auto-bind", "url-bake", "idle", "resumed", "resetFresh", "get-or-create", "dashboard", "persistent", "dev loop"],
975
- relatedTools: ["createSimulatorSession", "getSimulatorStatus", "getEventLog", "updateSpec"],
465
+ "- **Backend restarted while app was offline**: next reconnect to the baked URL works — backend recovery clears orphan-roles instead of expiring the row, and the device's normal `session_init` handshake re-attaches as the `app` role on the alive saved sim. No `session_unknown`, no manual remint.",
466
+ keywords: ["browser", "session", "websocket", "injection", "replay", "simulator", "url", "auto-open", "autoOpenAttempted", "iteration", "rebuild", "auto-bind", "url-bake", "idle", "resumed", "resetFresh", "get-or-create", "dashboard", "persistent", "dev loop"],
467
+ relatedTools: ["createSimulatorSession", "getSimulatorStatus", "getEventLog"],
976
468
  },
977
469
  {
978
470
  topic: "event_log_schema",
979
471
  title: "Event Log Schema",
980
- content: "Event types across 7 layers (transport, trigger, block, callback, toggle, stream, system). flowId correlates events inside one trigger execution. type uses dot notation: trigger.fired, block.completed, callback.timeout, stream.started, spec.loaded, account.meter_exhausted, simulator.session_expired. 10 getEventLog filter values: all, errors, transport, triggers, blocks, callbacks, toggles, streams, system, spec. errors filter is cross-layer severity-based; spec filter is type-prefixed.",
981
- keywords: ["event", "log", "flowid", "layer", "filter", "schema", "29", "trigger.fired"],
472
+ content: "Every event has a `layer`, a `type` (dot notation), a `severity` (info / warn / error), and a `message` + optional `details` object. The layer is the canonical filter axis in `getEventLog({ filter })` — pass the layer name to scope the trace.\n\n" +
473
+ "── Layers ──\n\n" +
474
+ "**transport** — the WebSocket / DAT-shaped link between the app, the simulator, and the backend. Connection open/close, frame relays, role bind/unbind, session lifecycle. Surfaces protocol-level surprises (auth failures, frame schema mismatches, backend evictions).\n" +
475
+ " Examples: `connection.opened`, `connection.closed`, `frame.relayed`, `session.role_attached`, `session.role_evicted`.\n\n" +
476
+ "**audio** — every `glasses.audio.*` op. STT subscriptions starting / stopping, recordDiscrete invocations, speak / cancelSpeak, raw audio chunks.\n" +
477
+ " Examples: `audio.transcriptions_subscribed`, `audio.transcriptions_unsubscribed`, `audio.record_discrete_started`, `audio.record_discrete_completed`, `audio.speak_started`, `audio.speak_cancelled`, `audio.speak_completed`, `audio.chunks_subscribed`.\n\n" +
478
+ "**camera** — every `glasses.camera.*` op. Photo / video capture, frame-stream lifecycle, capture failures (toggle gates, exclusivity conflicts).\n" +
479
+ " Examples: `camera.capture_photo_started`, `camera.capture_photo_completed`, `camera.video_frames_subscribed`, `camera.capture_failed`.\n\n" +
480
+ "**speak** — distinct sub-layer under audio specifically for TTS. Carved out so wake-and-listen flows can filter to just the talk-back side when debugging barge-in.\n" +
481
+ " Examples: `speak.started`, `speak.cancelled`, `speak.completed`.\n\n" +
482
+ "**toggles** — the 8 runtime toggles changing value. Carries `key`, `previous`, `next`, `source` (UI / HOST_APP / SYSTEM).\n" +
483
+ " Examples: `toggle.changed`.\n\n" +
484
+ "**streams** — the lifecycle of capability streams (videoFrames, audioChunks, transcriptions). Started, stopped, error.\n" +
485
+ " Examples: `stream.started`, `stream.stopped`, `stream.error`.\n\n" +
486
+ "**system** — the library + host app's own framing. App startup, library init, account state, meter exhaustion, simulator-session expiry.\n" +
487
+ " Examples: `app.initialized`, `account.meter_exhausted`, `simulator.session_expired`.\n\n" +
488
+ "── Special filter values ──\n\n" +
489
+ "- `all` (default) — every layer.\n" +
490
+ "- `errors` — every event with severity ≥ error across all layers. The first filter to try when investigating a misbehaviour.\n\n" +
491
+ "── Retired event types (do not exist post-pivot) ──\n\n" +
492
+ "Pre-pivot the event log also carried spec-runtime events: `trigger.fired`, `block.executed`, `callback.invoked`, `flow.completed`, `spec.loaded`, `meter.exhausted_reached`. These were retired with the DSL — the library no longer interprets a spec, so the runtime events that described its execution don't exist. The `triggers` / `blocks` / `callbacks` / `spec` filter values are also gone from `getEventLog`'s enum.",
493
+ keywords: ["event", "log", "layer", "filter", "schema", "transport", "audio", "camera", "speak", "toggle", "stream", "system", "severity"],
982
494
  relatedTools: ["getEventLog", "getSimulatorStatus"],
983
495
  },
984
496
  {
985
497
  topic: "permissions",
986
498
  title: "Permissions Derivation",
987
- content: "Android runtime permissions and iOS Info.plist keys derive from spec contents. Photo/video blocks → CAMERA. Audio-recording blocks, mic streams, voice triggers → RECORD_AUDIO + NSMicrophoneUsageDescription + NSSpeechRecognitionUsageDescription. Always added: BLUETOOTH_CONNECT, BLUETOOTH_SCAN, INTERNET, NSBluetoothAlwaysUsageDescription. Streams require FOREGROUND_SERVICE + foregroundServiceType (Android) or UIBackgroundModes (iOS). Meta DAT scopes derive from same surface: glasses.connection always; glasses.camera.{photo,video,stream}, glasses.audio.{record,stream} conditionally.",
988
- keywords: ["permission", "manifest", "info.plist", "camera", "microphone", "bluetooth", "dat", "scope"],
499
+ content: "Android runtime permissions and iOS Info.plist keys derive from the list of SDK capabilities the app uses — feed `getPermissions({ capabilities: [...] })` the names from `getPlatformInfo.features[].name`. `capture_photo` / `capture_video` / `video_frames` / `outgoing_video_stream` → CAMERA. `record_audio` / `audio_chunks` / `transcription_incremental` / `outgoing_audio_stream` / `voice_command` / `wake_word` / `push_to_talk` → RECORD_AUDIO + NSMicrophoneUsageDescription + NSSpeechRecognitionUsageDescription. `location_updated` → ACCESS_FINE_LOCATION + NSLocationWhenInUseUsageDescription. `phone_notification_forwarded` → BIND_NOTIFICATION_LISTENER_SERVICE plus the lib's `<service>` declaration. Always added: BLUETOOTH_CONNECT, BLUETOOTH_SCAN, INTERNET, NSBluetoothAlwaysUsageDescription. Stream capabilities require FOREGROUND_SERVICE + foregroundServiceType (Android) or UIBackgroundModes (iOS). Meta DAT scopes derive from the same surface: glasses.connection always; glasses.camera.{photo,video,stream}, glasses.audio.{record,stream} conditionally.",
500
+ keywords: ["permission", "manifest", "info.plist", "camera", "microphone", "bluetooth", "dat", "scope", "capabilities"],
989
501
  relatedTools: ["getPermissions", "validateIntegration", "getProductionChecklist"],
990
502
  },
991
503
  {
992
504
  topic: "credentials",
993
505
  title: "Credentials",
994
- content: "Simulator voice proxy is automatic — no setup. Meta Developer Center registration is required at first real-hardware test (per-developer): create app, enable Wearables DAT, register package name + signing signature (Android) or bundle ID + Team ID (iOS), copy App ID + Client Token into extentos.manifest.json (Android) or Info.plist MWDAT dict (iOS). BYOK provider keys (Anthropic / OpenAI / Google / etc.) live in the developer's secret storage and get consumed inside app_callback handlers — never stored in the manifest.",
995
- keywords: ["credential", "meta", "developer", "api key", "byok", "dat", "client token", "registration"],
506
+ content: "Simulator voice routing is automatic — no setup. Meta Developer Center registration is required at first real-hardware test (per-developer): create app, enable Wearables DAT, register package name + signing signature (Android) or bundle ID + Team ID (iOS), copy App ID + Client Token into extentos.manifest.json (Android) or Info.plist MWDAT dict (iOS). BYOK provider keys (Anthropic / OpenAI / Google / etc.) live in the developer's secret storage and get consumed inside the customer's handler code — never stored in the Extentos manifest. **Android storage:** use `resValue` + `R.string` lookups, NOT `buildConfigField`. BuildConfig String constants are compile-time-inlined by kotlinc and stale keys silently survive rotations. See `getCredentialGuide` for canonical patterns per provider.",
507
+ keywords: ["credential", "meta", "developer", "api key", "byok", "dat", "client token", "registration", "resValue", "buildConfig"],
996
508
  relatedTools: ["getCredentialGuide", "getProductionChecklist"],
997
509
  },
998
510
  {
999
511
  topic: "production_checklist",
1000
512
  title: "Production Checklist",
1001
- content: "8 personalized categories: Meta Developer Account, DAT Registration, Credential Swap, API Keys (skipped if no handlers), Permissions Audit, Foreground Service (skipped if no streams), Transport Config, Store Listing. Each step is personalized to the specstreams add foreground service / background modes; handlers add API key wiring; simulator URL must be stripped from release builds.",
513
+ content: "9 personalized categories: Meta Developer Account, DAT Registration, Credential Swap, API Keys (skipped if no handlers declared), Permissions Audit, Foreground Service / Background Modes (skipped if no stream capabilities), Transport Config, Real-Hardware Verification (when the app uses voice / video / audio capture), Store Listing. Each step is personalized by the capability list + handler names passed to `getProductionChecklist` — stream capabilities add the foreground service / background modes step; handlers add API key wiring; simulator URL must be stripped from release builds.",
1002
514
  keywords: ["production", "checklist", "release", "ship", "store", "ready"],
1003
515
  relatedTools: ["getProductionChecklist", "getCredentialGuide", "getPermissions"],
1004
516
  },
@@ -1007,14 +519,30 @@ export const DOC_INDEX = [
1007
519
  title: "Concurrency Modes",
1008
520
  content: "Trigger-level concurrency: 'restart' cancels any in-flight flow of the same id and runs the new one; 'single' ignores new invocations while one is running; 'queued' enqueues up to N (default 3). Default is 'single' for voice_command, 'restart' for stream-derived triggers. Applies independently per trigger id.",
1009
521
  keywords: ["concurrency", "restart", "single", "queued", "trigger", "in-flight"],
1010
- relatedTools: ["updateSpec"],
522
+ relatedTools: [],
1011
523
  },
1012
524
  {
1013
525
  topic: "manifest_format",
1014
526
  title: "extentos.manifest.json Format",
1015
- content: "Manifest version 1. Top-level: platform ('android'|'ios'), glasses ('meta_rayban'), appPackage, extentos { libraryVersion, specVersion, installedAt, lastUpdatedAt }, spec { path, hash }, generatedFiles[], one of gradle { coordinate, buildFile, additionalArtifacts } or spm { package, packageURL, version, projectFile, additionalProducts }, permissions[], integration { blocks, streams, triggers, handlerNames, togglesOverridden }. Tool-managed; hand-edits trigger drift in validateIntegration.",
1016
- keywords: ["manifest", "json", "extentos.manifest", "spec.hash", "generatedfiles", "gradle", "spm"],
1017
- relatedTools: ["inspectIntegration", "validateIntegration"],
527
+ content: "Manifest version 2 (post pure-SDK pivot). Top-level fields:\n\n" +
528
+ "- `manifestVersion: 2` — bumped from v1 when the spec / integration sub-objects were dropped.\n" +
529
+ "- `platform: \"android\" | \"ios\"`\n" +
530
+ "- `glasses: \"meta_rayban\"` — only one vendor for MVP.\n" +
531
+ "- `appPackage: string` — Android `applicationId` / iOS bundle identifier. The dashboard groups sims by this value.\n" +
532
+ "- `extentos: { libraryVersion, installedAt, lastUpdatedAt }` — install metadata.\n" +
533
+ "- `generatedFiles: [{ path, template, hash, generatedAt, generatedByTool }]` — the bootstrap (and any future tool-managed files). Detected by `inspectIntegration` / `validateIntegration` for drift via the recorded SHA-256.\n" +
534
+ "- `gradle: { coordinate, buildFile, additionalArtifacts, minimumAgpVersion, minimumGradleVersion }` — Android-only. Mutually exclusive with `spm`.\n" +
535
+ "- `spm: { package, packageURL, version, projectFile, additionalProducts, minimumXcodeVersion }` — iOS-only. Mutually exclusive with `gradle`.\n" +
536
+ "- `permissions: string[]` — declared platform permissions. Android: `android.permission.*`. iOS: Info.plist key names. Should equal `getPermissions(capabilities, platform)` modulo any extras the host app needs for non-Extentos reasons.\n" +
537
+ "- `capabilities: string[]` — the SDK feature names the customer's handler code uses (e.g. `\"transcription_incremental\"`, `\"capture_photo\"`, `\"speak\"`). Drives `getPermissions`, `getProductionChecklist`, and `getCredentialGuide`. Update this when you add a new primitive to your handler.\n" +
538
+ "- `handlerNames: string[]` — optional informational list of the customer's handler class names (e.g. `[\"VisionHandler\", \"CoachHandler\"]`). Used by `getProductionChecklist` to label per-handler ship-readiness steps. Safe to leave empty.\n\n" +
539
+ "── Retired fields (v1, do not write) ──\n\n" +
540
+ "- `extentos.specVersion` — DSL version, retired.\n" +
541
+ "- `spec: { path, hash }` — pointed at the DSL spec file, retired.\n" +
542
+ "- `integration: { blocks, streams, triggers, handlerNames, togglesOverridden }` — DSL primitive references, retired (replaced by top-level `capabilities` + `handlerNames`).\n\n" +
543
+ "The manifest is tool-managed by `generateConnectionModule`. Hand-edits to `permissions`, `capabilities`, or `handlerNames` are expected during normal development (you add a feature to your handler → add the capability name here so getPermissions output stays accurate). Hand-edits to `generatedFiles`, `gradle`, or `spm` will trigger drift warnings in `validateIntegration`.",
544
+ keywords: ["manifest", "json", "extentos.manifest", "generatedfiles", "gradle", "spm", "capabilities", "handlerNames"],
545
+ relatedTools: ["inspectIntegration", "validateIntegration", "generateConnectionModule"],
1018
546
  },
1019
547
  {
1020
548
  topic: "multi_platform_projects",
@@ -1075,50 +603,23 @@ export const DOC_INDEX = [
1075
603
  {
1076
604
  topic: "file_actions",
1077
605
  title: "File Actions in Tool Responses",
1078
- content: "**generateConnectionModule, initSpec, and generateConsumer return a `files[]` array.** Each entry has `{ path, action, content, ... }`. The `action` value tells the agent how to apply the file — five distinct semantics. Apply each correctly or developer code is lost.\n\n" +
606
+ content: "`generateConnectionModule` returns a `files[]` array. Each entry has `{ path, action, content, ... }`. The `action` value tells the agent how to apply the file — two distinct semantics post-pivot. Apply each correctly or developer code is lost.\n\n" +
1079
607
  "─── action: \"create\" ───\n" +
1080
- "Fresh write. The file is fully tool-owned — only `EXTENTOS-GENERATED` markers, no `USER-CODE` blocks. If the file already exists and was previously generated by Extentos, it's safe to overwrite. If it exists and is NOT marked as Extentos-generated (no `EXTENTOS-GENERATED` header), do not overwrite — that's developer-authored code.\n" +
1081
- "Examples: `ExtentosBootstrap.kt`, `ExtentosBootstrap.swift`.\n" +
1082
- "Apply: write `content` to `path`.\n\n" +
1083
- "─── action: \"write\" ───\n" +
1084
- "Server has already done all merging. Write `content` to `path` verbatim. Used when the agent passed `projectPath` to initSpec / generateConsumer — the action upgrades unconditionally (whether the target file existed or not). When the file existed, the server performed the USER-CODE merge on the agent's behalf; when it didn't, there was nothing to merge but the agent still writes verbatim (a fresh-integration create). The response always carries `mergeReport: { preserved: string[], orphaned: string[], extra: string[], warnings: [...] }` — empty arrays on the fresh-create path, populated on the merge path.\n" +
1085
- " - `preserved`: USER-CODE blocks whose existing body was spliced into the new template (the dev's code survived in place).\n" +
1086
- " - `orphaned`: blocks whose corresponding HANDLER / STREAM was removed from the spec — body kept at end-of-file with an `// ORPHANED:<name>` annotation. Surface these to the dev so they can review and either delete or re-add the spec entry.\n" +
1087
- " - `extra`: blocks present in the existing file that the template didn't generate (developer-created shared utility blocks, e.g., a `// USER-CODE-START:helpers` block holding HTTP clients). Preserved silently at end-of-file with no annotation. Listed for transparency; agents typically don't need to surface these.\n" +
1088
- " - `warnings`: soft warnings (corrupt input, duplicate block names). Surface to the dev.\n" +
1089
- "Apply: write `content` to `path`. Done.\n\n" +
1090
- "─── action: \"create_or_merge\" ───\n" +
1091
- "**Legacy / no-projectPath path.** The file contains `USER-CODE-START:<name>` / `USER-CODE-END:<name>` blocks where the developer fills in handler bodies, stream-processing code, imports, etc. On regeneration, those blocks MUST be preserved.\n" +
1092
- "Examples: `ExtentosCallbacks.kt`, `ExtentosCallbacks.swift`, `ExtentosStreams.kt`, `ExtentosStreams.swift`.\n" +
1093
- "**Prefer the server-side merge path**: pass `projectPath` to initSpec / generateConsumer and the server returns action: `write` with content already merged — no agent-side merge logic needed.\n" +
1094
- "Manual-merge apply (when projectPath is unavailable):\n" +
1095
- " 1. If the file does not exist: write `content` to `path`. Done.\n" +
1096
- " 2. If the file exists: for every `USER-CODE-START:<name>` … `USER-CODE-END:<name>` block in the existing file, replace the matching block in `content` with the existing block's body verbatim (markers stay; only the body inside is preserved). Then write the merged result to `path`.\n" +
1097
- " 3. If a USER-CODE block in the existing file has a `<name>` that is not present in `content` (a removed handler), insert it back at end-of-file with a `// ORPHANED:<name>` comment line above its preserved START/END markers — never silently delete developer code.\n" +
1098
- "Skipping the merge step (always overwriting) silently destroys handler implementations on every regeneration. Strongly prefer the projectPath path so the server handles this correctly.\n\n" +
1099
- "─── action: \"replace_region\" ───\n" +
1100
- "Marker-bounded splice into an existing tool-owned file. Carries an extra `marker` field (e.g., `marker: \"dispatch\"`).\n" +
1101
- "Apply:\n" +
1102
- " 1. Read the existing file at `path`.\n" +
1103
- " 2. Find `// EXTENTOS-GENERATED-START:<marker>` and `// EXTENTOS-GENERATED-END:<marker>` (the `//` works for both Kotlin and Swift).\n" +
1104
- " 3. Replace the body BETWEEN those markers (markers themselves stay) with `content`.\n" +
1105
- " 4. Write the result back to `path`.\n" +
1106
- "Example: `generateConsumer(kind: \"callback\")` returns a `replace_region` patch with `marker: \"dispatch\"` to wire the callback handler registration into the bootstrap's dispatch region.\n" +
1107
- "Apply this programmatically — the content is a code fragment, not English instructions. Never copy `content` to disk verbatim; it must be spliced into the marker block.\n\n" +
608
+ "Fresh write. The file is fully tool-owned — `EXTENTOS-GENERATED` marker in the header. If the file already exists and was previously generated by Extentos, it's safe to overwrite (regeneration restores the canonical content; any hand-edits between markers are lost by design). If it exists and is NOT marked as Extentos-generated (no `EXTENTOS-GENERATED` header), do not overwrite — that's developer-authored code.\n" +
609
+ "Examples: `ExtentosBootstrap.kt` (Android), `ExtentosBootstrap.swift` (iOS).\n" +
610
+ "Apply: write `content` to `path` verbatim.\n\n" +
1108
611
  "─── action: \"manual_patch\" ───\n" +
1109
- "English-language instructions for a file the agent does not own (the developer's own app entry point — e.g., iOS `App.swift`). The `content` field is prose with embedded code snippets, not a machine-applyable file body.\n" +
1110
- "Apply: read the instructions, find the location they describe in the developer's file, hand-apply the change. Surface this to the developer when ambiguous; do not guess.\n" +
612
+ "English-language instructions for a file the agent does not own (typically the developer's own app entry point — e.g., iOS `App.swift`). The `content` field is prose with embedded code snippets, not a machine-applyable file body.\n" +
613
+ "Apply: read the instructions, find the location they describe in the developer's file, hand-apply the change. Surface this to the developer when the location is ambiguous; do not guess.\n" +
1111
614
  "Example: `generateConnectionModule` (iOS) returns a `manual_patch` for `App.swift` adding `.onOpenURL` and `.onChange(of: scenePhase)` modifiers. The agent reads the instructions and applies them to the existing app file.\n\n" +
1112
- "── Why five actions instead of one ──\n" +
1113
- "Each value encodes a different file-ownership and merge contract:\n" +
615
+ "── Retired actions (do not appear post-pivot) ──\n\n" +
616
+ "Pre-pivot the tools also returned `write` (server-side-merged USER-CODE blocks), `create_or_merge` (agent-applied USER-CODE merge), and `replace_region` (marker-bounded splice into an existing tool-owned file). All three were part of the retired `generateConsumer` / `initSpec` / `updateSpec` toolchain. Post-pivot the only generated artifact is the bootstrap, which the customer never edits — so the merge semantics collapse to plain `create` (regenerate by overwriting) and `manual_patch` (App.swift / Application class wiring the customer owns).\n\n" +
617
+ "── Why two actions instead of one ──\n\n" +
1114
618
  "- `create` says 'we own this whole file; safe to overwrite.'\n" +
1115
- "- `write` says 'we own the structure, the dev owns USER-CODE blocks, AND we already did the merge for you — write verbatim.'\n" +
1116
- "- `create_or_merge` says 'we own the structure but the dev owns the USER-CODE blocks; YOU (agent) must preserve those — pass projectPath next time and the server does it for you.'\n" +
1117
- "- `replace_region` says 'we own one block of an existing file; splice into that block only.'\n" +
1118
619
  "- `manual_patch` says 'we don't own this file; here are instructions, you decide.'\n" +
1119
- "Conflating them silently destroys developer code (overwriting `create_or_merge` as if it were `create`) or breaks regeneration (treating `replace_region` as a full file write blows away the surrounding code).",
1120
- keywords: ["files", "action", "create", "write", "create_or_merge", "replace_region", "manual_patch", "user-code", "merge", "regeneration", "marker", "projectPath", "mergeReport", "orphaned", "extra", "helpers"],
1121
- relatedTools: ["generateConnectionModule", "initSpec", "generateConsumer"],
620
+ "Conflating them either silently overwrites developer code (treating `manual_patch` as `create`) or fails to wire the App entry point at all (treating `create` as `manual_patch`).",
621
+ keywords: ["files", "action", "create", "manual_patch", "regeneration", "extentos-generated", "bootstrap", "app.swift"],
622
+ relatedTools: ["generateConnectionModule"],
1122
623
  },
1123
624
  {
1124
625
  topic: "constraints_and_limitations",
@@ -1130,8 +631,28 @@ export const DOC_INDEX = [
1130
631
  {
1131
632
  topic: "connection_ui",
1132
633
  title: "Connection UI",
1133
- content: "3-layer customization: (1) drop-in ExtentosConnectionPage with zero config; (2) appearance tokens via ExtentosTheme for colors/typography; (3) @ExtentosEscapeHatch for full replacement (forfeits auto-updates). Placement patterns: dedicated route, settings subscreen, bottom-nav tab, modal sheet, headless. Code snippets for each placement: search topic 'connection_ui_placement'. Auto-grows with spec — new triggers/toggles appear automatically; **voice_command stop_conditions on capture_video / record_audio blocks ALSO auto-surface as nested indented sub-rows under whichever trigger calls that block** (resolved at UI-state-build time by walking trigger.actions → block_call → block.params.stop_conditions). Devs do nothing — drop a stop_condition in the spec and end-users see 'STOP — <phrase>' beneath the trigger that starts the capture. Opt out per-app via ConnectionPageConfig.showStopConditions = false (defaults to true). FAQ: Is it customizable? Yes, 3 layers. Can I hide sections? Yes via ConnectionPageConfig. Can I hide the nested stop sub-rows? Yes — ConnectionPageConfig.showStopConditions = false. Can I reorder? No — structural lock (status always first). Can I bring my own design? Yes via escape hatch, but no auto-updates. Does it auto-update with library? Yes for drop-in / themed; no for escape-hatch.",
1134
- keywords: ["ui", "connection page", "theme", "escape hatch", "appearance", "auto-grow", "placement", "connectionpageconfig", "stop_conditions", "stopconditiondescriptor", "showstopconditions", "nested stops"],
634
+ content: "**ExtentosConnectionPage** is the drop-in UI that handles pairing, status, toggles, and developer-facing diagnostics for the glasses connection. Three customization layers:\n\n" +
635
+ "1. **Drop-in** — `ExtentosConnectionPage(glasses)` (Kotlin) / `ExtentosConnectionPage(glasses: glasses)` (Swift). Zero config; works out of the box. Auto-updates with new library versions.\n" +
636
+ "2. **Theme** — pass an `ExtentosTheme` to override colors / typography while keeping the structural layout. Auto-updates with new library versions.\n" +
637
+ "3. **`@ExtentosEscapeHatch`** — full replacement with your own UI. Build against `glasses.connection.state`, `glasses.toggles.state`, and `glasses.connection.simulatorHint` directly. Forfeits structural auto-updates: when the library adds a new connection sub-state or toggle, your UI won't show it until you re-implement.\n\n" +
638
+ "── Placement patterns ──\n\n" +
639
+ "Five placements covered by `generateConnectionModule`'s placement gate (`searchDocs(topic: 'connection_ui_placement')` for full code snippets):\n" +
640
+ "- **dedicated_route** — single-purpose route with a back-arrow scaffold.\n" +
641
+ "- **settings_subscreen** — pushed onto the back stack from a Settings list row.\n" +
642
+ "- **bottom_tab** — co-equal nav destination alongside the app's main screens.\n" +
643
+ "- **modal_sheet** — bottom sheet from a Settings row (Android) or `.sheet(...)` (iOS).\n" +
644
+ "- **headless** — no UI; bootstrap connects in the background and the app surfaces status from `glasses.connection.state` if it cares.\n\n" +
645
+ "── Auto-grow ──\n\n" +
646
+ "ExtentosConnectionPage renders the library's connection-state machine plus the 8 runtime toggles. When the library ships a new toggle or sub-state, drop-in and themed instances pick it up automatically on library bump — that's the auto-update contract for the structural layout. Escape-hatch instances do not; you re-render against `glasses.toggles.state` manually.\n\n" +
647
+ "── Per-app config ──\n\n" +
648
+ "`ConnectionPageConfig` (passed to `ExtentosConnectionPage(config:)`) toggles individual sections — e.g. `showDevDiagnostics = false` to suppress the developer-facing telemetry panel in a release build. Structural ordering is locked (status always first, then toggles, then diagnostics).\n\n" +
649
+ "── FAQ ──\n\n" +
650
+ "- *Is it customizable?* Yes, 3 layers.\n" +
651
+ "- *Can I hide sections?* Yes via ConnectionPageConfig.\n" +
652
+ "- *Can I reorder sections?* No — structural lock (status always first).\n" +
653
+ "- *Can I bring my own design?* Yes via escape hatch, but no auto-updates.\n" +
654
+ "- *Does it auto-update with library bumps?* Yes for drop-in / themed; no for escape-hatch.",
655
+ keywords: ["ui", "connection page", "theme", "escape hatch", "appearance", "auto-grow", "placement", "connectionpageconfig", "headless", "dedicated_route", "settings_subscreen", "bottom_tab", "modal_sheet"],
1135
656
  relatedTools: ["generateConnectionModule"],
1136
657
  },
1137
658
  {