@agenticmail/core 0.9.15 → 0.9.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -220,6 +220,15 @@ interface AgenticMailConfig {
220
220
  * system event (visible in the web UI) but no email is sent.
221
221
  */
222
222
  operatorEmail?: string;
223
+ /**
224
+ * OpenAI API key — used only by the realtime voice bridge to open an
225
+ * OpenAI Realtime (`gpt-realtime`) session for live phone calls. When
226
+ * unset, phone missions still place and track calls (call-control),
227
+ * but a 46elks realtime-media WebSocket cannot be bridged to a
228
+ * conversational model. Read from the `OPENAI_API_KEY` env var or
229
+ * `config.json`. Optional — no other feature depends on it.
230
+ */
231
+ openaiApiKey?: string;
223
232
  masterKey: string;
224
233
  dataDir: string;
225
234
  }
@@ -1692,6 +1701,396 @@ declare class SmsPoller {
1692
1701
  private pollOnce;
1693
1702
  }
1694
1703
 
1704
+ /**
1705
+ * Telegram Bot API client — dependency-free (global `fetch`, Node 22+).
1706
+ *
1707
+ * Ported and MERGED from two already-tuned internal sources (licensing
1708
+ * confirmed by Ope, 2026-05-19 — plan §13.5):
1709
+ * - enterprise `agent-tools/tools/messaging/telegram.ts`
1710
+ * (`tgApi`, `stripMarkdown`, webhook management)
1711
+ * - agent-harness `fola-lib/telegram-api.mjs`
1712
+ * (auto-splitting `sendMessage`, the long-poll timeout discipline)
1713
+ *
1714
+ * Host-specific pieces are intentionally dropped to fit the single-tenant
1715
+ * open-source product: the local Bot API server auto-detection
1716
+ * (`http://localhost:8081`) and the filesystem media-download paths are
1717
+ * Fola-host infrastructure, not channel logic.
1718
+ *
1719
+ * Secrets: the bot token rides in the request URL path. Every error this
1720
+ * module surfaces is scrubbed with {@link redactBotToken} so a token can
1721
+ * never reach a log line or an API response — matching the SMS/phone
1722
+ * credential-handling bar.
1723
+ */
1724
+ /** Official Telegram Bot API base. */
1725
+ declare const TELEGRAM_API_BASE = "https://api.telegram.org";
1726
+ /** Telegram's hard per-message ceiling is 4096 characters. */
1727
+ declare const TELEGRAM_MESSAGE_LIMIT = 4096;
1728
+ /** We split below the hard limit, leaving headroom for safety. */
1729
+ declare const TELEGRAM_CHUNK_SIZE = 4000;
1730
+ /** A failed Telegram Bot API call. `description` is the provider message. */
1731
+ declare class TelegramApiError extends Error {
1732
+ readonly isTelegramApiError = true;
1733
+ readonly description: string;
1734
+ readonly errorCode?: number;
1735
+ constructor(method: string, description: string, errorCode?: number);
1736
+ }
1737
+ /**
1738
+ * Scrub bot tokens from any string before it can be logged or returned.
1739
+ *
1740
+ * A token looks like `<digits>:<35 url-safe chars>`. We redact both an
1741
+ * explicitly-known token (exact match) and anything matching the generic
1742
+ * token shape — so a token leaks neither when the caller knows it nor
1743
+ * when it is buried in, say, a `fetch` failure message carrying the URL.
1744
+ */
1745
+ declare function redactBotToken(text: string, token?: string): string;
1746
+ interface TelegramApiOptions {
1747
+ /**
1748
+ * Long-poll requests (`getUpdates` with a non-zero `timeout`) need an
1749
+ * HTTP timeout longer than the server-side poll window, or the socket
1750
+ * is torn down before Telegram replies.
1751
+ */
1752
+ longPoll?: boolean;
1753
+ }
1754
+ /**
1755
+ * POST a Telegram Bot API method with a JSON body and return `result`.
1756
+ * Throws {@link TelegramApiError} (token-scrubbed) on any failure.
1757
+ */
1758
+ declare function callTelegramApi<T = unknown>(token: string, method: string, body?: Record<string, unknown>, options?: TelegramApiOptions): Promise<T>;
1759
+ /**
1760
+ * Strip Markdown so agent replies arrive as clean plain text. Telegram's
1761
+ * Markdown parse modes are bypassed entirely (no `parse_mode`) to avoid
1762
+ * formatting collisions on arbitrary agent output.
1763
+ */
1764
+ declare function stripTelegramMarkdown(text: string): string;
1765
+ /**
1766
+ * Split text into <= `maxLen` chunks, cutting on a newline boundary when
1767
+ * one is reasonably close so messages do not break mid-word.
1768
+ */
1769
+ declare function splitTelegramMessage(text: string, maxLen?: number): string[];
1770
+ interface SendTelegramMessageOptions {
1771
+ /** Reply to a specific message id in the chat. */
1772
+ replyToMessageId?: number;
1773
+ /** Deliver silently (no push notification). */
1774
+ disableNotification?: boolean;
1775
+ }
1776
+ interface SendTelegramMessageResult {
1777
+ /** message_id of each chunk Telegram accepted, in order. */
1778
+ messageIds: number[];
1779
+ /** Number of chunks the text was split into. */
1780
+ chunks: number;
1781
+ }
1782
+ /**
1783
+ * Send a text message, auto-splitting anything over the per-message
1784
+ * ceiling. The reply target (if any) is attached only to the first
1785
+ * chunk so a long reply still threads correctly.
1786
+ */
1787
+ declare function sendTelegramMessage(token: string, chatId: string | number, text: string, options?: SendTelegramMessageOptions): Promise<SendTelegramMessageResult>;
1788
+ interface TelegramBotInfo {
1789
+ id: number;
1790
+ is_bot: boolean;
1791
+ username?: string;
1792
+ first_name?: string;
1793
+ }
1794
+ /** `getMe` — used to validate a token and capture the bot identity. */
1795
+ declare function getTelegramMe(token: string): Promise<TelegramBotInfo>;
1796
+ /** `getChat` — chat metadata (title, type, member count, ...). */
1797
+ declare function getTelegramChat(token: string, chatId: string | number): Promise<Record<string, unknown>>;
1798
+ interface GetUpdatesOptions {
1799
+ /** Max updates per call (1-100). */
1800
+ limit?: number;
1801
+ /** Long-poll window in seconds (0 = short poll). */
1802
+ timeoutSec?: number;
1803
+ }
1804
+ /** `getUpdates` long-poll — the poll-mode transport. */
1805
+ declare function getTelegramUpdates(token: string, offset: number, options?: GetUpdatesOptions): Promise<Array<Record<string, unknown>>>;
1806
+ interface SetWebhookOptions {
1807
+ /**
1808
+ * Shared secret echoed by Telegram in the
1809
+ * `X-Telegram-Bot-Api-Secret-Token` header on every webhook delivery.
1810
+ */
1811
+ secretToken?: string;
1812
+ /** Drop updates that queued up while no webhook was set. */
1813
+ dropPendingUpdates?: boolean;
1814
+ }
1815
+ /** `setWebhook` — register the inbound webhook URL (webhook-mode transport). */
1816
+ declare function setTelegramWebhook(token: string, url: string, options?: SetWebhookOptions): Promise<boolean>;
1817
+ /** `deleteWebhook` — switch back to poll mode. */
1818
+ declare function deleteTelegramWebhook(token: string): Promise<boolean>;
1819
+ /** `getWebhookInfo` — current webhook status. */
1820
+ declare function getTelegramWebhookInfo(token: string): Promise<Record<string, unknown>>;
1821
+
1822
+ /**
1823
+ * Telegram update parsing — pure, dependency-free.
1824
+ *
1825
+ * Ported from the inbound-message handling in the agent-harness Fola
1826
+ * bridge (`fola-telegram-bridge.mjs` — the `formatPrompt` header fields
1827
+ * and the `STOP_WORDS` abort set), stripped of all Fola-host specifics
1828
+ * (session routing, the fola-claude prompt envelope, media downloads).
1829
+ */
1830
+ type TelegramChatType = 'private' | 'group' | 'supergroup' | 'channel' | 'unknown';
1831
+ /** A normalized inbound Telegram text message. */
1832
+ interface ParsedTelegramMessage {
1833
+ /** Telegram `update_id` — drives the poll offset. */
1834
+ updateId: number;
1835
+ /** `message_id` within the chat. */
1836
+ messageId: number;
1837
+ /** Chat id as a string (Telegram ids are 64-bit; strings avoid loss). */
1838
+ chatId: string;
1839
+ chatType: TelegramChatType;
1840
+ chatTitle?: string;
1841
+ /** Sender id as a string; falls back to the chat id for channel posts. */
1842
+ fromId: string;
1843
+ fromName: string;
1844
+ fromUsername?: string;
1845
+ /** Message text (or media caption). Always non-empty for a parsed result. */
1846
+ text: string;
1847
+ /** `message_id` of the message this one replies to, if any. */
1848
+ replyToMessageId?: number;
1849
+ /** Text of the replied-to message, when Telegram included it. */
1850
+ replyToText?: string;
1851
+ /** ISO-8601 timestamp derived from the Telegram `date` epoch. */
1852
+ date: string;
1853
+ }
1854
+ /**
1855
+ * Normalize a raw Telegram update into a {@link ParsedTelegramMessage},
1856
+ * or `null` when it carries no usable text (callback queries, service
1857
+ * messages, edits with no text, non-text media without a caption).
1858
+ * Handles both `message` and `channel_post` envelopes.
1859
+ */
1860
+ declare function parseTelegramUpdate(update: unknown): ParsedTelegramMessage | null;
1861
+ /**
1862
+ * Words that, sent alone (case-insensitive, optional trailing
1863
+ * punctuation), signal "abort whatever you are doing for me". Kept
1864
+ * deliberately narrow — anything longer or ambiguous is a normal
1865
+ * message. Ported verbatim from the Fola bridge.
1866
+ */
1867
+ declare const TELEGRAM_STOP_WORDS: ReadonlySet<string>;
1868
+ /** True when `text` is a bare stop command. */
1869
+ declare function isTelegramStopCommand(text: string): boolean;
1870
+ /**
1871
+ * Compute the next `getUpdates` offset from a batch: one past the
1872
+ * highest `update_id` seen. Advancing the offset is what acknowledges
1873
+ * updates to Telegram, so this must run on the raw batch even if
1874
+ * individual updates fail to parse.
1875
+ */
1876
+ declare function nextTelegramOffset(currentOffset: number, updates: Array<{
1877
+ update_id?: unknown;
1878
+ }>): number;
1879
+
1880
+ /**
1881
+ * Telegram Manager — per-agent Telegram bot configuration + message log.
1882
+ *
1883
+ * Modeled on the existing {@link import('../sms/manager.js').SmsManager} — the
1884
+ * closest existing channel — so the hardening bar matches:
1885
+ * - Config lives in agent metadata under the `telegram` key.
1886
+ * - Credential fields (`botToken`, `webhookSecret`) are encrypted at
1887
+ * rest with the same AES-256-GCM scheme (`encryptSecret`) and
1888
+ * redacted on every read that leaves the process.
1889
+ * - Inbound/outbound messages are stored in a `telegram_messages` table.
1890
+ *
1891
+ * Single-tenant: there is no org / multi-tenant scoping and nothing is
1892
+ * hardcoded — every bot token, chat id and operator identity is
1893
+ * per-agent config supplied by the user.
1894
+ */
1895
+
1896
+ /** Transport mode: long-poll `getUpdates`, or a registered webhook. */
1897
+ type TelegramMode = 'poll' | 'webhook';
1898
+ interface TelegramConfig {
1899
+ /** Whether the Telegram channel is active for this agent. */
1900
+ enabled: boolean;
1901
+ /** Bot API token from @BotFather. SECRET — encrypted at rest, redacted on read. */
1902
+ botToken: string;
1903
+ /** Bot @username (non-secret, for display). Captured from `getMe` at setup. */
1904
+ botUsername?: string;
1905
+ /** Numeric bot id (non-secret). Captured from `getMe` at setup. */
1906
+ botId?: number;
1907
+ /**
1908
+ * Chat ids permitted to message the agent. EMPTY = nobody (fail-closed,
1909
+ * same posture as the Fola bridge). The operator chat is always allowed.
1910
+ */
1911
+ allowedChatIds: string[];
1912
+ /** Chat that receives `ask_operator` notifications + can approve (plan §13.4). */
1913
+ operatorChatId?: string;
1914
+ /** Inbound transport. */
1915
+ mode: TelegramMode;
1916
+ /** Public webhook URL (webhook mode only). */
1917
+ webhookUrl?: string;
1918
+ /**
1919
+ * Shared secret echoed by Telegram in the
1920
+ * `X-Telegram-Bot-Api-Secret-Token` header. SECRET — encrypted at
1921
+ * rest, redacted on read. Doubles as the webhook routing key.
1922
+ */
1923
+ webhookSecret?: string;
1924
+ /** Persisted `getUpdates` offset (poll mode). */
1925
+ pollOffset?: number;
1926
+ /** When the channel was configured. */
1927
+ configuredAt: string;
1928
+ }
1929
+ interface TelegramMessage {
1930
+ id: string;
1931
+ agentId: string;
1932
+ direction: 'inbound' | 'outbound';
1933
+ chatId: string;
1934
+ /** Telegram `message_id` (may be absent for a failed outbound attempt). */
1935
+ telegramMessageId?: number;
1936
+ /** Sender id (inbound only). */
1937
+ fromId?: string;
1938
+ text: string;
1939
+ status: 'received' | 'sent' | 'failed' | 'pending';
1940
+ createdAt: string;
1941
+ metadata?: Record<string, unknown>;
1942
+ }
1943
+ /** Telegram's `secret_token` charset, and our minimum entropy floor. */
1944
+ declare const TELEGRAM_WEBHOOK_SECRET_RE: RegExp;
1945
+ declare const TELEGRAM_MIN_WEBHOOK_SECRET_LENGTH = 16;
1946
+ /**
1947
+ * Redact the credential fields of a Telegram config for any value that
1948
+ * leaves the process (API responses, logs). The bot token is collapsed
1949
+ * to `***` entirely — never partially shown — matching the SMS bar and
1950
+ * the plan §13.5 rule "no token ever in a log line or API response".
1951
+ */
1952
+ declare function redactTelegramConfig(config: TelegramConfig): TelegramConfig;
1953
+ /**
1954
+ * Allow-list gate for INBOUND messages. A chat may talk to the agent
1955
+ * only if it is on `allowedChatIds` OR it is the configured operator
1956
+ * chat. An empty allow-list admits nobody except the operator chat (if set) — fail closed.
1957
+ */
1958
+ declare function isTelegramChatAllowed(config: TelegramConfig, chatId: string): boolean;
1959
+ declare class TelegramManager {
1960
+ private db;
1961
+ private encryptionKey?;
1962
+ private initialized;
1963
+ /**
1964
+ * Optional master key used to encrypt Telegram credentials at rest
1965
+ * (the same AES-256-GCM scheme SMS/phone use). When absent (tests, or
1966
+ * a deployment with no master key) configs are stored as-is and reads
1967
+ * tolerate plaintext — upgrades and downgrades both stay safe.
1968
+ */
1969
+ constructor(db: Database, encryptionKey?: string | undefined);
1970
+ private ensureTable;
1971
+ /** Encrypt the credential fields of a config before persisting. */
1972
+ private encryptConfig;
1973
+ /** Decrypt the credential fields of a config after loading. */
1974
+ private decryptConfig;
1975
+ /** Normalize a stored/loaded config object, defaulting missing fields. */
1976
+ private normalizeConfig;
1977
+ /** Get the Telegram config from agent metadata (credentials decrypted). */
1978
+ getConfig(agentId: string): TelegramConfig | null;
1979
+ /** Save the Telegram config to agent metadata (credentials encrypted). */
1980
+ saveConfig(agentId: string, config: TelegramConfig): void;
1981
+ /** Remove the Telegram config from agent metadata. */
1982
+ removeConfig(agentId: string): void;
1983
+ /** Persist a new poll offset without touching the rest of the config. */
1984
+ updatePollOffset(agentId: string, offset: number): void;
1985
+ /**
1986
+ * Resolve the agent that owns a webhook secret. Used to authenticate +
1987
+ * route an inbound Telegram webhook delivery: a webhook carries no bot
1988
+ * identity, so the `X-Telegram-Bot-Api-Secret-Token` header is the
1989
+ * routing key. The comparison is constant-time, and a non-match
1990
+ * returns `null` so the route can answer with a single uniform 403
1991
+ * (no enumeration oracle — same posture as the SMS webhook).
1992
+ */
1993
+ findAgentByWebhookSecret(secret: string): {
1994
+ agentId: string;
1995
+ config: TelegramConfig;
1996
+ } | null;
1997
+ /** True if an inbound message with this Telegram id is already stored. */
1998
+ inboundMessageExists(agentId: string, chatId: string, telegramMessageId: number): boolean;
1999
+ /** Record an inbound Telegram message. */
2000
+ recordInbound(agentId: string, input: {
2001
+ chatId: string;
2002
+ telegramMessageId: number;
2003
+ fromId?: string;
2004
+ text: string;
2005
+ createdAt?: string;
2006
+ }, metadata?: Record<string, unknown>): TelegramMessage;
2007
+ /** Record an outbound Telegram message attempt. */
2008
+ recordOutbound(agentId: string, input: {
2009
+ chatId: string;
2010
+ text: string;
2011
+ telegramMessageId?: number;
2012
+ status?: TelegramMessage['status'];
2013
+ }, metadata?: Record<string, unknown>): TelegramMessage;
2014
+ /** Update the status (+ optional metadata) of a stored message. */
2015
+ updateStatus(id: string, status: TelegramMessage['status'], metadata?: Record<string, unknown>): void;
2016
+ /** List stored Telegram messages for an agent, newest first. */
2017
+ listMessages(agentId: string, opts?: {
2018
+ direction?: 'inbound' | 'outbound';
2019
+ chatId?: string;
2020
+ limit?: number;
2021
+ offset?: number;
2022
+ }): TelegramMessage[];
2023
+ }
2024
+
2025
+ /**
2026
+ * Telegram ⇄ `ask_operator` bridge (plan §13.4 / §13.5) — pure helpers.
2027
+ *
2028
+ * The realtime voice agent's `ask_operator` tool records an *operator
2029
+ * query* on the phone mission; the API exposes it through the
2030
+ * operator-query endpoints. Email is the channel-agnostic default
2031
+ * notifier (plan §5). This module makes Telegram a first-class
2032
+ * notification + approval channel WITHOUT duplicating any of that
2033
+ * machinery — it only:
2034
+ *
2035
+ * 1. {@link formatOperatorQueryTelegramMessage} — renders the
2036
+ * notification text the operator receives.
2037
+ * 2. {@link parseTelegramOperatorReply} — parses the operator's
2038
+ * Telegram reply back into a `{ queryId, answer }`.
2039
+ *
2040
+ * The caller feeds the parsed result straight into the SAME
2041
+ * `PhoneManager.answerOperatorQuery` the inbound email-reply hook uses.
2042
+ */
2043
+ /**
2044
+ * Token embedded in a notification so the operator's reply can be
2045
+ * matched back to a query. Kept short — the operator may see it.
2046
+ */
2047
+ declare const TELEGRAM_OPERATOR_QUERY_TAG = "AMQ";
2048
+ interface OperatorQueryNotificationInput {
2049
+ queryId: string;
2050
+ question: string;
2051
+ callContext?: string;
2052
+ urgency?: string;
2053
+ missionId?: string;
2054
+ }
2055
+ /**
2056
+ * Render the Telegram message body for an `ask_operator` notification.
2057
+ * The trailing `[AMQ <id>]` token lets a reply be matched to the query
2058
+ * even when the operator does not use Telegram's native reply gesture.
2059
+ */
2060
+ declare function formatOperatorQueryTelegramMessage(input: OperatorQueryNotificationInput): string;
2061
+ /** A `kind` of `approve`/`deny` is a decision; `answer` is free-form. */
2062
+ type OperatorReplyKind = 'answer' | 'approve' | 'deny';
2063
+ interface ParsedOperatorReply {
2064
+ /**
2065
+ * Query id when the reply names one — explicitly (`/answer <id>`,
2066
+ * an inline `oq_…` token) or implicitly (a native Telegram reply
2067
+ * quoting a `[AMQ <id>]`-tagged notification). `undefined` when the
2068
+ * reply is a bare message and the caller must resolve the target.
2069
+ */
2070
+ queryId?: string;
2071
+ /** The answer text to record against the query. Always non-empty. */
2072
+ answer: string;
2073
+ kind: OperatorReplyKind;
2074
+ }
2075
+ /**
2076
+ * Parse an operator's Telegram message into a {@link ParsedOperatorReply},
2077
+ * or `null` when it carries no usable answer.
2078
+ *
2079
+ * Recognized forms (most explicit first):
2080
+ * - `/answer <queryId> <text>`
2081
+ * - `/approve [<queryId>] [note]` · `/deny [<queryId>] [note]`
2082
+ * - a plain message — `queryId` is taken from a quoted tagged
2083
+ * notification (`replyToText`) or an inline `oq_…` / `[AMQ …]` token,
2084
+ * and is otherwise left `undefined` for the caller to resolve.
2085
+ *
2086
+ * The `@botname` suffix Telegram appends to commands in groups
2087
+ * (`/approve@mybot`) is tolerated.
2088
+ */
2089
+ declare function parseTelegramOperatorReply(input: {
2090
+ text: string;
2091
+ replyToText?: string;
2092
+ }): ParsedOperatorReply | null;
2093
+
1695
2094
  declare const ELKS_REALTIME_AUDIO_FORMATS: readonly ["ulaw", "pcm_16000", "pcm_24000", "wav"];
1696
2095
  type ElksRealtimeAudioFormat = typeof ELKS_REALTIME_AUDIO_FORMATS[number];
1697
2096
  interface ElksRealtimeHelloMessage {
@@ -1737,6 +2136,1090 @@ declare function buildElksHandshakeMessages(options?: {
1737
2136
  sendFormat?: ElksRealtimeAudioFormat;
1738
2137
  }): ElksRealtimeOutboundMessage[];
1739
2138
 
2139
+ /**
2140
+ * Twilio Media Streams wire protocol.
2141
+ *
2142
+ * The realtime-voice counterpart of `realtime.ts` (the 46elks protocol).
2143
+ * Twilio connects a call's audio to a WebSocket via a TwiML
2144
+ * `<Connect><Stream url="wss://…"/></Connect>`; over that socket Twilio
2145
+ * speaks JSON messages keyed by an `event` field.
2146
+ *
2147
+ * Inbound (Twilio → us):
2148
+ * - `{ event: "connected", protocol, version }`
2149
+ * - `{ event: "start", start: { streamSid, callSid, accountSid,
2150
+ * mediaFormat, tracks, customParameters } }`
2151
+ * - `{ event: "media", media: { payload: "<base64 µ-law>", track,
2152
+ * chunk, timestamp } }`
2153
+ * - `{ event: "stop", stop: { accountSid, callSid } }`
2154
+ * - `{ event: "mark", mark: { name } }`
2155
+ *
2156
+ * Outbound (us → Twilio) — every outbound frame must echo the
2157
+ * `streamSid` Twilio handed us in the `start` event:
2158
+ * - `{ event: "media", streamSid, media: { payload: "<base64 µ-law>" } }`
2159
+ * - `{ event: "mark", streamSid, mark: { name } }`
2160
+ * - `{ event: "clear", streamSid }` — flush buffered playback
2161
+ * (barge-in / interrupt; the analogue of 46elks `interrupt`).
2162
+ *
2163
+ * The audio is G.711 µ-law, 8 kHz, mono, base64 — unlike 46elks which
2164
+ * uses linear PCM. OpenAI's GA Realtime API speaks µ-law natively
2165
+ * (`{ type: "audio/pcmu" }` @ 8 kHz), so on a Twilio call the bridge
2166
+ * does NO transcoding end to end.
2167
+ *
2168
+ * > The Media Streams message shapes above follow Twilio's public
2169
+ * > `<Stream>` documentation. Verify against current docs before the
2170
+ * > live smoke-test (same discipline as the 46elks / OpenAI wire notes).
2171
+ */
2172
+ /** µ-law payload sample rate Twilio Media Streams uses, in Hz. */
2173
+ declare const TWILIO_MEDIA_SAMPLE_RATE = 8000;
2174
+ /** The `connected` handshake frame — first frame Twilio sends. */
2175
+ interface TwilioConnectedMessage {
2176
+ event: 'connected';
2177
+ protocol?: string;
2178
+ version?: string;
2179
+ [key: string]: unknown;
2180
+ }
2181
+ /**
2182
+ * The `start` frame — carries the `streamSid` every outbound frame must
2183
+ * echo, plus the `callSid` we resolve the phone mission from. Twilio
2184
+ * sends exactly one `start` per stream, right after `connected`.
2185
+ */
2186
+ interface TwilioStartMessage {
2187
+ event: 'start';
2188
+ /** Stream identifier — required on every outbound media/mark/clear. */
2189
+ streamSid: string;
2190
+ /** The call SID — matches the `sid` from the Calls.json response. */
2191
+ callSid: string;
2192
+ accountSid?: string;
2193
+ mediaFormat?: {
2194
+ encoding?: string;
2195
+ sampleRate?: number;
2196
+ channels?: number;
2197
+ };
2198
+ tracks?: string[];
2199
+ /** `<Parameter>` values declared inside the TwiML `<Stream>`. */
2200
+ customParameters?: Record<string, string>;
2201
+ [key: string]: unknown;
2202
+ }
2203
+ /** A `media` frame — one small base64 µ-law audio chunk. */
2204
+ interface TwilioMediaMessage {
2205
+ event: 'media';
2206
+ /** Base64-encoded G.711 µ-law, 8 kHz mono. */
2207
+ payload: string;
2208
+ /** Inbound only: which leg the audio is from (`inbound`/`outbound`). */
2209
+ track?: string;
2210
+ }
2211
+ /** The `stop` frame — Twilio has stopped streaming this call's audio. */
2212
+ interface TwilioStopMessage {
2213
+ event: 'stop';
2214
+ callSid?: string;
2215
+ [key: string]: unknown;
2216
+ }
2217
+ /** A `mark` frame — a playback checkpoint echoed back when reached. */
2218
+ interface TwilioMarkMessage {
2219
+ event: 'mark';
2220
+ name: string;
2221
+ }
2222
+ type TwilioRealtimeInboundMessage = TwilioConnectedMessage | TwilioStartMessage | TwilioMediaMessage | TwilioStopMessage | TwilioMarkMessage;
2223
+ type TwilioRealtimeOutboundMessage = {
2224
+ event: 'media';
2225
+ streamSid: string;
2226
+ media: {
2227
+ payload: string;
2228
+ };
2229
+ } | {
2230
+ event: 'mark';
2231
+ streamSid: string;
2232
+ mark: {
2233
+ name: string;
2234
+ };
2235
+ } | {
2236
+ event: 'clear';
2237
+ streamSid: string;
2238
+ };
2239
+ /**
2240
+ * Parse one raw inbound Twilio Media Streams frame. Accepts a JSON
2241
+ * string or an already-parsed object. Throws on a malformed/unknown
2242
+ * frame — the caller (the bridge) swallows the throw and ignores the
2243
+ * frame, never tearing the call down for one bad message.
2244
+ */
2245
+ declare function parseTwilioRealtimeMessage(input: unknown): TwilioRealtimeInboundMessage;
2246
+ /**
2247
+ * Build an outbound `media` frame — synthesised agent audio for Twilio
2248
+ * to play to the caller. `data` may be a base64 string or raw bytes.
2249
+ * `streamSid` is the id Twilio handed us in the `start` event.
2250
+ */
2251
+ declare function buildTwilioMediaMessage(streamSid: string, data: string | Uint8Array): TwilioRealtimeOutboundMessage;
2252
+ /**
2253
+ * Build a `clear` frame — tells Twilio to flush any audio still
2254
+ * buffered for playback. This is how barge-in / interrupt works on a
2255
+ * Twilio call: when the caller starts talking the agent must stop
2256
+ * mid-sentence, so we drop everything queued. (46elks calls this
2257
+ * `interrupt`; Twilio calls it `clear`.)
2258
+ */
2259
+ declare function buildTwilioClearMessage(streamSid: string): TwilioRealtimeOutboundMessage;
2260
+ /**
2261
+ * Build a `mark` frame — a named playback checkpoint. Twilio echoes the
2262
+ * mark back (as an inbound `mark` event) once the audio queued before
2263
+ * it has actually been played to the caller. Useful for end-of-turn
2264
+ * detection; the bridge does not require it but exposes the builder for
2265
+ * parity with the protocol surface.
2266
+ */
2267
+ declare function buildTwilioMarkMessage(streamSid: string, name: string): TwilioRealtimeOutboundMessage;
2268
+
2269
+ /**
2270
+ * Twilio webhook helpers — request-signature validation and TwiML
2271
+ * generation.
2272
+ *
2273
+ * This is the call-control counterpart of the 46elks webhook handling
2274
+ * in `manager.ts`. Twilio secures every webhook it sends with an
2275
+ * `X-Twilio-Signature` header; we answer Twilio's voice webhook with
2276
+ * TwiML (an XML document) that connects the call's audio to the
2277
+ * realtime voice WebSocket.
2278
+ *
2279
+ * Everything here is pure (no I/O, no sockets) so it is fully
2280
+ * unit-testable and keeps `@agenticmail/core` dependency-light.
2281
+ */
2282
+ /**
2283
+ * Compute the Twilio request signature for a webhook request.
2284
+ *
2285
+ * Twilio's scheme (per its Security docs): take the full request URL,
2286
+ * then for an `application/x-www-form-urlencoded` POST append every
2287
+ * POST parameter — sorted by key — as `key` immediately followed by
2288
+ * `value`, with no separators. HMAC-SHA1 that string keyed by the
2289
+ * account `AuthToken`, and base64-encode the digest. Twilio sends the
2290
+ * result in the `X-Twilio-Signature` header.
2291
+ *
2292
+ * > Verify the exact construction against Twilio's current Security
2293
+ * > documentation before the live smoke-test.
2294
+ *
2295
+ * @param authToken the Twilio account AuthToken (the signing key)
2296
+ * @param url the full URL Twilio requested, exactly as configured
2297
+ * @param params the POST body parameters (form-encoded fields)
2298
+ */
2299
+ declare function buildTwilioSignature(authToken: string, url: string, params?: Record<string, string>): string;
2300
+ /**
2301
+ * Validate an `X-Twilio-Signature` header against the request,
2302
+ * timing-safe. Fails closed: a missing/empty signature or token, or any
2303
+ * mismatch, returns `false` — never throws. The comparison is
2304
+ * constant-time so a caller cannot probe the expected signature byte by byte.
2305
+ */
2306
+ declare function validateTwilioSignature(authToken: string, url: string, params: Record<string, string>, providedSignature: string): boolean;
2307
+ /**
2308
+ * XML-escape a string for safe inclusion in a TwiML attribute or text
2309
+ * node. TwiML is XML, so any value we interpolate (a websocket URL with
2310
+ * query params, a `<Parameter>` value) must have its metacharacters
2311
+ * escaped or a stray `&`/`"` would produce malformed XML.
2312
+ */
2313
+ declare function escapeXml(value: string): string;
2314
+ interface TwilioStreamTwiMLOptions {
2315
+ /** The `wss://…` URL Twilio should open a Media Stream to. */
2316
+ streamUrl: string;
2317
+ /**
2318
+ * `<Parameter>` name/value pairs nested in the `<Stream>` — Twilio
2319
+ * echoes them back inside the media-stream `start` event's
2320
+ * `customParameters`. Used to carry the mission id / token so the
2321
+ * media socket can be matched to its phone mission even though the
2322
+ * `<Stream>` URL itself is fixed.
2323
+ */
2324
+ parameters?: Record<string, string>;
2325
+ }
2326
+ /**
2327
+ * Build the TwiML returned from the Twilio voice webhook: a
2328
+ * `<Connect><Stream>` document that hands the live call audio to our
2329
+ * realtime voice WebSocket.
2330
+ *
2331
+ * <?xml version="1.0" encoding="UTF-8"?>
2332
+ * <Response>
2333
+ * <Connect>
2334
+ * <Stream url="wss://host/path">
2335
+ * <Parameter name="missionId" value="…"/>
2336
+ * </Stream>
2337
+ * </Connect>
2338
+ * </Response>
2339
+ *
2340
+ * `<Connect><Stream>` (as opposed to `<Start><Stream>`) makes the
2341
+ * stream bidirectional and keeps the call alive for its duration — the
2342
+ * call ends when the stream ends. Every interpolated value is
2343
+ * XML-escaped.
2344
+ *
2345
+ * > The `<Connect><Stream>` / `<Parameter>` TwiML verbs are per
2346
+ * > Twilio's public Media Streams docs; verify against current docs
2347
+ * > before the live smoke-test.
2348
+ */
2349
+ declare function buildTwilioStreamTwiML(opts: TwilioStreamTwiMLOptions): string;
2350
+ /**
2351
+ * Build a minimal `<Response><Say>…</Say></Response>` TwiML document —
2352
+ * the fallback Twilio voice response when the realtime voice runtime
2353
+ * cannot be connected (e.g. no OpenAI key). Mirrors the 46elks
2354
+ * voice-start `play` action.
2355
+ */
2356
+ declare function buildTwilioSayTwiML(message: string): string;
2357
+
2358
+ /**
2359
+ * Realtime voice WebSocket endpoint paths.
2360
+ *
2361
+ * These are the URL paths a phone carrier's media socket connects to.
2362
+ * They live in `@agenticmail/core` (rather than only in the API
2363
+ * package) because the {@link PhoneManager} needs the Twilio path to
2364
+ * build the `<Stream url>` inside the TwiML it returns from the Twilio
2365
+ * voice webhook. The API package mounts its WebSocket servers on the
2366
+ * same constants, so the path is defined exactly once.
2367
+ */
2368
+ /**
2369
+ * Path the 46elks websocket-number's `websocket_url` points at. 46elks
2370
+ * resolves the mission from the `hello` frame's `callid`, so the path
2371
+ * carries no mission identity itself.
2372
+ */
2373
+ declare const ELKS_REALTIME_WS_PATH = "/api/agenticmail/calls/realtime";
2374
+ /**
2375
+ * Path a Twilio `<Connect><Stream>` connects to. The mission id + token
2376
+ * ride as query params (and as `<Parameter>` values in the TwiML), so
2377
+ * the media socket can be matched to its mission before the Twilio
2378
+ * `start` frame even arrives.
2379
+ */
2380
+ declare const TWILIO_REALTIME_WS_PATH = "/api/agenticmail/calls/twilio-stream";
2381
+
2382
+ /**
2383
+ * Realtime transport adapter — the provider-pluggable seam of the
2384
+ * realtime voice bridge.
2385
+ *
2386
+ * `RealtimeVoiceBridge` is transport-agnostic on the *socket* side
2387
+ * (it only ever sees abstract {@link RealtimeBridgePort}s), but the
2388
+ * *messages* on the carrier side are provider-specific: 46elks speaks
2389
+ * `hello`/`audio`/`bye` JSON over linear PCM, while Twilio Media Streams
2390
+ * speaks `connected`/`start`/`media`/`stop` JSON over G.711 µ-law.
2391
+ *
2392
+ * A {@link RealtimeTransportAdapter} captures exactly those differences:
2393
+ * - how to interpret one inbound carrier frame ({@link parseInbound}),
2394
+ * - how to build the outbound control frames (handshake / audio /
2395
+ * interrupt / bye),
2396
+ * - and which OpenAI Realtime audio format the carrier's audio needs.
2397
+ *
2398
+ * Everything else — the OpenAI session lifecycle, function calling,
2399
+ * barge-in handling, transcript accumulation, teardown — is identical
2400
+ * across providers and lives once in the bridge. This is the
2401
+ * "generalise the bridge" design: one bridge, one OpenAI side, a thin
2402
+ * per-provider adapter, no duplicated conversation logic.
2403
+ *
2404
+ * Adapters are pure (no sockets, no I/O), so the bridge stays fully
2405
+ * unit-testable and `@agenticmail/core` stays dependency-light.
2406
+ */
2407
+
2408
+ /** Provider identifier — kept in lockstep with `PhoneTransportProvider`. */
2409
+ type RealtimeTransportProvider = '46elks' | 'twilio';
2410
+ /**
2411
+ * A normalised inbound carrier event. Provider message shapes are
2412
+ * collapsed into this small, bridge-facing vocabulary so the bridge
2413
+ * never has to branch on the provider:
2414
+ * - `hello` — the call leg is live; carries the provider call id.
2415
+ * (46elks `hello`; Twilio `start`. Twilio's `connected`
2416
+ * frame is internal handshake noise → `ignore`.)
2417
+ * - `audio` — one inbound audio frame (base64), to relay to OpenAI.
2418
+ * - `bye` — the caller side ended the call.
2419
+ * - `ignore` — a known-but-uninteresting frame (e.g. Twilio `mark`).
2420
+ */
2421
+ type RealtimeInboundEvent = {
2422
+ kind: 'hello';
2423
+ callId: string;
2424
+ from?: string;
2425
+ to?: string;
2426
+ } | {
2427
+ kind: 'audio';
2428
+ data: string;
2429
+ } | {
2430
+ kind: 'bye';
2431
+ reason?: string;
2432
+ message?: string;
2433
+ } | {
2434
+ kind: 'ignore';
2435
+ };
2436
+ /**
2437
+ * The provider-specific seam the bridge drives. One instance per call.
2438
+ * Adapters that need per-call state (Twilio's `streamSid`) are
2439
+ * stateful — {@link parseInbound} latches that state from the frame that
2440
+ * yields the `hello` event (Twilio's `start`); the outbound builders read it back.
2441
+ */
2442
+ interface RealtimeTransportAdapter {
2443
+ /** Provider this adapter speaks for — used only for log/transcript text. */
2444
+ readonly provider: RealtimeTransportProvider;
2445
+ /**
2446
+ * Prefix for the carrier-side `onEnd` reason strings (`<prefix>-bye`,
2447
+ * `<prefix>-closed`, `<prefix>-error`). The 46elks adapter keeps the
2448
+ * historical `elks` prefix so existing call sites + tests that match
2449
+ * on `elks-bye` / `elks-closed` stay correct; Twilio uses `twilio`.
2450
+ */
2451
+ readonly endReasonPrefix: string;
2452
+ /**
2453
+ * The OpenAI Realtime audio format the carrier's audio requires.
2454
+ * 46elks linear PCM → `audio/pcm` @ 24 kHz; Twilio G.711 µ-law →
2455
+ * `audio/pcmu` @ 8 kHz. Used by `buildRealtimeSessionConfig` so the
2456
+ * OpenAI session in/out format matches the carrier with no transcode.
2457
+ */
2458
+ readonly openaiAudioFormat: {
2459
+ type: string;
2460
+ rate?: number;
2461
+ };
2462
+ /**
2463
+ * Normalise one raw inbound carrier frame. Throws on a malformed
2464
+ * frame (the bridge catches the throw and ignores that frame). A
2465
+ * well-formed but uninteresting frame returns `{ kind: 'ignore' }`.
2466
+ */
2467
+ parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
2468
+ /**
2469
+ * Frames to send the carrier the moment the call leg is live (right
2470
+ * after the `hello` event). 46elks needs a `listening`+`sending`
2471
+ * format handshake; Twilio needs nothing, so this is `[]`.
2472
+ */
2473
+ buildHandshake(): Record<string, unknown>[];
2474
+ /** Build one outbound audio frame (synthesised agent speech, base64). */
2475
+ buildAudio(base64: string): Record<string, unknown>;
2476
+ /**
2477
+ * Build the barge-in frame — tells the carrier to drop buffered
2478
+ * playback so the agent stops mid-sentence. 46elks `interrupt`;
2479
+ * Twilio `clear`.
2480
+ */
2481
+ buildInterrupt(): Record<string, unknown>;
2482
+ /**
2483
+ * Build the end-of-call frame for the carrier, or `null` if the
2484
+ * carrier has none (Twilio has no client-initiated stream-stop frame;
2485
+ * the bridge just closes the socket).
2486
+ */
2487
+ buildBye(): Record<string, unknown> | null;
2488
+ }
2489
+ /**
2490
+ * The 46elks realtime-media adapter — the original transport, behaviour
2491
+ * unchanged. Audio is linear PCM, so the OpenAI session uses
2492
+ * `audio/pcm` @ 24 kHz (matching 46elks `pcm_24000`).
2493
+ */
2494
+ declare class ElksRealtimeTransport implements RealtimeTransportAdapter {
2495
+ private readonly listenFormat;
2496
+ private readonly sendFormat;
2497
+ readonly provider: "46elks";
2498
+ readonly endReasonPrefix = "elks";
2499
+ readonly openaiAudioFormat: {
2500
+ type: string;
2501
+ };
2502
+ constructor(listenFormat?: ElksRealtimeAudioFormat, sendFormat?: ElksRealtimeAudioFormat);
2503
+ parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
2504
+ buildHandshake(): Record<string, unknown>[];
2505
+ buildAudio(base64: string): Record<string, unknown>;
2506
+ buildInterrupt(): Record<string, unknown>;
2507
+ buildBye(): Record<string, unknown>;
2508
+ }
2509
+ /**
2510
+ * The Twilio Media Streams adapter. Audio is G.711 µ-law @ 8 kHz, which
2511
+ * the OpenAI GA Realtime API speaks natively (`audio/pcmu`) — so the
2512
+ * bridge does NO transcoding for a Twilio call.
2513
+ *
2514
+ * Stateful per call: Twilio assigns a `streamSid` in the `start` frame
2515
+ * that EVERY outbound frame must echo. {@link parseInbound} latches it
2516
+ * on `start`; the outbound builders read it back. Building an outbound
2517
+ * frame before `start` throws — but the bridge never does that (it only
2518
+ * sends audio after the `hello` event, which `start` produces).
2519
+ */
2520
+ declare class TwilioRealtimeTransport implements RealtimeTransportAdapter {
2521
+ readonly provider: "twilio";
2522
+ readonly endReasonPrefix = "twilio";
2523
+ readonly openaiAudioFormat: {
2524
+ type: string;
2525
+ };
2526
+ /** Latched from the Twilio `start` frame; required on every outbound. */
2527
+ private streamSid;
2528
+ /** The active `streamSid`, once the `start` frame has been seen. */
2529
+ get currentStreamSid(): string;
2530
+ parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
2531
+ buildHandshake(): Record<string, unknown>[];
2532
+ buildAudio(base64: string): Record<string, unknown>;
2533
+ buildInterrupt(): Record<string, unknown>;
2534
+ buildBye(): Record<string, unknown> | null;
2535
+ }
2536
+ /** Construct the transport adapter for a provider. */
2537
+ declare function createRealtimeTransport(provider: RealtimeTransportProvider): RealtimeTransportAdapter;
2538
+
2539
+ /**
2540
+ * Realtime voice tools — the tool-using layer on top of the v0.9.52
2541
+ * audio bridge (see `realtime-bridge.ts`).
2542
+ *
2543
+ * # What this file is
2544
+ *
2545
+ * v0.9.52 wired a 46elks ⇄ OpenAI Realtime audio bridge that could only
2546
+ * *converse*. v0.9.53 turns the voice agent into something that can
2547
+ * *act* on a live call: ask the human operator a question, look things
2548
+ * up, tell the time. The enabler is OpenAI Realtime **function
2549
+ * calling** — tools declared in `session.tools`, called by the model
2550
+ * mid-call, dispatched here, and answered back over the wire.
2551
+ *
2552
+ * This module is the transport-agnostic half of that:
2553
+ * - {@link RealtimeToolDefinition} — the JSON-schema tool shapes the
2554
+ * bridge declares to OpenAI.
2555
+ * - {@link ToolExecutor} — the interface the bridge dispatches calls
2556
+ * into; {@link createToolExecutor} builds one from a handler map.
2557
+ * - The *pure / fast* tool implementations ({@link getDatetime},
2558
+ * {@link recallMemory}, {@link webSearch}) — no sockets, no DB
2559
+ * handles passed in directly, fully unit-testable.
2560
+ * - {@link pollForOperatorAnswer} — the poll loop `ask_operator` uses
2561
+ * to block (with an injectable clock + sleep) until the operator
2562
+ * answers or the hard timeout elapses.
2563
+ * - {@link parseOperatorQueryReply} — parses an operator's *email*
2564
+ * reply back into a query answer (the channel-agnostic default
2565
+ * notifier path, see the plan §5).
2566
+ *
2567
+ * The *side-effecting* wiring — actually recording an operator query on
2568
+ * a mission, sending the notification email, calling a web-search API —
2569
+ * lives in `@agenticmail/api`'s `realtime-ws.ts`, which builds a
2570
+ * {@link ToolExecutor} per connection. Keeping that split means
2571
+ * `@agenticmail/core` stays dependency-light and every piece here is
2572
+ * testable with fakes.
2573
+ *
2574
+ * > NOTE on OpenAI Realtime GA event/field names: the function-calling
2575
+ * > wire shapes (`session.tools`, `tool_choice`, `function_call_output`,
2576
+ * > `response.function_call_arguments.done`) follow the plan. Any name
2577
+ * > that could not be verified against the v0.9.52 code is flagged with
2578
+ * > a comment in `realtime-bridge.ts` — verify against current OpenAI
2579
+ * > docs before the live smoke test (same discipline as v0.9.52's
2580
+ * > `response.output_audio.delta` vs legacy `response.audio.delta`).
2581
+ */
2582
+ /**
2583
+ * A function tool as declared in the OpenAI Realtime `session.tools`
2584
+ * array. The shape is the GA `gpt-realtime` function-tool schema:
2585
+ * `{ type: 'function', name, description, parameters: <JSON Schema> }`.
2586
+ */
2587
+ interface RealtimeToolDefinition {
2588
+ type: 'function';
2589
+ name: string;
2590
+ description: string;
2591
+ parameters: {
2592
+ type: 'object';
2593
+ properties: Record<string, unknown>;
2594
+ required?: string[];
2595
+ additionalProperties?: boolean;
2596
+ };
2597
+ }
2598
+ /** A function call parsed off the wire and ready to dispatch. */
2599
+ interface RealtimeToolCall {
2600
+ /** OpenAI `call_id` — echoed back on the `function_call_output`. */
2601
+ callId: string;
2602
+ /** The tool name the model chose. */
2603
+ name: string;
2604
+ /** Parsed `arguments` object (`{}` if the model sent none / invalid). */
2605
+ arguments: Record<string, unknown>;
2606
+ }
2607
+ /** The result of executing a tool — `output` is a string for the model. */
2608
+ interface RealtimeToolResult {
2609
+ /** Sent back verbatim as the `function_call_output` `output` field. */
2610
+ output: string;
2611
+ }
2612
+ /**
2613
+ * Dispatches a model tool call to its implementation. The bridge holds
2614
+ * one of these (injected) and calls {@link execute} for every
2615
+ * function call; it must always resolve (never reject) — a failing
2616
+ * tool resolves to an `output` the model can read and recover from.
2617
+ */
2618
+ interface ToolExecutor {
2619
+ execute(call: RealtimeToolCall): Promise<RealtimeToolResult>;
2620
+ }
2621
+ /**
2622
+ * One tool's implementation. Receives the parsed arguments + the raw
2623
+ * call. May return a plain string or a value that is JSON-stringified.
2624
+ * It MAY throw — {@link createToolExecutor} catches and turns a thrown
2625
+ * error into a model-readable output, so a buggy tool never wedges the
2626
+ * call.
2627
+ */
2628
+ type RealtimeToolHandler = (args: Record<string, unknown>, call: RealtimeToolCall) => Promise<string | Record<string, unknown>> | string | Record<string, unknown>;
2629
+ /**
2630
+ * Hard ceiling on how long `ask_operator` blocks waiting for an answer.
2631
+ * Per Ope (2026-05-19): a human caller will hold ~5 minutes; past that
2632
+ * the graceful path is a callback (plan §7), not an indefinite hold.
2633
+ */
2634
+ declare const OPERATOR_QUERY_TIMEOUT_MS: number;
2635
+ /** How often `ask_operator` re-checks the query record for an answer. */
2636
+ declare const OPERATOR_QUERY_POLL_INTERVAL_MS = 3000;
2637
+ /**
2638
+ * Returned to the model as the `ask_operator` tool output when the
2639
+ * operator did not answer in time. Phrased as an instruction the model
2640
+ * can act on — it must NOT invent an answer, it should tell the caller
2641
+ * a follow-up / callback is coming (the bridge + API handle the actual
2642
+ * callback-on-disconnect, see the plan §7).
2643
+ */
2644
+ declare const OPERATOR_QUERY_TIMEOUT_SENTINEL: string;
2645
+ /**
2646
+ * Subject-line tag used by the email notifier. The query id is embedded
2647
+ * so an operator's *reply* can be parsed straight back into an answer
2648
+ * ({@link parseOperatorQueryReply}) — the channel-agnostic default path.
2649
+ */
2650
+ declare const OPERATOR_QUERY_SUBJECT_TAG = "AgenticMail Operator Query";
2651
+ /**
2652
+ * Phase 1 keystone — human-in-the-loop. The model calls this when it
2653
+ * needs information, a decision, or approval it does not have. It can
2654
+ * take minutes to answer, so the model is instructed (see
2655
+ * {@link buildRealtimeToolGuidance}) to put the caller on hold first.
2656
+ */
2657
+ declare const ASK_OPERATOR_TOOL: RealtimeToolDefinition;
2658
+ /** Phase 2 — a web search. Returns the top results as text. */
2659
+ declare const WEB_SEARCH_TOOL: RealtimeToolDefinition;
2660
+ /** Phase 2 — searches the agent's own persistent memory. */
2661
+ declare const RECALL_MEMORY_TOOL: RealtimeToolDefinition;
2662
+ /** Phase 2 — the current date/time, for resolving "tomorrow" etc. */
2663
+ declare const GET_DATETIME_TOOL: RealtimeToolDefinition;
2664
+ /**
2665
+ * Phase 2 (optional) — searches the agent's AgenticMail inbox. Defined
2666
+ * here so the schema is canonical, but NOT wired into the default
2667
+ * executor in `realtime-ws.ts` (it needs IMAP access); a deployment can
2668
+ * opt in. Kept in the file so the tool surface is documented in one place.
2669
+ */
2670
+ declare const SEARCH_EMAIL_TOOL: RealtimeToolDefinition;
2671
+ /** Every tool defined in this module, keyed by name. */
2672
+ declare const REALTIME_TOOL_DEFINITIONS: Record<string, RealtimeToolDefinition>;
2673
+ /**
2674
+ * Build the natural-language guidance appended to the Realtime session
2675
+ * `instructions` when tools are present. This is the *model-side* half
2676
+ * of "keep the line warm" (plan §6): the model announces a hold before
2677
+ * a slow tool and reassures the caller while it waits. The bridge-side
2678
+ * safety net (the tool-call timeout) is in `realtime-bridge.ts`.
2679
+ */
2680
+ declare function buildRealtimeToolGuidance(tools: readonly RealtimeToolDefinition[]): string;
2681
+ /**
2682
+ * Build a {@link ToolExecutor} from a `name → handler` map. The
2683
+ * executor:
2684
+ * - resolves an unknown tool name to a model-readable "not available"
2685
+ * output (never rejects),
2686
+ * - catches a thrown / rejected handler and turns it into a
2687
+ * model-readable failure output,
2688
+ * - JSON-stringifies a non-string handler return.
2689
+ *
2690
+ * So a single buggy or missing tool can never crash the bridge or
2691
+ * wedge the call — the model just gets an output it can recover from.
2692
+ */
2693
+ declare function createToolExecutor(handlers: Record<string, RealtimeToolHandler>): ToolExecutor;
2694
+ interface GetDatetimeOptions {
2695
+ /** IANA timezone; defaults to UTC. */
2696
+ timezone?: string;
2697
+ /** Clock override for tests. */
2698
+ now?: Date;
2699
+ }
2700
+ /**
2701
+ * `get_datetime` — current date/time as a human-readable line plus the
2702
+ * exact ISO timestamp. Pure: an injectable clock makes it deterministic
2703
+ * in tests. An invalid timezone falls back to UTC rather than throwing.
2704
+ */
2705
+ declare function getDatetime(options?: GetDatetimeOptions): string;
2706
+ /**
2707
+ * Minimal structural interface for the bit of `AgentMemoryManager`
2708
+ * {@link recallMemory} needs. Declared here so this module does not
2709
+ * have to import the full memory manager — `AgentMemoryManager`
2710
+ * satisfies it structurally.
2711
+ */
2712
+ interface MemoryRecaller {
2713
+ recall(agentId: string, query: string, limit?: number): Promise<Array<{
2714
+ title: string;
2715
+ content: string;
2716
+ }>>;
2717
+ }
2718
+ /**
2719
+ * `recall_memory` — query the agent's own persistent memory. Returns a
2720
+ * compact numbered list, or a clear "nothing found" line so the model
2721
+ * does not stall on an empty result.
2722
+ */
2723
+ declare function recallMemory(memory: MemoryRecaller, agentId: string, query: string, limit?: number): Promise<string>;
2724
+ interface WebSearchOptions {
2725
+ /**
2726
+ * Search endpoint. Defaults to the DuckDuckGo HTML endpoint
2727
+ * ({@link DEFAULT_WEB_SEARCH_ENDPOINT}) — free, no API key, per the
2728
+ * scope decision (plan §13.1). Overridable for tests / a mirror.
2729
+ */
2730
+ endpoint?: string;
2731
+ /** `fetch` override for tests. */
2732
+ fetchFn?: typeof fetch;
2733
+ /** Max results to fold into the output (default 5, capped at 10). */
2734
+ maxResults?: number;
2735
+ }
2736
+ /**
2737
+ * Default web-search endpoint — DuckDuckGo's keyless HTML results page.
2738
+ * Chosen because it needs no API key or account (plan §13.1: "web_search
2739
+ * → DuckDuckGo only, free, no key").
2740
+ */
2741
+ declare const DEFAULT_WEB_SEARCH_ENDPOINT = "https://html.duckduckgo.com/html/";
2742
+ /**
2743
+ * Untrusted-content marker prefixed to every non-empty `web_search`
2744
+ * result block (v0.9.53 security review).
2745
+ *
2746
+ * Web-search results are scraped page titles + snippets — attacker-
2747
+ * controllable text that the model consumes verbatim as a
2748
+ * `function_call_output`. A page that ranks for the caller's query can
2749
+ * plant instructions in its `<title>` or snippet; without an explicit
2750
+ * delimiter the model may read them as commands rather than data — the
2751
+ * classic search-result prompt-injection vector. This marker tells the
2752
+ * model the block is strictly reference data and that any instructions
2753
+ * inside it must be ignored.
2754
+ *
2755
+ * (The enterprise web-search tool wrapped results the same way via
2756
+ * `wrapWebContent` / `externalContent.untrusted`; the fresh DuckDuckGo
2757
+ * helper here re-establishes that defense without copying the file.)
2758
+ */
2759
+ declare const WEB_SEARCH_UNTRUSTED_PREFIX: string;
2760
+ /**
2761
+ * `web_search` — a keyless DuckDuckGo lookup returning the top results
2762
+ * as text. Reimplemented fresh as a small HTML-scrape helper (plan
2763
+ * §13.1 / the host's provenance note: do not copy the enterprise
2764
+ * file). No API key is needed, so unlike the other tools this one is
2765
+ * always available.
2766
+ *
2767
+ * Fails *soft*: a non-OK response, an unreadable body, or a thrown
2768
+ * fetch all resolve to a model-readable line rather than throwing — a
2769
+ * search outage must never kill a live call.
2770
+ *
2771
+ * > DuckDuckGo's HTML page is an unversioned scrape target, not a
2772
+ * > documented API: the `result__a` / `result__snippet` selectors and
2773
+ * > the `/l/?uddg=` redirect wrapper below match the endpoint today;
2774
+ * > verify them before the live smoke test (same "verify the wire
2775
+ * > shape" discipline as the OpenAI Realtime event names).
2776
+ */
2777
+ declare function webSearch(query: string, options?: WebSearchOptions): Promise<string>;
2778
+ interface OperatorQueryPollOptions {
2779
+ /** Hard timeout (default {@link OPERATOR_QUERY_TIMEOUT_MS}). */
2780
+ timeoutMs?: number;
2781
+ /** Poll interval (default {@link OPERATOR_QUERY_POLL_INTERVAL_MS}). */
2782
+ pollIntervalMs?: number;
2783
+ /** Clock override for tests (returns ms). */
2784
+ now?: () => number;
2785
+ /** Sleep override for tests. */
2786
+ sleep?: (ms: number) => Promise<void>;
2787
+ /** Abort handle — when `aborted` flips true the poll resolves null. */
2788
+ signal?: {
2789
+ readonly aborted: boolean;
2790
+ };
2791
+ }
2792
+ /**
2793
+ * Poll `readAnswer` until it yields a non-empty answer or the timeout
2794
+ * elapses. Returns the trimmed answer, or `null` on timeout / abort.
2795
+ *
2796
+ * This is the loop `ask_operator` runs to *block* the tool call while
2797
+ * it waits for the human. The clock + sleep are injectable so tests run
2798
+ * the full timeout path in microseconds. The `signal` lets the caller
2799
+ * abandon the wait early — e.g. the call dropped, so there is no point
2800
+ * polling (the unanswered query then drives callback-on-disconnect).
2801
+ */
2802
+ declare function pollForOperatorAnswer(readAnswer: () => Promise<string | null | undefined> | string | null | undefined, options?: OperatorQueryPollOptions): Promise<string | null>;
2803
+ /**
2804
+ * Build the notification email subject for an operator query. The query
2805
+ * id is embedded in a `[tag id]` token so the operator's reply (subject
2806
+ * `Re: [tag id] …`) can be parsed straight back into an answer.
2807
+ */
2808
+ declare function operatorQuerySubject(queryId: string, callContext?: string): string;
2809
+ /**
2810
+ * Parse an operator's email reply into `{ queryId, answer }`, or `null`
2811
+ * if the email is not an operator-query reply (no id token in the
2812
+ * subject) or carries no usable answer text.
2813
+ *
2814
+ * This is the pure half of the channel-agnostic notifier (plan §5): the
2815
+ * default notifier emails the operator; their reply lands back in the
2816
+ * agent's inbox; the inbound mail hook runs this and posts the answer
2817
+ * to the same query record the HTTP endpoint writes to.
2818
+ */
2819
+ declare function parseOperatorQueryReply(input: {
2820
+ subject?: string;
2821
+ text?: string;
2822
+ }): {
2823
+ queryId: string;
2824
+ answer: string;
2825
+ } | null;
2826
+ /**
2827
+ * Extract the bare email address from a `From`-style value, lowercased
2828
+ * and trimmed. Accepts `"Name" <addr@host>`, `<addr@host>`, or a plain
2829
+ * `addr@host`; returns `''` for an empty / unusable input.
2830
+ */
2831
+ declare function extractEmailAddress(value: string | null | undefined): string;
2832
+ /**
2833
+ * Fail-closed sender check for the operator email-reply answer path
2834
+ * (plan §5; added in the v0.9.53 security review).
2835
+ *
2836
+ * An operator-query answer arriving by email is gated by the query id
2837
+ * embedded in the subject — an unguessable 122-bit v4 UUID, but one that
2838
+ * rides in *plaintext* subject lines through quoting, forwarding, and
2839
+ * relay/provider logs. The id alone is therefore a materially weaker
2840
+ * gate than the HMAC per-mission token used on the phone webhook (which
2841
+ * only 46elks ever sees). So an emailed answer is accepted ONLY when its
2842
+ * `From` address matches the configured operator address (case-
2843
+ * insensitive, address-only).
2844
+ *
2845
+ * Returns `false` when no `operatorEmail` is configured — no operator
2846
+ * means nobody is trusted (fail closed), so the email-reply path is
2847
+ * simply inert on a deployment without one.
2848
+ *
2849
+ * NOTE: ultimate strength still depends on inbound SPF/DKIM rejecting a
2850
+ * spoofed `From`; this check closes the casual-leak path — a leaked or
2851
+ * forwarded subject token replied to from an arbitrary address.
2852
+ */
2853
+ declare function isOperatorReplySender(from: string | null | undefined, operatorEmail: string | null | undefined): boolean;
2854
+
2855
+ /**
2856
+ * Realtime voice bridge — wires the OpenAI Realtime API to a phone
2857
+ * carrier's realtime-media WebSocket so a phone mission can actually
2858
+ * *converse*.
2859
+ *
2860
+ * # Shape of the integration
2861
+ *
2862
+ * caller ⇄ carrier ⇄ (carrier media WebSocket) ⇄ AgenticMail
2863
+ * │
2864
+ * RealtimeVoiceBridge
2865
+ * │
2866
+ * (OpenAI Realtime WebSocket)
2867
+ * │
2868
+ * gpt-realtime
2869
+ *
2870
+ * The carrier streams the live call audio to AgenticMail as JSON frames
2871
+ * (base64 audio); AgenticMail relays them to OpenAI as
2872
+ * `input_audio_buffer.append`; OpenAI streams synthesised speech back
2873
+ * as `response.output_audio.delta`; AgenticMail relays that to the
2874
+ * carrier. Server-side VAD on the OpenAI session handles turn-taking —
2875
+ * no manual commit / response.create.
2876
+ *
2877
+ * # Provider-pluggable transport
2878
+ *
2879
+ * The carrier side speaks a provider-specific wire protocol — 46elks
2880
+ * (`hello`/`audio`/`bye`, linear PCM) and Twilio Media Streams
2881
+ * (`connected`/`start`/`media`/`stop`, G.711 µ-law). Those differences
2882
+ * are isolated behind a {@link RealtimeTransportAdapter}: the bridge
2883
+ * itself — OpenAI session lifecycle, function calling, barge-in,
2884
+ * transcript, teardown — is identical across providers and lives here
2885
+ * once. The bridge defaults to the 46elks adapter, so existing callers
2886
+ * that pass no `transport` keep their exact prior behaviour.
2887
+ *
2888
+ * # Memory injection — the whole point
2889
+ *
2890
+ * Before the OpenAI session starts, the agent's persistent memory is
2891
+ * rendered (`AgentMemoryManager.generateMemoryContext()`) and folded
2892
+ * into the Realtime session `instructions`. The model is told to treat
2893
+ * that block as *its own* long-term knowledge — so on the call it acts
2894
+ * with full continuity, as if it had always known those things.
2895
+ *
2896
+ * # Why this file is transport-agnostic
2897
+ *
2898
+ * `RealtimeVoiceBridge` never touches a socket. It takes two abstract
2899
+ * {@link RealtimeBridgePort}s (one per side) and is driven by
2900
+ * `handle*Message` / `handle*Open` / `handle*Close` calls. The real
2901
+ * WebSocket plumbing lives in `@agenticmail/api` (which has the `ws`
2902
+ * dependency); tests drive the bridge with in-memory fake ports. This
2903
+ * keeps `@agenticmail/core` dependency-free and the bridge logic fully
2904
+ * unit-testable without a live OpenAI key or a carrier websocket number.
2905
+ *
2906
+ * The exact OpenAI Realtime wire shapes below are the GA `gpt-realtime`
2907
+ * protocol (session config nested under `audio.input` / `audio.output`,
2908
+ * `format` as an object, `response.output_audio.delta` for output). The
2909
+ * legacy beta output event name `response.audio.delta` is also handled
2910
+ * defensively — some `gpt-realtime` deployments still emit it.
2911
+ */
2912
+
2913
+ /** OpenAI Realtime WebSocket base URL (model passed as `?model=`). */
2914
+ declare const OPENAI_REALTIME_URL = "wss://api.openai.com/v1/realtime";
2915
+ /** GA Realtime model. */
2916
+ declare const DEFAULT_REALTIME_MODEL = "gpt-realtime";
2917
+ /** Default GA Realtime voice. */
2918
+ declare const DEFAULT_REALTIME_VOICE = "marin";
2919
+ /** PCM sample rate shared by 46elks `pcm_24000` and the OpenAI session. */
2920
+ declare const REALTIME_AUDIO_SAMPLE_RATE = 24000;
2921
+ /**
2922
+ * #46-H1 — hard ceiling on a single inbound audio frame, measured in
2923
+ * base64 characters. Realtime frames are tiny (20–100 ms of audio); a
2924
+ * frame larger than this comes from either a buggy or a hostile peer trying to
2925
+ * push an unbounded allocation through the bridge. Oversized frames are
2926
+ * dropped, never forwarded. ~256 KiB of base64 ≈ 4 s of 24 kHz PCM16 —
2927
+ * far above any legitimate realtime frame, so this never trims real
2928
+ * speech, it only fences off abuse.
2929
+ */
2930
+ declare const REALTIME_MAX_AUDIO_FRAME_BASE64: number;
2931
+ /**
2932
+ * Bridge-side safety net for a slow tool call (plan §6). The model side
2933
+ * keeps the line warm (it announces a hold and reassures the caller);
2934
+ * this is the floor under that — if a tool's `execute()` never settles
2935
+ * (a hung `ask_operator`, a wedged search) the bridge still answers the
2936
+ * model after this long so the call cannot be wedged forever. It is set
2937
+ * deliberately ABOVE `OPERATOR_QUERY_TIMEOUT_MS` (5 min) so the tool's
2938
+ * own graceful timeout sentinel normally wins the race; this only fires
2939
+ * if the executor itself hangs.
2940
+ */
2941
+ declare const REALTIME_TOOL_CALL_TIMEOUT_MS: number;
2942
+ interface RealtimeInstructionOptions {
2943
+ /** The concrete objective of this call. */
2944
+ task: string;
2945
+ /** Rendered agent memory block (from `generateMemoryContext()`). */
2946
+ memoryContext?: string;
2947
+ /** The agent's display name, used in the persona line. */
2948
+ agentName?: string;
2949
+ /** Override the default persona preamble. */
2950
+ persona?: string;
2951
+ /**
2952
+ * Natural-language tool-use guidance, appended as its own section.
2953
+ * Normally produced by `buildRealtimeToolGuidance()` and folded in
2954
+ * automatically by `buildRealtimeSessionConfig()` when tools are set.
2955
+ */
2956
+ toolGuidance?: string;
2957
+ }
2958
+ /**
2959
+ * Compose the Realtime session `instructions` string. The agent's
2960
+ * memory is presented as the model's *own* knowledge — not as external
2961
+ * notes — so the call feels continuous with everything the agent has
2962
+ * learned elsewhere.
2963
+ */
2964
+ declare function buildRealtimeInstructions(opts: RealtimeInstructionOptions): string;
2965
+ interface RealtimeSessionConfigOptions extends RealtimeInstructionOptions {
2966
+ /** OpenAI Realtime voice (default {@link DEFAULT_REALTIME_VOICE}). */
2967
+ voice?: string;
2968
+ /** OpenAI Realtime model (default {@link DEFAULT_REALTIME_MODEL}). */
2969
+ model?: string;
2970
+ /** Provide a fully-formed instruction string instead of composing one. */
2971
+ instructions?: string;
2972
+ /**
2973
+ * Function tools to declare on the session. When present they are
2974
+ * emitted under `session.tools` with a `tool_choice`, and (unless
2975
+ * `instructions` is overridden) tool-use guidance is folded into the
2976
+ * composed instructions automatically.
2977
+ */
2978
+ tools?: RealtimeToolDefinition[];
2979
+ /** `tool_choice` override — defaults to `'auto'` when tools are set. */
2980
+ toolChoice?: 'auto' | 'none' | 'required';
2981
+ /**
2982
+ * OpenAI Realtime audio format for the session's input AND output.
2983
+ * Defaults to linear PCM @ 24 kHz (`{ type: 'audio/pcm', rate: 24000 }`)
2984
+ * — the format a 46elks `pcm_24000` call needs. A Twilio call must
2985
+ * pass `{ type: 'audio/pcmu', rate: 8000 }` (G.711 µ-law @ 8 kHz) so
2986
+ * the OpenAI session speaks the carrier's native codec with NO
2987
+ * transcoding. Normally sourced from a {@link RealtimeTransportAdapter}'s
2988
+ * `openaiAudioFormat`.
2989
+ */
2990
+ audioFormat?: {
2991
+ type: string;
2992
+ rate?: number;
2993
+ };
2994
+ }
2995
+ /** Default OpenAI Realtime audio format — linear PCM @ 24 kHz (46elks). */
2996
+ declare const DEFAULT_REALTIME_AUDIO_FORMAT: {
2997
+ readonly type: "audio/pcm";
2998
+ };
2999
+ /**
3000
+ * Build the `session.update` client event for the GA `gpt-realtime`
3001
+ * API. Audio in/out default to PCM16 @ 24 kHz (matches 46elks
3002
+ * `pcm_24000`); a Twilio call passes `audioFormat: { type: 'audio/pcmu',
3003
+ * rate: 8000 }` so the session speaks G.711 µ-law natively. Turn-taking
3004
+ * is server-side VAD, and the agent's memory is folded into
3005
+ * `instructions`.
3006
+ *
3007
+ * When `tools` are supplied they are declared under `session.tools`
3008
+ * with a `tool_choice` (default `'auto'`), and — unless the caller
3009
+ * passed an explicit `instructions` string — natural-language tool-use
3010
+ * guidance is appended to the composed instructions so the model knows
3011
+ * to put the caller on hold before a slow tool (plan §6).
3012
+ *
3013
+ * > The `session.tools` / `tool_choice` field names follow the OpenAI
3014
+ * > Realtime function-calling protocol per the plan §3, and the
3015
+ * > `audio/pcmu` µ-law format token follows the OpenAI GA Realtime
3016
+ * > audio-format protocol; verify both against current OpenAI docs
3017
+ * > before the live smoke test.
3018
+ */
3019
+ declare function buildRealtimeSessionConfig(opts: RealtimeSessionConfigOptions): Record<string, unknown>;
3020
+ /** Build the `wss://…/v1/realtime?model=…` URL for a model. */
3021
+ declare function buildOpenAIRealtimeUrl(model?: string): string;
3022
+ /** One side of the bridge — a JSON message sink that can be closed. */
3023
+ interface RealtimeBridgePort {
3024
+ /** Send one JSON message to the peer. Must not throw. */
3025
+ send(message: Record<string, unknown>): void;
3026
+ /** Close the underlying connection. Must be idempotent. */
3027
+ close(): void;
3028
+ }
3029
+ interface RealtimeBridgeTranscriptEntry {
3030
+ source: 'system' | 'provider' | 'agent';
3031
+ text: string;
3032
+ metadata?: Record<string, unknown>;
3033
+ }
3034
+ interface RealtimeVoiceBridgeOptions {
3035
+ /**
3036
+ * Port to the carrier realtime-media side. Named `elks` for backward
3037
+ * compatibility (it predates the Twilio transport); `carrier` is the
3038
+ * provider-neutral alias and takes precedence if both are given.
3039
+ */
3040
+ elks?: RealtimeBridgePort;
3041
+ /** Provider-neutral alias for {@link elks} — the carrier media port. */
3042
+ carrier?: RealtimeBridgePort;
3043
+ /** Port to the OpenAI Realtime side. */
3044
+ openai: RealtimeBridgePort;
3045
+ /** `session.update` payload — sent to OpenAI once its socket opens. */
3046
+ sessionConfig: Record<string, unknown>;
3047
+ /**
3048
+ * Carrier transport adapter — encodes/decodes the provider's wire
3049
+ * protocol (46elks vs Twilio Media Streams). Defaults to the 46elks
3050
+ * adapter, so callers that omit it keep the original behaviour.
3051
+ */
3052
+ transport?: RealtimeTransportAdapter;
3053
+ /**
3054
+ * 46elks-only: audio format we ask 46elks to send us (default
3055
+ * `pcm_24000`). Ignored unless the default 46elks transport is used
3056
+ * and no explicit `transport` was supplied.
3057
+ */
3058
+ listenFormat?: ElksRealtimeAudioFormat;
3059
+ /**
3060
+ * 46elks-only: audio format we declare for the audio we send 46elks
3061
+ * (default `pcm_24000`). Ignored unless the default 46elks transport
3062
+ * is used and no explicit `transport` was supplied.
3063
+ */
3064
+ sendFormat?: ElksRealtimeAudioFormat;
3065
+ /** Per-frame base64 ceiling (default {@link REALTIME_MAX_AUDIO_FRAME_BASE64}). */
3066
+ maxAudioFrameBase64?: number;
3067
+ /**
3068
+ * Dispatches the model's function calls. When omitted the bridge has
3069
+ * no tools — a function call from the model is acknowledged with a
3070
+ * "no tools available" output rather than left to wedge the model.
3071
+ */
3072
+ toolExecutor?: ToolExecutor;
3073
+ /**
3074
+ * Bridge-side safety-net timeout for a single tool call (default
3075
+ * {@link REALTIME_TOOL_CALL_TIMEOUT_MS}). If the executor does not
3076
+ * settle within this window the bridge answers the model itself so
3077
+ * the call cannot be wedged by a hung tool.
3078
+ */
3079
+ maxToolCallMs?: number;
3080
+ /** Sink for transcript / lifecycle entries worth persisting on the mission. */
3081
+ onTranscript?: (entry: RealtimeBridgeTranscriptEntry) => void;
3082
+ /**
3083
+ * Called exactly once when the bridge has fully ended. `pendingToolCalls`
3084
+ * is how many tool calls were still in flight at teardown — non-zero
3085
+ * means the call dropped mid-tool (e.g. an unanswered `ask_operator`),
3086
+ * which is the signal the API layer uses to arm callback-on-disconnect
3087
+ * (plan §7).
3088
+ */
3089
+ onEnd?: (summary: {
3090
+ reason: string;
3091
+ pendingToolCalls: number;
3092
+ }) => void;
3093
+ }
3094
+ /**
3095
+ * Bridges a phone carrier's realtime-media connection to an OpenAI
3096
+ * Realtime connection. Provider-pluggable: the carrier wire protocol
3097
+ * (46elks vs Twilio Media Streams) is isolated in a
3098
+ * {@link RealtimeTransportAdapter}; the conversation logic here is
3099
+ * provider-neutral. The caller pumps raw messages in via
3100
+ * `handleCarrierMessage` / `handleOpenAIMessage` and connection
3101
+ * lifecycle via `handle*Open` / `handle*Close`. Every public method is
3102
+ * safe to call after the bridge has ended (they become no-ops).
3103
+ *
3104
+ * The legacy `handleElks*` method names remain as thin aliases for the
3105
+ * `handleCarrier*` methods so existing 46elks call sites and tests do
3106
+ * not break.
3107
+ */
3108
+ declare class RealtimeVoiceBridge {
3109
+ private readonly carrier;
3110
+ private readonly openai;
3111
+ private readonly sessionConfig;
3112
+ private readonly transport;
3113
+ private readonly maxAudioFrameBase64;
3114
+ private readonly toolExecutor?;
3115
+ private readonly maxToolCallMs;
3116
+ private readonly onTranscript?;
3117
+ private readonly onEnd?;
3118
+ /** Carrier `hello`/`start` received — the call leg is live. */
3119
+ private helloSeen;
3120
+ /** OpenAI socket open + `session.update` sent. */
3121
+ private openaiReady;
3122
+ /** Bridge has ended — all further input is ignored. */
3123
+ private ended;
3124
+ /** Carrier call id from the `hello` event (46elks `callid` / Twilio `callSid`). */
3125
+ private callId;
3126
+ /** Audio frames received before OpenAI was ready, flushed on open. */
3127
+ private readonly pendingAudio;
3128
+ /** Oversized-frame counter — reported once, not per frame. */
3129
+ private droppedFrames;
3130
+ private droppedFramesReported;
3131
+ /** Accumulated assistant speech transcript for the current response. */
3132
+ private assistantTranscript;
3133
+ /**
3134
+ * Function-call name keyed by `call_id`, captured from
3135
+ * `response.output_item.added`. The later `*.arguments.done` event is
3136
+ * not guaranteed to echo the tool name, so we remember it here.
3137
+ */
3138
+ private readonly toolCallNames;
3139
+ /** `call_id`s whose tool call is currently executing. */
3140
+ private readonly inFlightToolCalls;
3141
+ constructor(opts: RealtimeVoiceBridgeOptions);
3142
+ /** True once the bridge has ended. */
3143
+ get isEnded(): boolean;
3144
+ /** The carrier call id, once the `hello`/`start` event has been seen. */
3145
+ get currentCallId(): string;
3146
+ /** The carrier transport provider this bridge is running for. */
3147
+ get provider(): string;
3148
+ /** How many tool calls are executing right now. */
3149
+ get pendingToolCalls(): number;
3150
+ /** Call when the OpenAI socket opens — sends `session.update`. */
3151
+ handleOpenAIOpen(): void;
3152
+ /** Call when the OpenAI socket closes. */
3153
+ handleOpenAIClose(): void;
3154
+ /** Call when the OpenAI socket errors. */
3155
+ handleOpenAIError(err: unknown): void;
3156
+ /**
3157
+ * Call when the carrier media socket closes. The `onEnd` reason is
3158
+ * `<prefix>-closed`, where the prefix comes from the transport adapter
3159
+ * (`elks` for 46elks, `twilio` for Twilio) — so historical 46elks
3160
+ * reason strings (`elks-closed`) are preserved.
3161
+ */
3162
+ handleCarrierClose(): void;
3163
+ /** Call when the carrier media socket errors. */
3164
+ handleCarrierError(err: unknown): void;
3165
+ /** @deprecated 46elks-era alias for {@link handleCarrierClose}. */
3166
+ handleElksClose(): void;
3167
+ /** @deprecated 46elks-era alias for {@link handleCarrierError}. */
3168
+ handleElksError(err: unknown): void;
3169
+ /**
3170
+ * Feed one raw message from the carrier media socket. Accepts a JSON
3171
+ * string or an already-parsed object. The transport adapter
3172
+ * normalises the provider-specific frame; malformed frames throw out
3173
+ * of the adapter and are ignored here (the bridge is never torn down
3174
+ * for one bad frame).
3175
+ */
3176
+ handleCarrierMessage(raw: string | Record<string, unknown>): void;
3177
+ /** @deprecated 46elks-era alias for {@link handleCarrierMessage}. */
3178
+ handleElksMessage(raw: string | Record<string, unknown>): void;
3179
+ /** Relay caller audio to OpenAI, enforcing the per-frame size cap. */
3180
+ private forwardInboundAudio;
3181
+ /**
3182
+ * Feed one raw message from the OpenAI Realtime socket. Accepts a
3183
+ * JSON string or an already-parsed object. Unknown event types are
3184
+ * ignored.
3185
+ */
3186
+ handleOpenAIMessage(raw: string | Record<string, unknown>): void;
3187
+ /** Relay synthesised agent audio to the carrier, enforcing the size cap. */
3188
+ private forwardOutboundAudio;
3189
+ /**
3190
+ * Parse a `response.function_call_arguments.done` event and dispatch
3191
+ * the tool call. Resolves `name` from the event or the map captured
3192
+ * on `response.output_item.added`; parses `arguments` (a JSON string)
3193
+ * defensively. Always answers the model — an unknown name, missing
3194
+ * executor, or oversized fan-out each gets a model-readable output
3195
+ * rather than being dropped (a dropped `call_id` wedges the model,
3196
+ * which waits forever for its `function_call_output`).
3197
+ */
3198
+ private dispatchToolCall;
3199
+ /** Execute one tool call, racing the executor against the safety-net timeout. */
3200
+ private runToolCall;
3201
+ /**
3202
+ * Send a tool result back to OpenAI: a `function_call_output`
3203
+ * conversation item, then `response.create` so the model resumes
3204
+ * speaking with the result in hand.
3205
+ *
3206
+ * > `conversation.item.create` with `{ type: 'function_call_output',
3207
+ * > call_id, output }` followed by `response.create` is the OpenAI
3208
+ * > Realtime function-calling return path per the plan §3. Verify
3209
+ * > against current OpenAI docs before the live smoke test.
3210
+ */
3211
+ private answerToolCall;
3212
+ /**
3213
+ * End the bridge. Idempotent — the first call wins, later calls are
3214
+ * no-ops. Sends the carrier's end-of-call frame (if it has one — 46elks
3215
+ * `bye`; Twilio has none), closes both ports, fires `onEnd`.
3216
+ */
3217
+ end(reason: string): void;
3218
+ private noteDroppedFrame;
3219
+ private emitTranscript;
3220
+ private safeSend;
3221
+ }
3222
+
1740
3223
  declare const PHONE_REGION_SCOPES: readonly ["AT", "DE", "EU", "WORLD"];
1741
3224
  type PhoneRegionScope = typeof PHONE_REGION_SCOPES[number];
1742
3225
  declare const TELEPHONY_TRANSPORT_CAPABILITIES: readonly ["sms", "call_control", "realtime_media", "recording_supported"];
@@ -1839,7 +3322,9 @@ declare function validatePhoneMissionStart(input: unknown, transport: unknown, o
1839
3322
  allowPremiumOrSpecialNumbers?: boolean;
1840
3323
  }): PhoneMissionStartValidationResult;
1841
3324
 
1842
- type PhoneTransportProvider = '46elks';
3325
+ type PhoneTransportProvider = '46elks' | 'twilio';
3326
+ /** Providers that support starting outbound call-control missions. */
3327
+ declare const PHONE_CALL_CONTROL_PROVIDERS: readonly PhoneTransportProvider[];
1843
3328
  /**
1844
3329
  * Abuse / cost controls for the call-control surface. A phone mission
1845
3330
  * places a real, billed outbound call, so /calls/start needs hard limits
@@ -1881,6 +3366,29 @@ interface PhoneMissionTranscriptEntry {
1881
3366
  text: string;
1882
3367
  metadata?: Record<string, unknown>;
1883
3368
  }
3369
+ type OperatorQueryUrgency = 'normal' | 'high';
3370
+ /**
3371
+ * A question the voice agent put to its human operator mid-call via the
3372
+ * `ask_operator` tool (plan §4 / §5). Persisted on the mission under
3373
+ * `metadata.operatorQueries[]` and exposed by the operator-query API
3374
+ * endpoints. The bridge's `ask_operator` tool blocks polling this
3375
+ * record until `answer` is set or the hard timeout elapses.
3376
+ */
3377
+ interface PhoneOperatorQuery {
3378
+ /** `oq_<uuid>` — unique across all missions. */
3379
+ id: string;
3380
+ /** The question, sanitised + length-bounded. */
3381
+ question: string;
3382
+ /** One-line context on the call, if the agent supplied it. */
3383
+ callContext?: string;
3384
+ urgency: OperatorQueryUrgency;
3385
+ askedAt: string;
3386
+ /** The operator's answer — set once, never overwritten (idempotent). */
3387
+ answer?: string;
3388
+ answeredAt?: string;
3389
+ /** Channel the answer arrived on (e.g. `api`, `email`). */
3390
+ answeredVia?: string;
3391
+ }
1884
3392
  interface PhoneCallMission {
1885
3393
  id: string;
1886
3394
  agentId: string;
@@ -1951,6 +3459,52 @@ declare class PhoneManager {
1951
3459
  private authenticateWebhook;
1952
3460
  handleVoiceStartWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneWebhookResult;
1953
3461
  handleHangupWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneCallMission;
3462
+ /**
3463
+ * Handle Twilio's voice webhook — the `Url` Twilio fetches when the
3464
+ * outbound call connects. The mirror of {@link handleVoiceStartWebhook}
3465
+ * for Twilio: it authenticates the per-mission token, transitions the
3466
+ * mission to `connected`, and returns the TwiML to send back.
3467
+ *
3468
+ * `twiml` is a `<Connect><Stream>` document that wires the call's
3469
+ * audio to the realtime voice WebSocket — the same realtime path the
3470
+ * 46elks websocket-number uses. The route serves it with
3471
+ * `Content-Type: text/xml`.
3472
+ *
3473
+ * Like the 46elks handler this is terminal-state-guarded (#43-H5,
3474
+ * a late/replayed webhook cannot resurrect a finished mission) and
3475
+ * idempotent (a duplicate is acknowledged with the same TwiML but
3476
+ * changes nothing).
3477
+ */
3478
+ handleTwilioVoiceWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): {
3479
+ mission: PhoneCallMission;
3480
+ twiml: string;
3481
+ };
3482
+ /**
3483
+ * Handle Twilio's status callback — the `StatusCallback` Twilio POSTs
3484
+ * with the terminal call status. The mirror of
3485
+ * {@link handleHangupWebhook} for Twilio. Idempotent + terminal-state
3486
+ * guarded; records the reported `CallDuration` and accumulates cost
3487
+ * from `Price` when Twilio supplied it (Twilio reports the final
3488
+ * price asynchronously, so it may be absent on the first callback —
3489
+ * the duration ceiling / rate limit / concurrency cap remain the
3490
+ * preventive cost controls, #43-H2).
3491
+ */
3492
+ handleTwilioStatusWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneCallMission;
3493
+ /**
3494
+ * Build the TwiML for the Twilio voice webhook — a `<Connect><Stream>`
3495
+ * pointing at the realtime voice WebSocket. The `<Stream>` URL is
3496
+ * derived from `webhookBaseUrl` (https → wss); the per-mission token
3497
+ * (#43-H7) rides as both a `<Parameter>` and a query param so the
3498
+ * media socket can be matched to its mission.
3499
+ */
3500
+ private buildTwilioVoiceTwiML;
3501
+ /**
3502
+ * Read the call cost off a Twilio status callback (`Price`, a
3503
+ * negative or string number), add it to the mission's running total,
3504
+ * and flag a policy-cap breach (#43-H2). Twilio prices are reported
3505
+ * as a negative amount (a debit); we use the absolute value.
3506
+ */
3507
+ private buildTwilioCostMetadataPatch;
1954
3508
  /**
1955
3509
  * Read the call cost off a 46elks hangup payload, add it to the
1956
3510
  * mission's running total, and flag a policy-cap breach (#43-H2).
@@ -1961,7 +3515,117 @@ declare class PhoneManager {
1961
3515
  private buildCostMetadataPatch;
1962
3516
  private buildVoiceStartAction;
1963
3517
  cancelMission(agentId: string, missionId: string): PhoneCallMission;
3518
+ /**
3519
+ * Resolve a mission by the provider's call id (the 46elks `callid`).
3520
+ * The realtime voice bridge uses this to match an inbound 46elks
3521
+ * realtime-media WebSocket — whose `hello` frame carries `callid` —
3522
+ * back to the mission that placed the call, so the right agent's
3523
+ * memory and task can be loaded into the OpenAI Realtime session.
3524
+ */
3525
+ findMissionByProviderCallId(providerCallId: string, agentId?: string): PhoneCallMission | null;
3526
+ /**
3527
+ * Append transcript entries produced by the realtime voice bridge and
3528
+ * optionally transition the mission status. A mission already in a
3529
+ * terminal state keeps that state — a late bridge event must not
3530
+ * resurrect a completed/failed/cancelled mission (mirrors the
3531
+ * terminal-state guard on the webhook handlers). No-op if the mission
3532
+ * no longer exists.
3533
+ */
3534
+ recordRealtimeActivity(missionId: string, entries: PhoneMissionTranscriptEntry[], status?: PhoneMissionState): PhoneCallMission | null;
3535
+ /**
3536
+ * Record an operator query against a mission — the first step of the
3537
+ * `ask_operator` tool (plan §4). Returns the persisted query; the
3538
+ * bridge then polls {@link getOperatorQuery} for an answer. Throws on
3539
+ * an unknown mission or an empty question.
3540
+ */
3541
+ addOperatorQuery(missionId: string, input: {
3542
+ question: string;
3543
+ callContext?: string;
3544
+ urgency?: string;
3545
+ }): {
3546
+ mission: PhoneCallMission;
3547
+ query: PhoneOperatorQuery;
3548
+ };
3549
+ /** List the operator queries recorded on a mission. */
3550
+ listOperatorQueries(missionId: string, agentId?: string): PhoneOperatorQuery[];
3551
+ /** Read one operator query, or null if the mission/query is unknown. */
3552
+ getOperatorQuery(missionId: string, queryId: string, agentId?: string): PhoneOperatorQuery | null;
3553
+ /**
3554
+ * Resolve a mission + query by the query id alone — used by the
3555
+ * inbound email-reply hook, which only has the id parsed out of the
3556
+ * reply subject. A LIKE prefilter (id escaped so its `_`/`-` are
3557
+ * literal) narrows the scan; the match is then verified exactly.
3558
+ */
3559
+ findMissionByOperatorQueryId(queryId: string): {
3560
+ mission: PhoneCallMission;
3561
+ query: PhoneOperatorQuery;
3562
+ } | null;
3563
+ /**
3564
+ * Record the operator's answer to a query. Idempotent — the first
3565
+ * answer wins; a later answer for the same query returns the existing
3566
+ * record unchanged with `alreadyAnswered: true`, so a duplicate
3567
+ * (e.g. an email reply AND an API POST) cannot fight. Returns null if
3568
+ * the mission/query is unknown; throws on an empty answer.
3569
+ */
3570
+ answerOperatorQuery(missionId: string, queryId: string, answer: string, options?: {
3571
+ via?: string;
3572
+ agentId?: string;
3573
+ }): {
3574
+ mission: PhoneCallMission;
3575
+ query: PhoneOperatorQuery;
3576
+ alreadyAnswered: boolean;
3577
+ } | null;
3578
+ /**
3579
+ * Flag a mission for callback-on-disconnect: the call dropped while
3580
+ * an operator query was still unanswered, so once the operator
3581
+ * answers the API should dial the caller back. Returns the mission
3582
+ * unchanged (not flagged) if every query is already answered; null if
3583
+ * the mission is unknown.
3584
+ */
3585
+ flagCallbackPending(missionId: string): PhoneCallMission | null;
3586
+ /** Missions currently flagged for callback-on-disconnect. */
3587
+ findCallbackPendingMissions(agentId?: string): PhoneCallMission[];
3588
+ /**
3589
+ * Trigger a callback (plan §7) when a callback-pending mission now has
3590
+ * an answered query: re-dial the same number with a continuation task
3591
+ * carrying the operator's answer. Returns the (updated) original
3592
+ * mission + the new callback mission, or null if no callback is due.
3593
+ *
3594
+ * `callbackPending` is cleared BEFORE dialing so a concurrent second
3595
+ * answer cannot double-dial; if the dial throws it is restored so the
3596
+ * callback is not silently lost, and the error is rethrown.
3597
+ */
3598
+ triggerCallback(missionId: string, options?: StartPhoneCallOptions): Promise<{
3599
+ mission: PhoneCallMission;
3600
+ callbackMission: PhoneCallMission;
3601
+ } | null>;
1964
3602
  private build46ElksCallRequest;
3603
+ /**
3604
+ * Build the Twilio outbound-call request — the mirror of
3605
+ * {@link build46ElksCallRequest} for Twilio's Calls.json endpoint:
3606
+ *
3607
+ * POST https://api.twilio.com/2010-04-01/Accounts/{AccountSid}/Calls.json
3608
+ *
3609
+ * with an `application/x-www-form-urlencoded` body. `From`/`To` are
3610
+ * the numbers; `Url` is a TwiML webhook Twilio fetches when the call
3611
+ * connects — it points at our voice-start webhook, which returns the
3612
+ * `<Connect><Stream>` TwiML that wires the call's audio to the
3613
+ * realtime voice WebSocket. `StatusCallback` is Twilio's hangup-
3614
+ * equivalent — fired with the final call status (the analogue of the
3615
+ * 46elks `whenhangup`). `TimeLimit` caps the call duration, re-clamped
3616
+ * to the server ceiling (#43-H6) exactly as the 46elks `timeout` is.
3617
+ *
3618
+ * Both webhook URLs carry the per-mission HMAC token (#43-H7), never
3619
+ * the raw `webhookSecret`. The Twilio `AccountSid` is `config.username`
3620
+ * and the `AuthToken` is `config.password` (HTTP Basic on the request,
3621
+ * and the key Twilio signs `X-Twilio-Signature` with).
3622
+ *
3623
+ * > The Calls.json endpoint path, the `From`/`To`/`Url`/
3624
+ * > `StatusCallback`/`TimeLimit` body fields, and the `<Connect>
3625
+ * > <Stream>` TwiML are per Twilio's public Programmable Voice docs;
3626
+ * > verify against current docs before the live smoke-test.
3627
+ */
3628
+ private buildTwilioCallRequest;
1965
3629
  private insertMission;
1966
3630
  private updateProviderCall;
1967
3631
  private updateMissionStatus;
@@ -1971,6 +3635,10 @@ declare function buildPhoneTransportConfig(input: {
1971
3635
  phoneNumber?: unknown;
1972
3636
  username?: unknown;
1973
3637
  password?: unknown;
3638
+ /** Twilio alias for {@link username} — the account SID. */
3639
+ accountSid?: unknown;
3640
+ /** Twilio alias for {@link password} — the account auth token. */
3641
+ authToken?: unknown;
1974
3642
  webhookBaseUrl?: unknown;
1975
3643
  webhookSecret?: unknown;
1976
3644
  apiUrl?: unknown;
@@ -2772,6 +4440,333 @@ declare class SetupManager {
2772
4440
  isInitialized(): boolean;
2773
4441
  }
2774
4442
 
4443
+ /** External binary a media operation depends on. */
4444
+ type MediaBinary = 'ffmpeg' | 'ffprobe' | 'imagemagick' | 'whisper' | 'python' | 'edge-tts';
4445
+ /**
4446
+ * Detection result for one external binary. `available: false` carries
4447
+ * an actionable `installHint` the agent can relay to the operator.
4448
+ */
4449
+ interface MediaCapability {
4450
+ /** Binary identifier. */
4451
+ binary: MediaBinary;
4452
+ /** Whether the binary was found and is runnable. */
4453
+ available: boolean;
4454
+ /** Version string when detectable. */
4455
+ version?: string;
4456
+ /** Resolved path / command used to invoke it. */
4457
+ command?: string;
4458
+ /** Human-readable description of what it powers. */
4459
+ description: string;
4460
+ /** Install instructions, present when `available` is false. */
4461
+ installHint?: string;
4462
+ }
4463
+ /** Aggregate media capability report — one entry per binary. */
4464
+ interface MediaCapabilityReport {
4465
+ /** Per-binary detection results. */
4466
+ capabilities: MediaCapability[];
4467
+ /** True when at least ffmpeg + ffprobe are present (the baseline). */
4468
+ ready: boolean;
4469
+ /** When the report was generated. */
4470
+ checkedAt: string;
4471
+ }
4472
+ /** Result envelope for an operation that produced an output file. */
4473
+ interface MediaFileResult {
4474
+ ok: true;
4475
+ /** Absolute path of the produced file. */
4476
+ filePath: string;
4477
+ /** Size of the produced file in bytes. */
4478
+ sizeBytes: number;
4479
+ /** Output container/format, when meaningful. */
4480
+ format?: string;
4481
+ /** Operation-specific extra fields. */
4482
+ [key: string]: unknown;
4483
+ }
4484
+ interface TtsGenerateOptions {
4485
+ /** Text to synthesise. */
4486
+ text: string;
4487
+ /** Voice preset name or a full Edge voice id. */
4488
+ voice?: string;
4489
+ /** Speaking rate, e.g. "+20%" / "-10%". */
4490
+ rate?: string;
4491
+ /** Pitch shift, e.g. "+5Hz" / "-10Hz". */
4492
+ pitch?: string;
4493
+ }
4494
+ type ImageAction = 'resize' | 'crop' | 'rotate' | 'convert' | 'compress' | 'text_overlay' | 'flip' | 'blur' | 'sharpen' | 'grayscale';
4495
+ interface ImageEditOptions {
4496
+ /** Absolute path to the input image. */
4497
+ input: string;
4498
+ /** Edit action to perform. */
4499
+ action: ImageAction;
4500
+ width?: number;
4501
+ height?: number;
4502
+ angle?: number;
4503
+ format?: string;
4504
+ quality?: number;
4505
+ text?: string;
4506
+ position?: string;
4507
+ fontSize?: number;
4508
+ fontColor?: string;
4509
+ blurRadius?: number;
4510
+ direction?: 'horizontal' | 'vertical';
4511
+ offsetX?: number;
4512
+ offsetY?: number;
4513
+ }
4514
+ type VideoAction = 'trim' | 'extract_frame' | 'extract_frames' | 'convert' | 'gif' | 'compress' | 'resize' | 'add_audio' | 'remove_audio' | 'speed' | 'color_grade' | 'transition' | 'text_overlay' | 'picture_in_picture' | 'split_screen' | 'ken_burns' | 'slow_motion' | 'watermark' | 'concatenate' | 'audio_mix' | 'auto_caption';
4515
+ interface VideoEditOptions {
4516
+ /** Absolute path to the input video (or image for ken_burns). */
4517
+ input: string;
4518
+ /** Edit action to perform. */
4519
+ action: VideoAction;
4520
+ start?: string;
4521
+ end?: string;
4522
+ duration?: string;
4523
+ timestamp?: string;
4524
+ interval?: number;
4525
+ format?: string;
4526
+ width?: number;
4527
+ height?: number;
4528
+ fps?: number;
4529
+ crf?: number;
4530
+ audioPath?: string;
4531
+ speedFactor?: number;
4532
+ secondInput?: string;
4533
+ transitionType?: string;
4534
+ transitionDuration?: number;
4535
+ text?: string;
4536
+ fontSize?: number;
4537
+ fontColor?: string;
4538
+ textPosition?: string;
4539
+ textBg?: string;
4540
+ textStart?: string;
4541
+ textEnd?: string;
4542
+ overlayOpacity?: number;
4543
+ overlayScale?: number;
4544
+ watermarkPosition?: string;
4545
+ watermarkPath?: string;
4546
+ pipWidth?: number;
4547
+ pipPosition?: string;
4548
+ splitDirection?: 'horizontal' | 'vertical';
4549
+ zoomDirection?: string;
4550
+ zoomDuration?: number;
4551
+ zoomFactor?: number;
4552
+ files?: string[];
4553
+ bgVolume?: string;
4554
+ fgVolume?: string;
4555
+ colorPreset?: string;
4556
+ lutPath?: string;
4557
+ captionColor?: string;
4558
+ captionFontSize?: number;
4559
+ /** Path to a whisper.cpp model file (.bin) — required for auto_caption. */
4560
+ whisperModel?: string;
4561
+ }
4562
+ type AudioAction = 'trim' | 'convert' | 'merge' | 'volume' | 'speed' | 'extract' | 'reverse' | 'fade';
4563
+ interface AudioEditOptions {
4564
+ /** Absolute path to the input audio (or video for `extract`). */
4565
+ input?: string;
4566
+ /** Edit action to perform. */
4567
+ action: AudioAction;
4568
+ start?: string;
4569
+ end?: string;
4570
+ duration?: string;
4571
+ format?: string;
4572
+ files?: string[];
4573
+ volume?: string;
4574
+ speedFactor?: number;
4575
+ fadeType?: 'in' | 'out' | 'both';
4576
+ fadeDuration?: number;
4577
+ }
4578
+ interface MediaStreamInfo {
4579
+ type?: string;
4580
+ codec?: string;
4581
+ width?: number;
4582
+ height?: number;
4583
+ duration?: string;
4584
+ bitRate?: string;
4585
+ sampleRate?: string;
4586
+ channels?: number;
4587
+ fps?: string;
4588
+ }
4589
+ interface MediaInfoResult {
4590
+ ok: true;
4591
+ file: string;
4592
+ format?: string;
4593
+ duration?: string;
4594
+ sizeBytes: number;
4595
+ bitRate?: string;
4596
+ streams: MediaStreamInfo[];
4597
+ }
4598
+ interface VideoUnderstandOptions {
4599
+ /** Absolute path to the input video. */
4600
+ input: string;
4601
+ /** Seconds between extracted frames (default 3). */
4602
+ frameInterval?: number;
4603
+ /** Maximum number of frames to extract (default 30). */
4604
+ maxFrames?: number;
4605
+ /** Path to a whisper.cpp model file (.bin) — enables transcription. */
4606
+ whisperModel?: string;
4607
+ }
4608
+ interface VideoTimelineEntry {
4609
+ timeSeconds: number;
4610
+ timeDisplay: string;
4611
+ framePath: string;
4612
+ spokenText: string;
4613
+ }
4614
+ interface VideoUnderstandResult {
4615
+ ok: true;
4616
+ video: string;
4617
+ duration: number;
4618
+ resolution: string;
4619
+ totalFramesExtracted: number;
4620
+ transcriptSegments: number;
4621
+ timeline: VideoTimelineEntry[];
4622
+ frameDir: string;
4623
+ hint: string;
4624
+ }
4625
+ interface VoiceCloneOptions {
4626
+ /** Text to speak in the cloned voice. */
4627
+ text: string;
4628
+ /** Absolute path to the reference audio sample (required). */
4629
+ refAudio: string;
4630
+ /** Transcript of the reference audio (required). */
4631
+ refText: string;
4632
+ /** Optional path to the Python interpreter with F5-TTS installed. */
4633
+ pythonBin?: string;
4634
+ /** Compute device passed to F5-TTS (default 'cpu'). */
4635
+ device?: string;
4636
+ }
4637
+
4638
+ interface MediaManagerOptions {
4639
+ /**
4640
+ * Directory media output files are written to. Created on first use.
4641
+ * Defaults to `<dataDir>/media` when a dataDir is given, otherwise to
4642
+ * `<os-tmp>/agenticmail-media`.
4643
+ */
4644
+ outputDir?: string;
4645
+ /** AgenticMail data directory — used to derive a default outputDir. */
4646
+ dataDir?: string;
4647
+ }
4648
+ declare class MediaManager {
4649
+ private readonly outputDir;
4650
+ constructor(options?: MediaManagerOptions);
4651
+ /** Ensure the output directory exists; returns it. */
4652
+ private ensureOutputDir;
4653
+ /** Build an output path inside the managed output dir. */
4654
+ private outPath;
4655
+ /** Build a sub-directory inside the managed output dir. */
4656
+ private outDir;
4657
+ /** Stat a produced file into a {@link MediaFileResult} envelope. */
4658
+ private fileResult;
4659
+ /** Run ffmpeg with an argument array. */
4660
+ private ffmpeg;
4661
+ /** Run ImageMagick with an argument array (handles magick/convert). */
4662
+ private magick;
4663
+ /** Run an `identify`-style probe via the ImageMagick binary. */
4664
+ private magickIdentify;
4665
+ /** Probe a media file with ffprobe, returning parsed JSON. */
4666
+ private ffprobe;
4667
+ /** Return the media binary capability report (graceful-degradation surface). */
4668
+ capabilities(opts?: {
4669
+ force?: boolean;
4670
+ }): MediaCapabilityReport;
4671
+ /** List the built-in Edge TTS voice presets. */
4672
+ listVoices(): {
4673
+ presets: Array<{
4674
+ name: string;
4675
+ full: string;
4676
+ }>;
4677
+ default: string;
4678
+ };
4679
+ /**
4680
+ * Synthesise speech with Edge TTS. node-edge-tts is an optional peer
4681
+ * dependency — when it is absent this throws a clear, actionable
4682
+ * error instead of crashing. The MP3 is transcoded to OGG/Opus when
4683
+ * ffmpeg is available (so it can be sent as a voice note); otherwise
4684
+ * the raw MP3 is returned.
4685
+ */
4686
+ ttsGenerate(opts: TtsGenerateOptions): Promise<MediaFileResult>;
4687
+ /** Edit an image with ImageMagick. */
4688
+ imageEdit(opts: ImageEditOptions): Promise<MediaFileResult>;
4689
+ /** Edit audio with ffmpeg. */
4690
+ audioEdit(opts: AudioEditOptions): Promise<MediaFileResult>;
4691
+ /** Probe a media file's metadata with ffprobe. */
4692
+ mediaInfo(input: string): Promise<MediaInfoResult>;
4693
+ /** Edit a video with ffmpeg (+ ImageMagick for caption rendering). */
4694
+ videoEdit(opts: VideoEditOptions): Promise<MediaFileResult>;
4695
+ private videoColorGrade;
4696
+ private videoTransition;
4697
+ private videoTextOverlay;
4698
+ private videoPictureInPicture;
4699
+ private videoSplitScreen;
4700
+ private videoKenBurns;
4701
+ private videoSlowMotion;
4702
+ private videoWatermark;
4703
+ private videoConcatenate;
4704
+ private videoAudioMix;
4705
+ /**
4706
+ * Burn dynamic word-chunked captions onto a video. Needs ffmpeg,
4707
+ * ImageMagick, and whisper.cpp (with a model file). Mirrors the
4708
+ * source MCP's CapCut-style caption renderer.
4709
+ */
4710
+ private videoAutoCaption;
4711
+ /**
4712
+ * Analyse a video — extract frames at intervals and (when a whisper
4713
+ * model is given) transcribe the audio — and return a merged
4714
+ * timeline of what is shown and said.
4715
+ */
4716
+ videoUnderstand(opts: VideoUnderstandOptions): Promise<VideoUnderstandResult>;
4717
+ /**
4718
+ * Synthesise speech in a reference voice with F5-TTS. Needs a Python
4719
+ * interpreter that has the `f5-tts` and `soundfile` packages. The
4720
+ * reference audio + transcript MUST be supplied by the caller — no
4721
+ * built-in voice profile. The Python is run via execFile with an
4722
+ * argument array; the script and its inputs are written to a temp
4723
+ * file and passed by path, so no caller value is interpolated into a
4724
+ * command line.
4725
+ */
4726
+ voiceClone(opts: VoiceCloneOptions): Promise<MediaFileResult>;
4727
+ /** Parse a whisper-produced SRT (located by stem) into timed segments. */
4728
+ private parseSrt;
4729
+ /** Unlink a file, swallowing any error (cleanup best-effort). */
4730
+ private tryUnlink;
4731
+ /** Remove the SRT(s) produced for a given stem. */
4732
+ private tryUnlinkSrt;
4733
+ /** Recursively remove a directory, swallowing errors. */
4734
+ private tryRmDir;
4735
+ }
4736
+
4737
+ /**
4738
+ * Detect whether a single binary is available. Result is cached; pass
4739
+ * `{ force: true }` to re-probe (e.g. after the operator installs it).
4740
+ */
4741
+ declare function detectBinary(binary: MediaBinary, opts?: {
4742
+ force?: boolean;
4743
+ }): MediaCapability;
4744
+ /**
4745
+ * Resolve the command name to invoke for a binary. Throws a clear,
4746
+ * actionable error if the binary is not available — callers use this
4747
+ * at the top of every media operation so a missing dependency fails
4748
+ * fast with an install hint instead of an opaque ENOENT deep inside
4749
+ * an execFile call.
4750
+ */
4751
+ declare function requireBinary(binary: MediaBinary): string;
4752
+ /**
4753
+ * Validate that a whisper.cpp model file exists on disk. whisper.cpp
4754
+ * needs an explicit model path; this surfaces a clear error rather
4755
+ * than letting the CLI fail cryptically.
4756
+ */
4757
+ declare function requireWhisperModel(modelPath: string | undefined): string;
4758
+ /**
4759
+ * Build the full media capability report — one entry per binary, plus
4760
+ * a `ready` flag (true once ffmpeg + ffprobe, the baseline, are both
4761
+ * present). Used by the health surface and the `media_capabilities`
4762
+ * tool so an agent can see what is available before attempting an op.
4763
+ */
4764
+ declare function getMediaCapabilities(opts?: {
4765
+ force?: boolean;
4766
+ }): MediaCapabilityReport;
4767
+ /** Clear the detection cache — used by tests and after a re-install. */
4768
+ declare function clearMediaCapabilityCache(): void;
4769
+
2775
4770
  /**
2776
4771
  * Stable thread-id derivation.
2777
4772
  *
@@ -3231,4 +5226,4 @@ declare class MemorySearchIndex {
3231
5226
  has(id: string): boolean;
3232
5227
  }
3233
5228
 
3234
- export { AGENT_ROLES, AccountManager, type AddressInfo, type Agent, AgentDeletionService, type AgentMemoryEntry, type AgentMemoryFields, AgentMemoryManager, type AgentMemoryOptions, type AgentMemoryRead, AgentMemoryStore, type AgentRole, AgenticMailClient, type AgenticMailClientOptions, type AgenticMailConfig, type ArchiveAndDeleteOptions, type ArchivedEmail, type Attachment, type AttachmentAdvisory, BRIDGE_OPERATOR_LIVE_WINDOW_MS, type BridgeMailContext, type BridgeWakeError, type BridgeWakePromptArgs, type BridgeWakeResult, type BridgeWakeRoute, type CachedMessage, CloudflareClient, type CreateAgentOptions, type CreateMemoryInput, DEFAULT_AGENT_NAME, DEFAULT_AGENT_ROLE, DEFAULT_SESSION_MAX_AGE_MS, DNSConfigurator, type Database, type DeletionReport, type DeletionSummary, DependencyChecker, DependencyInstaller, type DependencyStatus, type DnsRecord, type DnsSetupResult, type DomainInfo, DomainManager, type DomainModeConfig, type DomainPurchaseResult, DomainPurchaser, type DomainSearchResult, type DomainSetupResult, ELKS_REALTIME_AUDIO_FORMATS, type ElksRealtimeAudioFormat, type ElksRealtimeAudioMessage, type ElksRealtimeByeMessage, type ElksRealtimeHelloMessage, type ElksRealtimeInboundMessage, type ElksRealtimeOutboundMessage, type EmailEnvelope, type EmailRouteAction, type EmailRouteClass, type EmailRouteClassification, type EmailRouteInput, EmailSearchIndex, type FolderInfo, type GatewayConfig, GatewayManager, type GatewayManagerOptions, type GatewayMode, type GatewayStatus, type HostName, type HostSession, type HostSessionResumeMode, type InboundEmail, type InboundSmsEvent, type InboxEvent, type InboxExpungeEvent, type InboxFlagsEvent, type InboxNewEvent, InboxWatcher, type InboxWatcherOptions, type InstallProgress, type LinkAdvisory, type LocalSmtpConfig, MEMORY_CATEGORIES, MailReceiver, type MailReceiverOptions, MailSender, type MailSenderOptions, type MailboxInfo, type MemoryCategory, type MemoryImportance, type MemoryQueryOptions, MemorySearchIndex, type 
MemorySource, type MemoryStats, type OpenClawPhoneMissionPolicy, type OutboundCategory, type OutboundScanInput, type OutboundScanResult, type OutboundWarning, PHONE_MAX_CONCURRENT_MISSIONS, PHONE_MIN_WEBHOOK_SECRET_LENGTH, PHONE_MISSION_STATES, PHONE_RATE_LIMIT_PER_HOUR, PHONE_RATE_LIMIT_PER_MINUTE, PHONE_REGION_SCOPES, PHONE_SERVER_MAX_ATTEMPTS, PHONE_SERVER_MAX_CALL_DURATION_SECONDS, PHONE_SERVER_MAX_COST_PER_MISSION, PHONE_TASK_MAX_LENGTH, type ParsedAttachment, type ParsedEmail, type ParsedSms, PathTraversalError, type PhoneAlternativePolicy, type PhoneCallMission, type PhoneConfirmPolicy, PhoneManager, type PhoneMissionStartValidationResult, type PhoneMissionState, type PhoneMissionTranscriptEntry, type PhoneMissionValidationIssue, type PhoneMissionValidationResult, type PhoneNumberRisk, PhoneRateLimitError, type PhoneRegionScope, type PhoneTransportConfig, type PhoneTransportProfile, type PhoneTransportProvider, type PhoneTransportValidationResult, PhoneWebhookAuthError, type PhoneWebhookResult, type PlanBridgeWakeArgs, type PurchasedDomain, REDACTED, RELAY_PRESETS, RelayBridge, type RelayBridgeOptions, type RelayConfig, RelayGateway, type RelayProvider, type RelaySearchResult, type ResumeErrorClassificationOptions, SPAM_THRESHOLD, type SafeJoinOptions, type SanitizeDetection, type SanitizeResult, type SearchCriteria, type SearchableEmail, type SecurityAdvisory, type SendMailOptions, type SendResult, type SendResultWithRaw, type SendSmsInput, type SendSmsResult, ServiceManager, type ServiceStatus, type SetupConfig, SetupManager, type SetupResult, type Severity, type SmsConfig, SmsManager, type SmsMessage, SmsPoller, type SmsProvider, type SpamCategory, type SpamResult, type SpamRuleMatch, StalwartAdmin, type StalwartAdminOptions, type StalwartPrincipal, type StartPhoneCallOptions, type StartPhoneCallResult, type StartPhoneMissionInput, TELEPHONY_TRANSPORT_CAPABILITIES, type TelephonyTransportCapability, ThreadCache, type ThreadCacheEntry, type 
ThreadCacheOptions, type ThreadIdInput, type TunnelConfig, TunnelManager, UnsafeApiUrlError, type UpdateMemoryInput, type ValidatedPhoneMissionStart, WARNING_THRESHOLD, type WatcherOptions, assertWithinBase, bridgeWakeErrorMessage, bridgeWakeLastSeenAgeMs, buildApiUrl, buildElksAudioMessage, buildElksByeMessage, buildElksHandshakeMessages, buildElksInterruptMessage, buildElksListeningMessage, buildElksSendingMessage, buildInboundSecurityAdvisory, buildPhoneTransportConfig, classifyEmailRoute, classifyPhoneNumberRisk, classifyResumeError, closeDatabase, composeBridgeWakePrompt, createTestDatabase, debug, debugWarn, ensureDataDir, extractVerificationCode, flushTelemetry, forgetHostSession, getDatabase, getOperatorEmail, getSmsProvider, hostSessionStoragePath, inferPhoneRegion, isInternalEmail, isLoopbackMailHost, isPhoneRegionAllowed, isSessionFresh, isValidPhoneNumber, loadHostSession, mapProviderSmsStatus, normalizeAddress, normalizePhoneNumber, normalizeSubject, operatorPrefsStoragePath, parseElksRealtimeMessage, parseEmail, parseGoogleVoiceSms, planBridgeWake, recordToolCall, redactObject, redactPhoneTransportConfig, redactSecret, redactSmsConfig, resolveConfig, resolveTlsRejectUnauthorized, safeJoin, sanitizeEmail, saveConfig, saveHostSession, scanOutboundEmail, scoreEmail, setOperatorEmail, setTelemetryVersion, shouldSkipBridgeWakeForLiveOperator, startRelayBridge, stem, threadIdFor, tokenize, tryJoin, validateApiUrl, validatePhoneMissionPolicy, validatePhoneMissionStart, validatePhoneTransportProfile };
5229
+ export { AGENT_ROLES, ASK_OPERATOR_TOOL, AccountManager, type AddressInfo, type Agent, AgentDeletionService, type AgentMemoryEntry, type AgentMemoryFields, AgentMemoryManager, type AgentMemoryOptions, type AgentMemoryRead, AgentMemoryStore, type AgentRole, AgenticMailClient, type AgenticMailClientOptions, type AgenticMailConfig, type ArchiveAndDeleteOptions, type ArchivedEmail, type Attachment, type AttachmentAdvisory, type AudioAction, type AudioEditOptions, BRIDGE_OPERATOR_LIVE_WINDOW_MS, type BridgeMailContext, type BridgeWakeError, type BridgeWakePromptArgs, type BridgeWakeResult, type BridgeWakeRoute, type CachedMessage, CloudflareClient, type CreateAgentOptions, type CreateMemoryInput, DEFAULT_AGENT_NAME, DEFAULT_AGENT_ROLE, DEFAULT_REALTIME_AUDIO_FORMAT, DEFAULT_REALTIME_MODEL, DEFAULT_REALTIME_VOICE, DEFAULT_SESSION_MAX_AGE_MS, DEFAULT_WEB_SEARCH_ENDPOINT, DNSConfigurator, type Database, type DeletionReport, type DeletionSummary, DependencyChecker, DependencyInstaller, type DependencyStatus, type DnsRecord, type DnsSetupResult, type DomainInfo, DomainManager, type DomainModeConfig, type DomainPurchaseResult, DomainPurchaser, type DomainSearchResult, type DomainSetupResult, ELKS_REALTIME_AUDIO_FORMATS, ELKS_REALTIME_WS_PATH, type ElksRealtimeAudioFormat, type ElksRealtimeAudioMessage, type ElksRealtimeByeMessage, type ElksRealtimeHelloMessage, type ElksRealtimeInboundMessage, type ElksRealtimeOutboundMessage, ElksRealtimeTransport, type EmailEnvelope, type EmailRouteAction, type EmailRouteClass, type EmailRouteClassification, type EmailRouteInput, EmailSearchIndex, type FolderInfo, GET_DATETIME_TOOL, type GatewayConfig, GatewayManager, type GatewayManagerOptions, type GatewayMode, type GatewayStatus, type GetDatetimeOptions, type GetUpdatesOptions, type HostName, type HostSession, type HostSessionResumeMode, type ImageAction, type ImageEditOptions, type InboundEmail, type InboundSmsEvent, type InboxEvent, type InboxExpungeEvent, type InboxFlagsEvent, type 
InboxNewEvent, InboxWatcher, type InboxWatcherOptions, type InstallProgress, type LinkAdvisory, type LocalSmtpConfig, MEMORY_CATEGORIES, MailReceiver, type MailReceiverOptions, MailSender, type MailSenderOptions, type MailboxInfo, type MediaBinary, type MediaCapability, type MediaCapabilityReport, type MediaFileResult, type MediaInfoResult, MediaManager, type MediaManagerOptions, type MediaStreamInfo, type MemoryCategory, type MemoryImportance, type MemoryQueryOptions, type MemoryRecaller, MemorySearchIndex, type MemorySource, type MemoryStats, OPENAI_REALTIME_URL, OPERATOR_QUERY_POLL_INTERVAL_MS, OPERATOR_QUERY_SUBJECT_TAG, OPERATOR_QUERY_TIMEOUT_MS, OPERATOR_QUERY_TIMEOUT_SENTINEL, type OpenClawPhoneMissionPolicy, type OperatorQueryNotificationInput, type OperatorQueryPollOptions, type OperatorQueryUrgency, type OperatorReplyKind, type OutboundCategory, type OutboundScanInput, type OutboundScanResult, type OutboundWarning, PHONE_CALL_CONTROL_PROVIDERS, PHONE_MAX_CONCURRENT_MISSIONS, PHONE_MIN_WEBHOOK_SECRET_LENGTH, PHONE_MISSION_STATES, PHONE_RATE_LIMIT_PER_HOUR, PHONE_RATE_LIMIT_PER_MINUTE, PHONE_REGION_SCOPES, PHONE_SERVER_MAX_ATTEMPTS, PHONE_SERVER_MAX_CALL_DURATION_SECONDS, PHONE_SERVER_MAX_COST_PER_MISSION, PHONE_TASK_MAX_LENGTH, type ParsedAttachment, type ParsedEmail, type ParsedOperatorReply, type ParsedSms, type ParsedTelegramMessage, PathTraversalError, type PhoneAlternativePolicy, type PhoneCallMission, type PhoneConfirmPolicy, PhoneManager, type PhoneMissionStartValidationResult, type PhoneMissionState, type PhoneMissionTranscriptEntry, type PhoneMissionValidationIssue, type PhoneMissionValidationResult, type PhoneNumberRisk, type PhoneOperatorQuery, PhoneRateLimitError, type PhoneRegionScope, type PhoneTransportConfig, type PhoneTransportProfile, type PhoneTransportProvider, type PhoneTransportValidationResult, PhoneWebhookAuthError, type PhoneWebhookResult, type PlanBridgeWakeArgs, type PurchasedDomain, REALTIME_AUDIO_SAMPLE_RATE, 
REALTIME_MAX_AUDIO_FRAME_BASE64, REALTIME_TOOL_CALL_TIMEOUT_MS, REALTIME_TOOL_DEFINITIONS, RECALL_MEMORY_TOOL, REDACTED, RELAY_PRESETS, type RealtimeBridgePort, type RealtimeBridgeTranscriptEntry, type RealtimeInboundEvent, type RealtimeInstructionOptions, type RealtimeSessionConfigOptions, type RealtimeToolCall, type RealtimeToolDefinition, type RealtimeToolHandler, type RealtimeToolResult, type RealtimeTransportAdapter, type RealtimeTransportProvider, RealtimeVoiceBridge, type RealtimeVoiceBridgeOptions, RelayBridge, type RelayBridgeOptions, type RelayConfig, RelayGateway, type RelayProvider, type RelaySearchResult, type ResumeErrorClassificationOptions, SEARCH_EMAIL_TOOL, SPAM_THRESHOLD, type SafeJoinOptions, type SanitizeDetection, type SanitizeResult, type SearchCriteria, type SearchableEmail, type SecurityAdvisory, type SendMailOptions, type SendResult, type SendResultWithRaw, type SendSmsInput, type SendSmsResult, type SendTelegramMessageOptions, type SendTelegramMessageResult, ServiceManager, type ServiceStatus, type SetWebhookOptions, type SetupConfig, SetupManager, type SetupResult, type Severity, type SmsConfig, SmsManager, type SmsMessage, SmsPoller, type SmsProvider, type SpamCategory, type SpamResult, type SpamRuleMatch, StalwartAdmin, type StalwartAdminOptions, type StalwartPrincipal, type StartPhoneCallOptions, type StartPhoneCallResult, type StartPhoneMissionInput, TELEGRAM_API_BASE, TELEGRAM_CHUNK_SIZE, TELEGRAM_MESSAGE_LIMIT, TELEGRAM_MIN_WEBHOOK_SECRET_LENGTH, TELEGRAM_OPERATOR_QUERY_TAG, TELEGRAM_STOP_WORDS, TELEGRAM_WEBHOOK_SECRET_RE, TELEPHONY_TRANSPORT_CAPABILITIES, TWILIO_MEDIA_SAMPLE_RATE, TWILIO_REALTIME_WS_PATH, TelegramApiError, type TelegramApiOptions, type TelegramBotInfo, type TelegramChatType, type TelegramConfig, TelegramManager, type TelegramMessage, type TelegramMode, type TelephonyTransportCapability, ThreadCache, type ThreadCacheEntry, type ThreadCacheOptions, type ThreadIdInput, type ToolExecutor, type TtsGenerateOptions, type 
TunnelConfig, TunnelManager, type TwilioConnectedMessage, type TwilioMarkMessage, type TwilioMediaMessage, type TwilioRealtimeInboundMessage, type TwilioRealtimeOutboundMessage, TwilioRealtimeTransport, type TwilioStartMessage, type TwilioStopMessage, type TwilioStreamTwiMLOptions, UnsafeApiUrlError, type UpdateMemoryInput, type ValidatedPhoneMissionStart, type VideoAction, type VideoEditOptions, type VideoTimelineEntry, type VideoUnderstandOptions, type VideoUnderstandResult, type VoiceCloneOptions, WARNING_THRESHOLD, WEB_SEARCH_TOOL, WEB_SEARCH_UNTRUSTED_PREFIX, type WatcherOptions, type WebSearchOptions, assertWithinBase, bridgeWakeErrorMessage, bridgeWakeLastSeenAgeMs, buildApiUrl, buildElksAudioMessage, buildElksByeMessage, buildElksHandshakeMessages, buildElksInterruptMessage, buildElksListeningMessage, buildElksSendingMessage, buildInboundSecurityAdvisory, buildOpenAIRealtimeUrl, buildPhoneTransportConfig, buildRealtimeInstructions, buildRealtimeSessionConfig, buildRealtimeToolGuidance, buildTwilioClearMessage, buildTwilioMarkMessage, buildTwilioMediaMessage, buildTwilioSayTwiML, buildTwilioSignature, buildTwilioStreamTwiML, callTelegramApi, classifyEmailRoute, classifyPhoneNumberRisk, classifyResumeError, clearMediaCapabilityCache, closeDatabase, composeBridgeWakePrompt, createRealtimeTransport, createTestDatabase, createToolExecutor, debug, debugWarn, deleteTelegramWebhook, detectBinary, ensureDataDir, escapeXml, extractEmailAddress, extractVerificationCode, flushTelemetry, forgetHostSession, formatOperatorQueryTelegramMessage, getDatabase, getDatetime, getMediaCapabilities, getOperatorEmail, getSmsProvider, getTelegramChat, getTelegramMe, getTelegramUpdates, getTelegramWebhookInfo, hostSessionStoragePath, inferPhoneRegion, isInternalEmail, isLoopbackMailHost, isOperatorReplySender, isPhoneRegionAllowed, isSessionFresh, isTelegramChatAllowed, isTelegramStopCommand, isValidPhoneNumber, loadHostSession, mapProviderSmsStatus, nextTelegramOffset, 
normalizeAddress, normalizePhoneNumber, normalizeSubject, operatorPrefsStoragePath, operatorQuerySubject, parseElksRealtimeMessage, parseEmail, parseGoogleVoiceSms, parseOperatorQueryReply, parseTelegramOperatorReply, parseTelegramUpdate, parseTwilioRealtimeMessage, planBridgeWake, pollForOperatorAnswer, recallMemory, recordToolCall, redactBotToken, redactObject, redactPhoneTransportConfig, redactSecret, redactSmsConfig, redactTelegramConfig, requireBinary, requireWhisperModel, resolveConfig, resolveTlsRejectUnauthorized, safeJoin, sanitizeEmail, saveConfig, saveHostSession, scanOutboundEmail, scoreEmail, sendTelegramMessage, setOperatorEmail, setTelegramWebhook, setTelemetryVersion, shouldSkipBridgeWakeForLiveOperator, splitTelegramMessage, startRelayBridge, stem, stripTelegramMarkdown, threadIdFor, tokenize, tryJoin, validateApiUrl, validatePhoneMissionPolicy, validatePhoneMissionStart, validatePhoneTransportProfile, validateTwilioSignature, webSearch };