@agenticmail/core 0.9.15 → 0.9.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -220,6 +220,15 @@ interface AgenticMailConfig {
220
220
  * system event (visible in the web UI) but no email is sent.
221
221
  */
222
222
  operatorEmail?: string;
223
+ /**
224
+ * OpenAI API key — used only by the realtime voice bridge to open an
225
+ * OpenAI Realtime (`gpt-realtime`) session for live phone calls. When
226
+ * unset, phone missions still place and track calls (call-control),
227
+ * but a 46elks realtime-media WebSocket cannot be bridged to a
228
+ * conversational model. Read from the `OPENAI_API_KEY` env var or
229
+ * `config.json`. Optional — no other feature depends on it.
230
+ */
231
+ openaiApiKey?: string;
223
232
  masterKey: string;
224
233
  dataDir: string;
225
234
  }
@@ -1692,6 +1701,396 @@ declare class SmsPoller {
1692
1701
  private pollOnce;
1693
1702
  }
1694
1703
 
1704
+ /**
1705
+ * Telegram Bot API client — dependency-free (global `fetch`, Node 22+).
1706
+ *
1707
+ * Ported and MERGED from two already-tuned internal sources (licensing
1708
+ * confirmed by Ope, 2026-05-19 — plan §13.5):
1709
+ * - enterprise `agent-tools/tools/messaging/telegram.ts`
1710
+ * (`tgApi`, `stripMarkdown`, webhook management)
1711
+ * - agent-harness `fola-lib/telegram-api.mjs`
1712
+ * (auto-splitting `sendMessage`, the long-poll timeout discipline)
1713
+ *
1714
+ * Host-specific pieces are intentionally dropped to fit the single-tenant
1715
+ * open-source product: the local Bot API server auto-detection
1716
+ * (`http://localhost:8081`) and the filesystem media-download paths are
1717
+ * Fola-host infrastructure, not channel logic.
1718
+ *
1719
+ * Secrets: the bot token rides in the request URL path. Every error this
1720
+ * module surfaces is scrubbed with {@link redactBotToken} so a token can
1721
+ * never reach a log line or an API response — matching the SMS/phone
1722
+ * credential-handling bar.
1723
+ */
1724
+ /** Official Telegram Bot API base. */
1725
+ declare const TELEGRAM_API_BASE = "https://api.telegram.org";
1726
+ /** Telegram's hard per-message ceiling is 4096 characters. */
1727
+ declare const TELEGRAM_MESSAGE_LIMIT = 4096;
1728
+ /** We split below the hard limit, leaving headroom for safety. */
1729
+ declare const TELEGRAM_CHUNK_SIZE = 4000;
1730
+ /** A failed Telegram Bot API call. `description` is the provider message. */
1731
+ declare class TelegramApiError extends Error {
1732
+ readonly isTelegramApiError = true;
1733
+ readonly description: string;
1734
+ readonly errorCode?: number;
1735
+ constructor(method: string, description: string, errorCode?: number);
1736
+ }
1737
+ /**
1738
+ * Scrub bot tokens from any string before it can be logged or returned.
1739
+ *
1740
+ * A token looks like `<digits>:<35 url-safe chars>`. We redact both an
1741
+ * explicitly-known token (exact match) and anything matching the generic
1742
+ * token shape — so a token leaks neither when the caller knows it nor
1743
+ * when it is buried in, say, a `fetch` failure message carrying the URL.
1744
+ */
1745
+ declare function redactBotToken(text: string, token?: string): string;
1746
+ interface TelegramApiOptions {
1747
+ /**
1748
+ * Long-poll requests (`getUpdates` with a non-zero `timeout`) need an
1749
+ * HTTP timeout longer than the server-side poll window, or the socket
1750
+ * is torn down before Telegram replies.
1751
+ */
1752
+ longPoll?: boolean;
1753
+ }
1754
+ /**
1755
+ * POST a Telegram Bot API method with a JSON body and return `result`.
1756
+ * Throws {@link TelegramApiError} (token-scrubbed) on any failure.
1757
+ */
1758
+ declare function callTelegramApi<T = unknown>(token: string, method: string, body?: Record<string, unknown>, options?: TelegramApiOptions): Promise<T>;
1759
+ /**
1760
+ * Strip Markdown so agent replies arrive as clean plain text. Telegram's
1761
+ * Markdown parse modes are bypassed entirely (no `parse_mode`) to avoid
1762
+ * formatting collisions on arbitrary agent output.
1763
+ */
1764
+ declare function stripTelegramMarkdown(text: string): string;
1765
+ /**
1766
+ * Split text into <= `maxLen` chunks, cutting on a newline boundary when
1767
+ * one is reasonably close so messages do not break mid-word.
1768
+ */
1769
+ declare function splitTelegramMessage(text: string, maxLen?: number): string[];
1770
+ interface SendTelegramMessageOptions {
1771
+ /** Reply to a specific message id in the chat. */
1772
+ replyToMessageId?: number;
1773
+ /** Deliver silently (no push notification). */
1774
+ disableNotification?: boolean;
1775
+ }
1776
+ interface SendTelegramMessageResult {
1777
+ /** message_id of each chunk Telegram accepted, in order. */
1778
+ messageIds: number[];
1779
+ /** Number of chunks the text was split into. */
1780
+ chunks: number;
1781
+ }
1782
+ /**
1783
+ * Send a text message, auto-splitting anything over the per-message
1784
+ * ceiling. The reply target (if any) is attached only to the first
1785
+ * chunk so a long reply still threads correctly.
1786
+ */
1787
+ declare function sendTelegramMessage(token: string, chatId: string | number, text: string, options?: SendTelegramMessageOptions): Promise<SendTelegramMessageResult>;
1788
+ interface TelegramBotInfo {
1789
+ id: number;
1790
+ is_bot: boolean;
1791
+ username?: string;
1792
+ first_name?: string;
1793
+ }
1794
+ /** `getMe` — used to validate a token and capture the bot identity. */
1795
+ declare function getTelegramMe(token: string): Promise<TelegramBotInfo>;
1796
+ /** `getChat` — chat metadata (title, type, member count, ...). */
1797
+ declare function getTelegramChat(token: string, chatId: string | number): Promise<Record<string, unknown>>;
1798
+ interface GetUpdatesOptions {
1799
+ /** Max updates per call (1-100). */
1800
+ limit?: number;
1801
+ /** Long-poll window in seconds (0 = short poll). */
1802
+ timeoutSec?: number;
1803
+ }
1804
+ /** `getUpdates` long-poll — the poll-mode transport. */
1805
+ declare function getTelegramUpdates(token: string, offset: number, options?: GetUpdatesOptions): Promise<Array<Record<string, unknown>>>;
1806
+ interface SetWebhookOptions {
1807
+ /**
1808
+ * Shared secret echoed by Telegram in the
1809
+ * `X-Telegram-Bot-Api-Secret-Token` header on every webhook delivery.
1810
+ */
1811
+ secretToken?: string;
1812
+ /** Drop updates that queued up while no webhook was set. */
1813
+ dropPendingUpdates?: boolean;
1814
+ }
1815
+ /** `setWebhook` — register the inbound webhook URL (webhook-mode transport). */
1816
+ declare function setTelegramWebhook(token: string, url: string, options?: SetWebhookOptions): Promise<boolean>;
1817
+ /** `deleteWebhook` — switch back to poll mode. */
1818
+ declare function deleteTelegramWebhook(token: string): Promise<boolean>;
1819
+ /** `getWebhookInfo` — current webhook status. */
1820
+ declare function getTelegramWebhookInfo(token: string): Promise<Record<string, unknown>>;
1821
+
1822
+ /**
1823
+ * Telegram update parsing — pure, dependency-free.
1824
+ *
1825
+ * Ported from the inbound-message handling in the agent-harness Fola
1826
+ * bridge (`fola-telegram-bridge.mjs` — the `formatPrompt` header fields
1827
+ * and the `STOP_WORDS` abort set), stripped of everything Fola-host-specific
1828
+ * (session routing, the fola-claude prompt envelope, media downloads).
1829
+ */
1830
+ type TelegramChatType = 'private' | 'group' | 'supergroup' | 'channel' | 'unknown';
1831
+ /** A normalized inbound Telegram text message. */
1832
+ interface ParsedTelegramMessage {
1833
+ /** Telegram `update_id` — drives the poll offset. */
1834
+ updateId: number;
1835
+ /** `message_id` within the chat. */
1836
+ messageId: number;
1837
+ /** Chat id as a string (Telegram ids are 64-bit; strings avoid loss). */
1838
+ chatId: string;
1839
+ chatType: TelegramChatType;
1840
+ chatTitle?: string;
1841
+ /** Sender id as a string; falls back to the chat id for channel posts. */
1842
+ fromId: string;
1843
+ fromName: string;
1844
+ fromUsername?: string;
1845
+ /** Message text (or media caption). Always non-empty for a parsed result. */
1846
+ text: string;
1847
+ /** `message_id` of the message this one replies to, if any. */
1848
+ replyToMessageId?: number;
1849
+ /** Text of the replied-to message, when Telegram included it. */
1850
+ replyToText?: string;
1851
+ /** ISO-8601 timestamp derived from the Telegram `date` epoch. */
1852
+ date: string;
1853
+ }
1854
+ /**
1855
+ * Normalize a raw Telegram update into a {@link ParsedTelegramMessage},
1856
+ * or `null` when it carries no usable text (callback queries, service
1857
+ * messages, edits with no text, non-text media without a caption).
1858
+ * Handles both `message` and `channel_post` envelopes.
1859
+ */
1860
+ declare function parseTelegramUpdate(update: unknown): ParsedTelegramMessage | null;
1861
+ /**
1862
+ * Words that, sent alone (case-insensitive, optional trailing
1863
+ * punctuation), signal "abort whatever you are doing for me". Kept
1864
+ * deliberately narrow — anything longer or ambiguous is a normal
1865
+ * message. Ported verbatim from the Fola bridge.
1866
+ */
1867
+ declare const TELEGRAM_STOP_WORDS: ReadonlySet<string>;
1868
+ /** True when `text` is a bare stop command. */
1869
+ declare function isTelegramStopCommand(text: string): boolean;
1870
+ /**
1871
+ * Compute the next `getUpdates` offset from a batch: one past the
1872
+ * highest `update_id` seen. Advancing the offset is what acknowledges
1873
+ * updates to Telegram, so this must run on the raw batch even if
1874
+ * individual updates fail to parse.
1875
+ */
1876
+ declare function nextTelegramOffset(currentOffset: number, updates: Array<{
1877
+ update_id?: unknown;
1878
+ }>): number;
1879
+
1880
+ /**
1881
+ * Telegram Manager — per-agent Telegram bot configuration + message log.
1882
+ *
1883
+ * Modeled on the existing {@link import('../sms/manager.js').SmsManager} — the
1884
+ * closest existing channel — so the hardening bar matches:
1885
+ * - Config lives in agent metadata under the `telegram` key.
1886
+ * - Credential fields (`botToken`, `webhookSecret`) are encrypted at
1887
+ * rest with the same AES-256-GCM scheme (`encryptSecret`) and
1888
+ * redacted on every read that leaves the process.
1889
+ * - Inbound/outbound messages are stored in a `telegram_messages` table.
1890
+ *
1891
+ * Single-tenant: there is no org / multi-tenant scoping and nothing is
1892
+ * hardcoded — every bot token, chat id and operator identity is
1893
+ * per-agent config supplied by the user.
1894
+ */
1895
+
1896
+ /** Transport mode: long-poll `getUpdates`, or a registered webhook. */
1897
+ type TelegramMode = 'poll' | 'webhook';
1898
+ interface TelegramConfig {
1899
+ /** Whether the Telegram channel is active for this agent. */
1900
+ enabled: boolean;
1901
+ /** Bot API token from @BotFather. SECRET — encrypted at rest, redacted on read. */
1902
+ botToken: string;
1903
+ /** Bot @username (non-secret, for display). Captured from `getMe` at setup. */
1904
+ botUsername?: string;
1905
+ /** Numeric bot id (non-secret). Captured from `getMe` at setup. */
1906
+ botId?: number;
1907
+ /**
1908
+ * Chat ids permitted to message the agent. EMPTY = nobody (fail-closed,
1909
+ * same posture as the Fola bridge). The operator chat is always allowed.
1910
+ */
1911
+ allowedChatIds: string[];
1912
+ /** Chat that receives `ask_operator` notifications + can approve (plan §13.4). */
1913
+ operatorChatId?: string;
1914
+ /** Inbound transport. */
1915
+ mode: TelegramMode;
1916
+ /** Public webhook URL (webhook mode only). */
1917
+ webhookUrl?: string;
1918
+ /**
1919
+ * Shared secret echoed by Telegram in the
1920
+ * `X-Telegram-Bot-Api-Secret-Token` header. SECRET — encrypted at
1921
+ * rest, redacted on read. Doubles as the webhook routing key.
1922
+ */
1923
+ webhookSecret?: string;
1924
+ /** Persisted `getUpdates` offset (poll mode). */
1925
+ pollOffset?: number;
1926
+ /** When the channel was configured. */
1927
+ configuredAt: string;
1928
+ }
1929
+ interface TelegramMessage {
1930
+ id: string;
1931
+ agentId: string;
1932
+ direction: 'inbound' | 'outbound';
1933
+ chatId: string;
1934
+ /** Telegram `message_id` (may be absent for a failed outbound attempt). */
1935
+ telegramMessageId?: number;
1936
+ /** Sender id (inbound only). */
1937
+ fromId?: string;
1938
+ text: string;
1939
+ status: 'received' | 'sent' | 'failed' | 'pending';
1940
+ createdAt: string;
1941
+ metadata?: Record<string, unknown>;
1942
+ }
1943
+ /** Telegram's `secret_token` charset, and our minimum entropy floor. */
1944
+ declare const TELEGRAM_WEBHOOK_SECRET_RE: RegExp;
1945
+ declare const TELEGRAM_MIN_WEBHOOK_SECRET_LENGTH = 16;
1946
+ /**
1947
+ * Redact the credential fields of a Telegram config for any value that
1948
+ * leaves the process (API responses, logs). The bot token is collapsed
1949
+ * to `***` entirely — never partially shown — matching the SMS bar and
1950
+ * the plan §13.5 rule "no token ever in a log line or API response".
1951
+ */
1952
+ declare function redactTelegramConfig(config: TelegramConfig): TelegramConfig;
1953
+ /**
1954
+ * Allow-list gate for INBOUND messages. A chat may talk to the agent
1955
+ * only if it is on `allowedChatIds` OR it is the configured operator
1956
+ * chat. An empty allow-list means nobody but the operator chat — fail closed.
1957
+ */
1958
+ declare function isTelegramChatAllowed(config: TelegramConfig, chatId: string): boolean;
1959
+ declare class TelegramManager {
1960
+ private db;
1961
+ private encryptionKey?;
1962
+ private initialized;
1963
+ /**
1964
+ * Optional master key used to encrypt Telegram credentials at rest
1965
+ * (the same AES-256-GCM scheme SMS/phone use). When absent (tests, or
1966
+ * a deployment with no master key) configs are stored as-is and reads
1967
+ * tolerate plaintext — upgrades and downgrades both stay safe.
1968
+ */
1969
+ constructor(db: Database, encryptionKey?: string | undefined);
1970
+ private ensureTable;
1971
+ /** Encrypt the credential fields of a config before persisting. */
1972
+ private encryptConfig;
1973
+ /** Decrypt the credential fields of a config after loading. */
1974
+ private decryptConfig;
1975
+ /** Normalize a stored/loaded config object, defaulting missing fields. */
1976
+ private normalizeConfig;
1977
+ /** Get the Telegram config from agent metadata (credentials decrypted). */
1978
+ getConfig(agentId: string): TelegramConfig | null;
1979
+ /** Save the Telegram config to agent metadata (credentials encrypted). */
1980
+ saveConfig(agentId: string, config: TelegramConfig): void;
1981
+ /** Remove the Telegram config from agent metadata. */
1982
+ removeConfig(agentId: string): void;
1983
+ /** Persist a new poll offset without touching the rest of the config. */
1984
+ updatePollOffset(agentId: string, offset: number): void;
1985
+ /**
1986
+ * Resolve the agent that owns a webhook secret. Used to authenticate +
1987
+ * route an inbound Telegram webhook delivery: a webhook carries no bot
1988
+ * identity, so the `X-Telegram-Bot-Api-Secret-Token` header is the
1989
+ * routing key. The comparison is constant-time, and a non-match
1990
+ * returns `null` so the route can answer with a single uniform 403
1991
+ * (no enumeration oracle — same posture as the SMS webhook).
1992
+ */
1993
+ findAgentByWebhookSecret(secret: string): {
1994
+ agentId: string;
1995
+ config: TelegramConfig;
1996
+ } | null;
1997
+ /** True if an inbound message with this Telegram id is already stored. */
1998
+ inboundMessageExists(agentId: string, chatId: string, telegramMessageId: number): boolean;
1999
+ /** Record an inbound Telegram message. */
2000
+ recordInbound(agentId: string, input: {
2001
+ chatId: string;
2002
+ telegramMessageId: number;
2003
+ fromId?: string;
2004
+ text: string;
2005
+ createdAt?: string;
2006
+ }, metadata?: Record<string, unknown>): TelegramMessage;
2007
+ /** Record an outbound Telegram message attempt. */
2008
+ recordOutbound(agentId: string, input: {
2009
+ chatId: string;
2010
+ text: string;
2011
+ telegramMessageId?: number;
2012
+ status?: TelegramMessage['status'];
2013
+ }, metadata?: Record<string, unknown>): TelegramMessage;
2014
+ /** Update the status (+ optional metadata) of a stored message. */
2015
+ updateStatus(id: string, status: TelegramMessage['status'], metadata?: Record<string, unknown>): void;
2016
+ /** List stored Telegram messages for an agent, newest first. */
2017
+ listMessages(agentId: string, opts?: {
2018
+ direction?: 'inbound' | 'outbound';
2019
+ chatId?: string;
2020
+ limit?: number;
2021
+ offset?: number;
2022
+ }): TelegramMessage[];
2023
+ }
2024
+
2025
+ /**
2026
+ * Telegram ⇄ `ask_operator` bridge (plan §13.4 / §13.5) — pure helpers.
2027
+ *
2028
+ * The realtime voice agent's `ask_operator` tool records an *operator
2029
+ * query* on the phone mission; the API exposes it through the
2030
+ * operator-query endpoints. Email is the channel-agnostic default
2031
+ * notifier (plan §5). This module makes Telegram a first-class
2032
+ * notification + approval channel WITHOUT duplicating any of that
2033
+ * machinery — it only:
2034
+ *
2035
+ * 1. {@link formatOperatorQueryTelegramMessage} — renders the
2036
+ * notification text the operator receives.
2037
+ * 2. {@link parseTelegramOperatorReply} — parses the operator's
2038
+ * Telegram reply back into a `{ queryId, answer }`.
2039
+ *
2040
+ * The caller feeds the parsed result straight into the SAME
2041
+ * `PhoneManager.answerOperatorQuery` the inbound email-reply hook uses.
2042
+ */
2043
+ /**
2044
+ * Token embedded in a notification so the operator's reply can be
2045
+ * matched back to a query. Kept short — the operator may see it.
2046
+ */
2047
+ declare const TELEGRAM_OPERATOR_QUERY_TAG = "AMQ";
2048
+ interface OperatorQueryNotificationInput {
2049
+ queryId: string;
2050
+ question: string;
2051
+ callContext?: string;
2052
+ urgency?: string;
2053
+ missionId?: string;
2054
+ }
2055
+ /**
2056
+ * Render the Telegram message body for an `ask_operator` notification.
2057
+ * The trailing `[AMQ <id>]` token lets a reply be matched to the query
2058
+ * even when the operator does not use Telegram's native reply gesture.
2059
+ */
2060
+ declare function formatOperatorQueryTelegramMessage(input: OperatorQueryNotificationInput): string;
2061
+ /** A `kind` of `approve`/`deny` is a decision; `answer` is free-form. */
2062
+ type OperatorReplyKind = 'answer' | 'approve' | 'deny';
2063
+ interface ParsedOperatorReply {
2064
+ /**
2065
+ * Query id when the reply names one — explicitly (`/answer <id>`,
2066
+ * an inline `oq_…` token) or implicitly (a native Telegram reply
2067
+ * quoting a `[AMQ <id>]`-tagged notification). `undefined` when the
2068
+ * reply is a bare message and the caller must resolve the target.
2069
+ */
2070
+ queryId?: string;
2071
+ /** The answer text to record against the query. Always non-empty. */
2072
+ answer: string;
2073
+ kind: OperatorReplyKind;
2074
+ }
2075
+ /**
2076
+ * Parse an operator's Telegram message into a {@link ParsedOperatorReply},
2077
+ * or `null` when it carries no usable answer.
2078
+ *
2079
+ * Recognized forms (most explicit first):
2080
+ * - `/answer <queryId> <text>`
2081
+ * - `/approve [<queryId>] [note]` · `/deny [<queryId>] [note]`
2082
+ * - a plain message — `queryId` is taken from a quoted tagged
2083
+ * notification (`replyToText`) or an inline `oq_…` / `[AMQ …]` token,
2084
+ * and is otherwise left `undefined` for the caller to resolve.
2085
+ *
2086
+ * The `@botname` suffix Telegram appends to commands in groups
2087
+ * (`/approve@mybot`) is tolerated.
2088
+ */
2089
+ declare function parseTelegramOperatorReply(input: {
2090
+ text: string;
2091
+ replyToText?: string;
2092
+ }): ParsedOperatorReply | null;
2093
+
1695
2094
  declare const ELKS_REALTIME_AUDIO_FORMATS: readonly ["ulaw", "pcm_16000", "pcm_24000", "wav"];
1696
2095
  type ElksRealtimeAudioFormat = typeof ELKS_REALTIME_AUDIO_FORMATS[number];
1697
2096
  interface ElksRealtimeHelloMessage {
@@ -1737,6 +2136,1093 @@ declare function buildElksHandshakeMessages(options?: {
1737
2136
  sendFormat?: ElksRealtimeAudioFormat;
1738
2137
  }): ElksRealtimeOutboundMessage[];
1739
2138
 
2139
+ /**
2140
+ * Twilio Media Streams wire protocol.
2141
+ *
2142
+ * The realtime-voice counterpart of `realtime.ts` (the 46elks protocol).
2143
+ * Twilio connects a call's audio to a WebSocket via a TwiML
2144
+ * `<Connect><Stream url="wss://…"/></Connect>`; over that socket Twilio
2145
+ * speaks JSON messages keyed by an `event` field.
2146
+ *
2147
+ * Inbound (Twilio → us):
2148
+ * - `{ event: "connected", protocol, version }`
2149
+ * - `{ event: "start", start: { streamSid, callSid, accountSid,
2150
+ * mediaFormat, tracks, customParameters } }`
2151
+ * - `{ event: "media", media: { payload: "<base64 µ-law>", track,
2152
+ * chunk, timestamp } }`
2153
+ * - `{ event: "stop", stop: { accountSid, callSid } }`
2154
+ * - `{ event: "mark", mark: { name } }`
2155
+ *
2156
+ * Outbound (us → Twilio) — every outbound frame must echo the
2157
+ * `streamSid` Twilio handed us in the `start` event:
2158
+ * - `{ event: "media", streamSid, media: { payload: "<base64 µ-law>" } }`
2159
+ * - `{ event: "mark", streamSid, mark: { name } }`
2160
+ * - `{ event: "clear", streamSid }` — flush buffered playback
2161
+ * (barge-in / interrupt; the analogue of 46elks `interrupt`).
2162
+ *
2163
+ * The audio is G.711 µ-law, 8 kHz, mono, base64 — unlike 46elks which
2164
+ * uses linear PCM. OpenAI's GA Realtime API speaks µ-law natively
2165
+ * (`{ type: "audio/pcmu" }` @ 8 kHz), so on a Twilio call the bridge
2166
+ * does NO transcoding end to end.
2167
+ *
2168
+ * > The Media Streams message shapes above follow Twilio's public
2169
+ * > `<Stream>` documentation. Verify against current docs before the
2170
+ * > live smoke-test (same discipline as the 46elks / OpenAI wire notes).
2171
+ */
2172
+ /** µ-law payload sample rate Twilio Media Streams uses, in Hz. */
2173
+ declare const TWILIO_MEDIA_SAMPLE_RATE = 8000;
2174
+ /** The `connected` handshake frame — first frame Twilio sends. */
2175
+ interface TwilioConnectedMessage {
2176
+ event: 'connected';
2177
+ protocol?: string;
2178
+ version?: string;
2179
+ [key: string]: unknown;
2180
+ }
2181
+ /**
2182
+ * The `start` frame — carries the `streamSid` every outbound frame must
2183
+ * echo, plus the `callSid` we resolve the phone mission from. Twilio
2184
+ * sends exactly one `start` per stream, right after `connected`.
2185
+ */
2186
+ interface TwilioStartMessage {
2187
+ event: 'start';
2188
+ /** Stream identifier — required on every outbound media/mark/clear. */
2189
+ streamSid: string;
2190
+ /** The call SID — matches the `sid` from the Calls.json response. */
2191
+ callSid: string;
2192
+ accountSid?: string;
2193
+ mediaFormat?: {
2194
+ encoding?: string;
2195
+ sampleRate?: number;
2196
+ channels?: number;
2197
+ };
2198
+ tracks?: string[];
2199
+ /** `<Parameter>` values declared inside the TwiML `<Stream>`. */
2200
+ customParameters?: Record<string, string>;
2201
+ [key: string]: unknown;
2202
+ }
2203
+ /** A parsed `media` frame — one small base64 µ-law audio chunk; `payload`/`track` are top-level here, not nested under the wire frame's `media` object. */
2204
+ interface TwilioMediaMessage {
2205
+ event: 'media';
2206
+ /** Base64-encoded G.711 µ-law, 8 kHz mono. */
2207
+ payload: string;
2208
+ /** Inbound only: which leg the audio is from (`inbound`/`outbound`). */
2209
+ track?: string;
2210
+ }
2211
+ /** The `stop` frame — Twilio has stopped streaming this call's audio. */
2212
+ interface TwilioStopMessage {
2213
+ event: 'stop';
2214
+ callSid?: string;
2215
+ [key: string]: unknown;
2216
+ }
2217
+ /** A `mark` frame — a playback checkpoint echoed back when reached. */
2218
+ interface TwilioMarkMessage {
2219
+ event: 'mark';
2220
+ name: string;
2221
+ }
2222
+ type TwilioRealtimeInboundMessage = TwilioConnectedMessage | TwilioStartMessage | TwilioMediaMessage | TwilioStopMessage | TwilioMarkMessage;
2223
+ type TwilioRealtimeOutboundMessage = {
2224
+ event: 'media';
2225
+ streamSid: string;
2226
+ media: {
2227
+ payload: string;
2228
+ };
2229
+ } | {
2230
+ event: 'mark';
2231
+ streamSid: string;
2232
+ mark: {
2233
+ name: string;
2234
+ };
2235
+ } | {
2236
+ event: 'clear';
2237
+ streamSid: string;
2238
+ };
2239
+ /**
2240
+ * Parse one raw inbound Twilio Media Streams frame. Accepts a JSON
2241
+ * string or an already-parsed object. Throws on a malformed/unknown
2242
+ * frame — the caller (the bridge) swallows the throw and ignores the
2243
+ * frame, never tearing the call down for one bad message.
2244
+ */
2245
+ declare function parseTwilioRealtimeMessage(input: unknown): TwilioRealtimeInboundMessage;
2246
+ /**
2247
+ * Build an outbound `media` frame — synthesised agent audio for Twilio
2248
+ * to play to the caller. `data` may be a base64 string or raw bytes.
2249
+ * `streamSid` is the id Twilio handed us in the `start` event.
2250
+ */
2251
+ declare function buildTwilioMediaMessage(streamSid: string, data: string | Uint8Array): TwilioRealtimeOutboundMessage;
2252
+ /**
2253
+ * Build a `clear` frame — tells Twilio to flush any audio still
2254
+ * buffered for playback. This is how barge-in / interrupt works on a
2255
+ * Twilio call: when the caller starts talking the agent must stop
2256
+ * mid-sentence, so we drop everything queued. (46elks calls this
2257
+ * `interrupt`; Twilio calls it `clear`.)
2258
+ */
2259
+ declare function buildTwilioClearMessage(streamSid: string): TwilioRealtimeOutboundMessage;
2260
+ /**
2261
+ * Build a `mark` frame — a named playback checkpoint. Twilio echoes the
2262
+ * mark back (as an inbound `mark` event) once the audio queued before
2263
+ * it has actually been played to the caller. Useful for end-of-turn
2264
+ * detection; the bridge does not require it but exposes the builder for
2265
+ * parity with the protocol surface.
2266
+ */
2267
+ declare function buildTwilioMarkMessage(streamSid: string, name: string): TwilioRealtimeOutboundMessage;
2268
+
2269
+ /**
2270
+ * Twilio webhook helpers — request-signature validation and TwiML
2271
+ * generation.
2272
+ *
2273
+ * This is the call-control counterpart of the 46elks webhook handling
2274
+ * in `manager.ts`. Twilio secures every webhook it sends with an
2275
+ * `X-Twilio-Signature` header; we answer Twilio's voice webhook with
2276
+ * TwiML (an XML document) that connects the call's audio to the
2277
+ * realtime voice WebSocket.
2278
+ *
2279
+ * Everything here is pure (no I/O, no sockets) so it is fully
2280
+ * unit-testable and keeps `@agenticmail/core` dependency-light.
2281
+ */
2282
+ /**
2283
+ * Compute the Twilio request signature for a webhook request.
2284
+ *
2285
+ * Twilio's scheme (per its Security docs): take the full request URL,
2286
+ * then for an `application/x-www-form-urlencoded` POST append every
2287
+ * POST parameter — sorted by key — as `key` immediately followed by
2288
+ * `value`, with no separators. HMAC-SHA1 that string keyed by the
2289
+ * account `AuthToken`, and base64-encode the digest. Twilio sends the
2290
+ * result in the `X-Twilio-Signature` header.
2291
+ *
2292
+ * > Verify the exact construction against Twilio's current Security
2293
+ * > documentation before the live smoke-test.
2294
+ *
2295
+ * @param authToken the Twilio account AuthToken (the signing key)
2296
+ * @param url the full URL Twilio requested, exactly as configured
2297
+ * @param params the POST body parameters (form-encoded fields)
2298
+ */
2299
+ declare function buildTwilioSignature(authToken: string, url: string, params?: Record<string, string>): string;
2300
+ /**
2301
+ * Validate an `X-Twilio-Signature` header against the request, timing-
2302
+ * safe. Fails closed: a missing/empty signature or token, or any
2303
+ * mismatch, returns `false` — never throws. The comparison is constant-
2304
+ * time so a caller cannot probe the expected signature byte by byte.
2305
+ */
2306
+ declare function validateTwilioSignature(authToken: string, url: string, params: Record<string, string>, providedSignature: string): boolean;
2307
+ /**
2308
+ * XML-escape a string for safe inclusion in a TwiML attribute or text
2309
+ * node. TwiML is XML, so any value we interpolate (a websocket URL with
2310
+ * query params, a `<Parameter>` value) must have its metacharacters
2311
+ * escaped or a stray `&`/`"` would produce malformed XML.
2312
+ */
2313
+ declare function escapeXml(value: string): string;
2314
+ interface TwilioStreamTwiMLOptions {
2315
+ /** The `wss://…` URL Twilio should open a Media Stream to. */
2316
+ streamUrl: string;
2317
+ /**
2318
+ * `<Parameter>` name/value pairs nested in the `<Stream>` — Twilio
2319
+ * echoes them back inside the media-stream `start` event's
2320
+ * `customParameters`. Used to carry the mission id / token so the
2321
+ * media socket can be matched to its phone mission even though the
2322
+ * `<Stream>` URL itself is fixed.
2323
+ */
2324
+ parameters?: Record<string, string>;
2325
+ }
2326
+ /**
2327
+ * Build the TwiML returned from the Twilio voice webhook: a
2328
+ * `<Connect><Stream>` document that hands the live call audio to our
2329
+ * realtime voice WebSocket.
2330
+ *
2331
+ * <?xml version="1.0" encoding="UTF-8"?>
2332
+ * <Response>
2333
+ * <Connect>
2334
+ * <Stream url="wss://host/path">
2335
+ * <Parameter name="missionId" value="…"/>
2336
+ * </Stream>
2337
+ * </Connect>
2338
+ * </Response>
2339
+ *
2340
+ * `<Connect><Stream>` (as opposed to `<Start><Stream>`) makes the
2341
+ * stream bidirectional and keeps the call alive for its duration — the
2342
+ * call ends when the stream ends. Every interpolated value is
2343
+ * XML-escaped.
2344
+ *
2345
+ * > The `<Connect><Stream>` / `<Parameter>` TwiML verbs are per
2346
+ * > Twilio's public Media Streams docs; verify against current docs
2347
+ * > before the live smoke-test.
2348
+ */
2349
+ declare function buildTwilioStreamTwiML(opts: TwilioStreamTwiMLOptions): string;
2350
+ /**
2351
+ * Build a minimal `<Response><Say>…</Say></Response>` TwiML document —
2352
+ * the fallback Twilio voice response when the realtime voice runtime
2353
+ * cannot be connected (e.g. no OpenAI key). Mirrors the 46elks
2354
+ * voice-start `play` action.
2355
+ */
2356
+ declare function buildTwilioSayTwiML(message: string): string;
2357
+
2358
+ /**
2359
+ * Realtime voice WebSocket endpoint paths.
2360
+ *
2361
+ * These are the URL paths a phone carrier's media socket connects to.
2362
+ * They live in `@agenticmail/core` (rather than only in the API
2363
+ * package) because the {@link PhoneManager} needs the Twilio path to
2364
+ * build the `<Stream url>` inside the TwiML it returns from the Twilio
2365
+ * voice webhook. The API package mounts its WebSocket servers on the
2366
+ * same constants, so the path is defined exactly once.
2367
+ */
2368
+ /**
2369
+ * Path the 46elks websocket-number's `websocket_url` points at. 46elks
2370
+ * resolves the mission from the `hello` frame's `callid`, so the path
2371
+ * carries no mission identity itself.
2372
+ */
2373
+ declare const ELKS_REALTIME_WS_PATH = "/api/agenticmail/calls/realtime";
2374
+ /**
2375
+ * Path a Twilio `<Connect><Stream>` connects to. The mission id + token
2376
+ * ride as query params (and as `<Parameter>` values in the TwiML), so
2377
+ * the media socket can be matched to its mission before the Twilio
2378
+ * `start` frame even arrives.
2379
+ */
2380
+ declare const TWILIO_REALTIME_WS_PATH = "/api/agenticmail/calls/twilio-stream";
2381
+
2382
+ /**
2383
+ * Realtime transport adapter — the provider-pluggable seam of the
2384
+ * realtime voice bridge.
2385
+ *
2386
+ * `RealtimeVoiceBridge` is transport-agnostic on the *socket* side
2387
+ * (it only ever sees abstract {@link RealtimeBridgePort}s), but the
2388
+ * *messages* on the carrier side are provider-specific: 46elks speaks
2389
+ * `hello`/`audio`/`bye` JSON over linear PCM, while Twilio Media Streams
2390
+ * speaks `connected`/`start`/`media`/`stop` JSON over G.711 µ-law.
2391
+ *
2392
+ * A {@link RealtimeTransportAdapter} captures exactly those differences:
2393
+ * - how to interpret one inbound carrier frame ({@link parseInbound}),
2394
+ * - how to build the outbound control frames (handshake / audio /
2395
+ * interrupt / bye),
2396
+ * - and which OpenAI Realtime audio format the carrier's audio needs.
2397
+ *
2398
+ * Everything else — the OpenAI session lifecycle, function calling,
2399
+ * barge-in handling, transcript accumulation, teardown — is identical
2400
+ * across providers and lives once in the bridge. This is the
2401
+ * "generalise the bridge" design: one bridge, one OpenAI side, a thin
2402
+ * per-provider adapter, no duplicated conversation logic.
2403
+ *
2404
+ * Adapters are pure (no sockets, no I/O), so the bridge stays fully
2405
+ * unit-testable and `@agenticmail/core` stays dependency-light.
2406
+ */
2407
+
2408
+ /** Provider identifier — kept in lockstep with `PhoneTransportProvider`. */
2409
+ type RealtimeTransportProvider = '46elks' | 'twilio';
2410
+ /**
2411
+ * A normalised inbound carrier event. Provider message shapes are
2412
+ * collapsed into this small, bridge-facing vocabulary so the bridge
2413
+ * never has to branch on the provider:
2414
+ * - `hello` — the call leg is live; carries the provider call id.
2415
+ * (46elks `hello`; Twilio `start`. Twilio's `connected`
2416
+ * frame is internal handshake noise → `ignore`.)
2417
+ * - `audio` — one inbound audio frame (base64), to relay to OpenAI.
2418
+ * - `bye` — the caller side ended the call.
2419
+ * - `ignore` — a known-but-uninteresting frame (e.g. Twilio `mark`).
2420
+ */
2421
+ type RealtimeInboundEvent = {
2422
+ kind: 'hello';
2423
+ callId: string;
2424
+ from?: string;
2425
+ to?: string;
2426
+ } | {
2427
+ kind: 'audio';
2428
+ data: string;
2429
+ } | {
2430
+ kind: 'bye';
2431
+ reason?: string;
2432
+ message?: string;
2433
+ } | {
2434
+ kind: 'ignore';
2435
+ };
2436
+ /**
2437
+ * The provider-specific seam the bridge drives. One instance per call.
2438
+ * Adapters that need per-call state (Twilio's `streamSid`) are
2439
+ * stateful — {@link parseInbound} latches that state from the frame that
2440
+ * yields `hello` (Twilio `start`) and the outbound builders read it back.
2441
+ */
2442
+ interface RealtimeTransportAdapter {
2443
+ /** Provider this adapter speaks for — used only for log/transcript text. */
2444
+ readonly provider: RealtimeTransportProvider;
2445
+ /**
2446
+ * Prefix for the carrier-side `onEnd` reason strings (`<prefix>-bye`,
2447
+ * `<prefix>-closed`, `<prefix>-error`). The 46elks adapter keeps the
2448
+ * historical `elks` prefix so existing call sites + tests that match
2449
+ * on `elks-bye` / `elks-closed` stay correct; Twilio uses `twilio`.
2450
+ */
2451
+ readonly endReasonPrefix: string;
2452
+ /**
2453
+ * The OpenAI Realtime audio format the carrier's audio requires.
2454
+ * 46elks linear PCM → `audio/pcm` @ 24 kHz; Twilio G.711 µ-law →
2455
+ * `audio/pcmu` @ 8 kHz. Used by `buildRealtimeSessionConfig` so the
2456
+ * OpenAI session in/out format matches the carrier with no transcode.
2457
+ */
2458
+ readonly openaiAudioFormat: {
2459
+ type: string;
2460
+ rate?: number;
2461
+ };
2462
+ /**
2463
+ * Normalise one raw inbound carrier frame. Throws on a malformed
2464
+ * frame (the bridge catches the throw and ignores that frame). A
2465
+ * well-formed but uninteresting frame returns `{ kind: 'ignore' }`.
2466
+ */
2467
+ parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
2468
+ /**
2469
+ * Frames to send the carrier the moment the call leg is live (right
2470
+ * after the `hello` event). 46elks needs a `listening`+`sending`
2471
+ * format handshake; Twilio needs nothing, so its adapter returns `[]`.
2472
+ */
2473
+ buildHandshake(): Record<string, unknown>[];
2474
+ /** Build one outbound audio frame (synthesised agent speech, base64). */
2475
+ buildAudio(base64: string): Record<string, unknown>;
2476
+ /**
2477
+ * Build the barge-in frame — tells the carrier to drop buffered
2478
+ * playback so the agent stops mid-sentence. 46elks `interrupt`;
2479
+ * Twilio `clear`.
2480
+ */
2481
+ buildInterrupt(): Record<string, unknown>;
2482
+ /**
2483
+ * Build the end-of-call frame for the carrier, or `null` if the
2484
+ * carrier has none (Twilio has no client-initiated stream-stop frame;
2485
+ * the bridge just closes the socket).
2486
+ */
2487
+ buildBye(): Record<string, unknown> | null;
2488
+ }
2489
+ /**
2490
+ * The 46elks realtime-media adapter — the original transport, behaviour
2491
+ * unchanged. Audio is linear PCM, so the OpenAI session uses
2492
+ * `audio/pcm` @ 24 kHz (matching 46elks `pcm_24000`).
2493
+ */
2494
+ declare class ElksRealtimeTransport implements RealtimeTransportAdapter {
2495
+ private readonly listenFormat;
2496
+ private readonly sendFormat;
2497
+ readonly provider: "46elks";
2498
+ readonly endReasonPrefix = "elks";
2499
+ readonly openaiAudioFormat: {
2500
+ type: string;
2501
+ rate: number;
2502
+ };
2503
+ constructor(listenFormat?: ElksRealtimeAudioFormat, sendFormat?: ElksRealtimeAudioFormat);
2504
+ parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
2505
+ buildHandshake(): Record<string, unknown>[];
2506
+ buildAudio(base64: string): Record<string, unknown>;
2507
+ buildInterrupt(): Record<string, unknown>;
2508
+ buildBye(): Record<string, unknown>;
2509
+ }
2510
+ /**
2511
+ * The Twilio Media Streams adapter. Audio is G.711 µ-law @ 8 kHz, which
2512
+ * the OpenAI GA Realtime API speaks natively (`audio/pcmu`) — so the
2513
+ * bridge does NO transcoding for a Twilio call.
2514
+ *
2515
+ * Stateful per call: Twilio assigns a `streamSid` in the `start` frame
2516
+ * that EVERY outbound frame must echo. {@link parseInbound} latches it
2517
+ * on `start`; the outbound builders read it back. Building an outbound
2518
+ * frame before `start` throws — but the bridge never does that (it only
2519
+ * sends audio after the `hello` event, which `start` produces).
2520
+ */
2521
+ declare class TwilioRealtimeTransport implements RealtimeTransportAdapter {
2522
+ readonly provider: "twilio";
2523
+ readonly endReasonPrefix = "twilio";
2524
+ readonly openaiAudioFormat: {
2525
+ type: string;
2526
+ rate: number;
2527
+ };
2528
+ /** Latched from the Twilio `start` frame; required on every outbound. */
2529
+ private streamSid;
2530
+ /** The active `streamSid`, once the `start` frame has been seen. */
2531
+ get currentStreamSid(): string;
2532
+ parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
2533
+ buildHandshake(): Record<string, unknown>[];
2534
+ buildAudio(base64: string): Record<string, unknown>;
2535
+ buildInterrupt(): Record<string, unknown>;
2536
+ buildBye(): Record<string, unknown> | null;
2537
+ }
2538
+ /** Construct the transport adapter for a provider. */
2539
+ declare function createRealtimeTransport(provider: RealtimeTransportProvider): RealtimeTransportAdapter;
2540
+
2541
+ /**
2542
+ * Realtime voice tools — the tool-using layer on top of the v0.9.52
2543
+ * audio bridge (see `realtime-bridge.ts`).
2544
+ *
2545
+ * # What this file is
2546
+ *
2547
+ * v0.9.52 wired a 46elks ⇄ OpenAI Realtime audio bridge that could only
2548
+ * *converse*. v0.9.53 turns the voice agent into something that can
2549
+ * *act* on a live call: ask the human operator a question, look things
2550
+ * up, tell the time. The enabler is OpenAI Realtime **function
2551
+ * calling** — tools declared in `session.tools`, called by the model
2552
+ * mid-call, dispatched here, and answered back over the wire.
2553
+ *
2554
+ * This module is the transport-agnostic half of that:
2555
+ * - {@link RealtimeToolDefinition} — the JSON-schema tool shapes the
2556
+ * bridge declares to OpenAI.
2557
+ * - {@link ToolExecutor} — the interface the bridge dispatches calls
2558
+ * into; {@link createToolExecutor} builds one from a handler map.
2559
+ * - The *pure / fast* tool implementations ({@link getDatetime},
2560
+ * {@link recallMemory}, {@link webSearch}) — no sockets, no DB
2561
+ * handles passed in directly, fully unit-testable.
2562
+ * - {@link pollForOperatorAnswer} — the poll loop `ask_operator` uses
2563
+ * to block (with an injectable clock + sleep) until the operator
2564
+ * answers or the hard timeout elapses.
2565
+ * - {@link parseOperatorQueryReply} — parses an operator's *email*
2566
+ * reply back into a query answer (the channel-agnostic default
2567
+ * notifier path, see the plan §5).
2568
+ *
2569
+ * The *side-effecting* wiring — actually recording an operator query on
2570
+ * a mission, sending the notification email, calling a web-search API —
2571
+ * lives in `@agenticmail/api`'s `realtime-ws.ts`, which builds a
2572
+ * {@link ToolExecutor} per connection. Keeping that split means
2573
+ * `@agenticmail/core` stays dependency-light and every piece here is
2574
+ * testable with fakes.
2575
+ *
2576
+ * > NOTE on OpenAI Realtime GA event/field names: the function-calling
2577
+ * > wire shapes (`session.tools`, `tool_choice`, `function_call_output`,
2578
+ * > `response.function_call_arguments.done`) follow the plan. Any name
2579
+ * > that could not be verified against the v0.9.52 code is flagged with
2580
+ * > a comment in `realtime-bridge.ts` — verify against current OpenAI
2581
+ * > docs before the live smoke test (same discipline as v0.9.52's
2582
+ * > `response.output_audio.delta` vs legacy `response.audio.delta`).
2583
+ */
2584
+ /**
2585
+ * A function tool as declared in the OpenAI Realtime `session.tools`
2586
+ * array. The shape is the GA `gpt-realtime` function-tool schema:
2587
+ * `{ type: 'function', name, description, parameters: <JSON Schema> }`.
2588
+ */
2589
+ interface RealtimeToolDefinition {
2590
+ type: 'function';
2591
+ name: string;
2592
+ description: string;
2593
+ parameters: {
2594
+ type: 'object';
2595
+ properties: Record<string, unknown>;
2596
+ required?: string[];
2597
+ additionalProperties?: boolean;
2598
+ };
2599
+ }
2600
+ /** A function call parsed off the wire and ready to dispatch. */
2601
+ interface RealtimeToolCall {
2602
+ /** OpenAI `call_id` — echoed back on the `function_call_output`. */
2603
+ callId: string;
2604
+ /** The tool name the model chose. */
2605
+ name: string;
2606
+ /** Parsed `arguments` object (`{}` if the model sent none / invalid). */
2607
+ arguments: Record<string, unknown>;
2608
+ }
2609
+ /** The result of executing a tool — `output` is a string for the model. */
2610
+ interface RealtimeToolResult {
2611
+ /** Sent back verbatim as the `function_call_output` `output` field. */
2612
+ output: string;
2613
+ }
2614
+ /**
2615
+ * Dispatches a model tool call to its implementation. The bridge holds
2616
+ * one of these (injected) and calls {@link execute} for every
2617
+ * function call; it must always resolve (never reject) — a failing
2618
+ * tool resolves to an `output` the model can read and recover from.
2619
+ */
2620
+ interface ToolExecutor {
2621
+ execute(call: RealtimeToolCall): Promise<RealtimeToolResult>;
2622
+ }
2623
+ /**
2624
+ * One tool's implementation. Receives the parsed arguments + the raw
2625
+ * call. May return a plain string or a value that is JSON-stringified.
2626
+ * It MAY throw — {@link createToolExecutor} catches and turns a thrown
2627
+ * error into a model-readable output, so a buggy tool never wedges the
2628
+ * call.
2629
+ */
2630
+ type RealtimeToolHandler = (args: Record<string, unknown>, call: RealtimeToolCall) => Promise<string | Record<string, unknown>> | string | Record<string, unknown>;
2631
+ /**
2632
+ * Hard ceiling, in ms (5 min), on how long `ask_operator` blocks waiting for an answer.
2633
+ * Per Ope (2026-05-19): a human caller will hold ~5 minutes; past that
2634
+ * the graceful path is a callback (plan §7), not an indefinite hold.
2635
+ */
2636
+ declare const OPERATOR_QUERY_TIMEOUT_MS: number;
2637
+ /** How often, in ms (every 3 s), `ask_operator` re-checks the query record for an answer. */
2638
+ declare const OPERATOR_QUERY_POLL_INTERVAL_MS = 3000;
2639
+ /**
2640
+ * Returned to the model as the `ask_operator` tool output when the
2641
+ * operator did not answer in time. Phrased as an instruction the model
2642
+ * can act on — it must NOT invent an answer, it should tell the caller
2643
+ * a follow-up / callback is coming (the bridge + API handle the actual
2644
+ * callback-on-disconnect, see the plan §7).
2645
+ */
2646
+ declare const OPERATOR_QUERY_TIMEOUT_SENTINEL: string;
2647
+ /**
2648
+ * Subject-line tag used by the email notifier. The query id is embedded
2649
+ * so an operator's *reply* can be parsed straight back into an answer
2650
+ * ({@link parseOperatorQueryReply}) — the channel-agnostic default path.
2651
+ */
2652
+ declare const OPERATOR_QUERY_SUBJECT_TAG = "AgenticMail Operator Query";
2653
+ /**
2654
+ * Phase 1 keystone — human-in-the-loop. The model calls this when it
2655
+ * needs information, a decision, or approval it does not have. It can
2656
+ * take minutes to answer, so the model is instructed (see
2657
+ * {@link buildRealtimeToolGuidance}) to put the caller on hold first.
2658
+ */
2659
+ declare const ASK_OPERATOR_TOOL: RealtimeToolDefinition;
2660
+ /** Phase 2 — a web search. Returns the top results as text. */
2661
+ declare const WEB_SEARCH_TOOL: RealtimeToolDefinition;
2662
+ /** Phase 2 — searches the agent's own persistent memory. */
2663
+ declare const RECALL_MEMORY_TOOL: RealtimeToolDefinition;
2664
+ /** Phase 2 — the current date/time, for resolving "tomorrow" etc. */
2665
+ declare const GET_DATETIME_TOOL: RealtimeToolDefinition;
2666
+ /**
2667
+ * Phase 2 (optional) — searches the agent's AgenticMail inbox. Defined
2668
+ * here so the schema is canonical, but NOT wired into the default
2669
+ * executor in `realtime-ws.ts` (it needs IMAP access); a deployment can
2670
+ * opt in. Kept in the file so the tool surface is documented in one place.
2671
+ */
2672
+ declare const SEARCH_EMAIL_TOOL: RealtimeToolDefinition;
2673
+ /** Every tool defined in this module, keyed by name. */
2674
+ declare const REALTIME_TOOL_DEFINITIONS: Record<string, RealtimeToolDefinition>;
2675
+ /**
2676
+ * Build the natural-language guidance appended to the Realtime session
2677
+ * `instructions` when tools are present. This is the *model-side* half
2678
+ * of "keep the line warm" (plan §6): the model announces a hold before
2679
+ * a slow tool and reassures the caller while it waits. The bridge-side
2680
+ * safety net (the tool-call timeout) is in `realtime-bridge.ts`.
2681
+ */
2682
+ declare function buildRealtimeToolGuidance(tools: readonly RealtimeToolDefinition[]): string;
2683
+ /**
2684
+ * Build a {@link ToolExecutor} from a `name → handler` map. The
2685
+ * executor:
2686
+ * - resolves an unknown tool name to a model-readable "not available"
2687
+ * output (never rejects),
2688
+ * - catches a thrown / rejected handler and turns it into a
2689
+ * model-readable failure output,
2690
+ * - JSON-stringifies a non-string handler return.
2691
+ *
2692
+ * So a single buggy or missing tool can never crash the bridge or
2693
+ * wedge the call — the model just gets an output it can recover from.
2694
+ */
2695
+ declare function createToolExecutor(handlers: Record<string, RealtimeToolHandler>): ToolExecutor;
2696
+ interface GetDatetimeOptions {
2697
+ /** IANA timezone; defaults to UTC. */
2698
+ timezone?: string;
2699
+ /** Clock override for tests. */
2700
+ now?: Date;
2701
+ }
2702
+ /**
2703
+ * `get_datetime` — current date/time as a human-readable line plus the
2704
+ * exact ISO timestamp. Pure: an injectable clock makes it deterministic
2705
+ * in tests. An invalid timezone falls back to UTC rather than throwing.
2706
+ */
2707
+ declare function getDatetime(options?: GetDatetimeOptions): string;
2708
+ /**
2709
+ * Minimal structural interface for the bit of `AgentMemoryManager`
2710
+ * {@link recallMemory} needs. Declared here so this module does not
2711
+ * have to import the full memory manager — `AgentMemoryManager`
2712
+ * satisfies it structurally.
2713
+ */
2714
+ interface MemoryRecaller {
2715
+ recall(agentId: string, query: string, limit?: number): Promise<Array<{
2716
+ title: string;
2717
+ content: string;
2718
+ }>>;
2719
+ }
2720
+ /**
2721
+ * `recall_memory` — query the agent's own persistent memory. Returns a
2722
+ * compact numbered list, or a clear "nothing found" line so the model
2723
+ * does not stall on an empty result.
2724
+ */
2725
+ declare function recallMemory(memory: MemoryRecaller, agentId: string, query: string, limit?: number): Promise<string>;
2726
+ interface WebSearchOptions {
2727
+ /**
2728
+ * Search endpoint. Defaults to the DuckDuckGo HTML endpoint
2729
+ * ({@link DEFAULT_WEB_SEARCH_ENDPOINT}) — free, no API key, per the
2730
+ * scope decision (plan §13.1). Overridable for tests / a mirror.
2731
+ */
2732
+ endpoint?: string;
2733
+ /** `fetch` override for tests. */
2734
+ fetchFn?: typeof fetch;
2735
+ /** Max results to fold into the output (default 5, capped at 10). */
2736
+ maxResults?: number;
2737
+ }
2738
+ /**
2739
+ * Default web-search endpoint — DuckDuckGo's keyless HTML results page.
2740
+ * Chosen because it needs no API key or account (plan §13.1: "web_search
2741
+ * → DuckDuckGo only, free, no key").
2742
+ */
2743
+ declare const DEFAULT_WEB_SEARCH_ENDPOINT = "https://html.duckduckgo.com/html/";
2744
+ /**
2745
+ * Untrusted-content marker prefixed to every non-empty `web_search`
2746
+ * result block (v0.9.53 security review).
2747
+ *
2748
+ * Web-search results are scraped page titles + snippets — attacker-
2749
+ * controllable text that the model consumes verbatim as a
2750
+ * `function_call_output`. A page that ranks for the caller's query can
2751
+ * plant instructions in its `<title>` or snippet; without an explicit
2752
+ * delimiter the model may read them as commands rather than data — the
2753
+ * classic search-result prompt-injection vector. This marker tells the
2754
+ * model the block is strictly reference data and that any instructions
2755
+ * inside it must be ignored.
2756
+ *
2757
+ * (The enterprise web-search tool wrapped results the same way via
2758
+ * `wrapWebContent` / `externalContent.untrusted`; the fresh DuckDuckGo
2759
+ * helper here re-establishes that defense without copying the file.)
2760
+ */
2761
+ declare const WEB_SEARCH_UNTRUSTED_PREFIX: string;
2762
+ /**
2763
+ * `web_search` — a keyless DuckDuckGo lookup returning the top results
2764
+ * as text. Reimplemented fresh as a small HTML-scrape helper (plan
2765
+ * §13.1 / the host's provenance note: do not copy the enterprise
2766
+ * file). No API key is needed, so unlike the other tools this one is
2767
+ * always available.
2768
+ *
2769
+ * Fails *soft*: a non-OK response, an unreadable body, or a thrown
2770
+ * fetch all resolve to a model-readable line rather than throwing — a
2771
+ * search outage must never kill a live call.
2772
+ *
2773
+ * > DuckDuckGo's HTML page is an unversioned scrape target, not a
2774
+ * > documented API: the `result__a` / `result__snippet` selectors and
2775
+ * > the `/l/?uddg=` redirect wrapper below match the endpoint today;
2776
+ * > verify them before the live smoke test (same "verify the wire
2777
+ * > shape" discipline as the OpenAI Realtime event names).
2778
+ */
2779
+ declare function webSearch(query: string, options?: WebSearchOptions): Promise<string>;
2780
+ interface OperatorQueryPollOptions {
2781
+ /** Hard timeout (default {@link OPERATOR_QUERY_TIMEOUT_MS}). */
2782
+ timeoutMs?: number;
2783
+ /** Poll interval (default {@link OPERATOR_QUERY_POLL_INTERVAL_MS}). */
2784
+ pollIntervalMs?: number;
2785
+ /** Clock override for tests (returns ms). */
2786
+ now?: () => number;
2787
+ /** Sleep override for tests. */
2788
+ sleep?: (ms: number) => Promise<void>;
2789
+ /** Abort handle — when `aborted` flips true the poll resolves null. */
2790
+ signal?: {
2791
+ readonly aborted: boolean;
2792
+ };
2793
+ }
2794
+ /**
2795
+ * Poll `readAnswer` until it yields a non-empty answer or the timeout
2796
+ * elapses. Returns the trimmed answer, or `null` on timeout / abort.
2797
+ *
2798
+ * This is the loop `ask_operator` runs to *block* the tool call while
2799
+ * it waits for the human. The clock + sleep are injectable so tests run
2800
+ * the full timeout path in microseconds. The `signal` lets the caller
2801
+ * abandon the wait early — e.g. the call dropped, so there is no point
2802
+ * polling (the unanswered query then drives callback-on-disconnect).
2803
+ */
2804
+ declare function pollForOperatorAnswer(readAnswer: () => Promise<string | null | undefined> | string | null | undefined, options?: OperatorQueryPollOptions): Promise<string | null>;
2805
+ /**
2806
+ * Build the notification email subject for an operator query. The query
2807
+ * id is embedded in a `[tag id]` token so the operator's reply (subject
2808
+ * `Re: [tag id] …`) can be parsed straight back into an answer.
2809
+ */
2810
+ declare function operatorQuerySubject(queryId: string, callContext?: string): string;
2811
+ /**
2812
+ * Parse an operator's email reply into `{ queryId, answer }`, or `null`
2813
+ * if the email is not an operator-query reply (no id token in the
2814
+ * subject) or carries no usable answer text.
2815
+ *
2816
+ * This is the pure half of the channel-agnostic notifier (plan §5): the
2817
+ * default notifier emails the operator; their reply lands back in the
2818
+ * agent's inbox; the inbound mail hook runs this and posts the answer
2819
+ * to the same query record the HTTP endpoint writes to.
2820
+ */
2821
+ declare function parseOperatorQueryReply(input: {
2822
+ subject?: string;
2823
+ text?: string;
2824
+ }): {
2825
+ queryId: string;
2826
+ answer: string;
2827
+ } | null;
2828
+ /**
2829
+ * Extract the bare email address from a `From`-style value, lowercased
2830
+ * and trimmed. Accepts `"Name" <addr@host>`, `<addr@host>`, or a plain
2831
+ * `addr@host`; returns `''` for an empty / unusable input.
2832
+ */
2833
+ declare function extractEmailAddress(value: string | null | undefined): string;
2834
+ /**
2835
+ * Fail-closed sender check for the operator email-reply answer path
2836
+ * (plan §5; added in the v0.9.53 security review).
2837
+ *
2838
+ * An operator-query answer arriving by email is gated by the query id
2839
+ * embedded in the subject — an unguessable 122-bit v4 UUID, but one that
2840
+ * rides in *plaintext* subject lines through quoting, forwarding, and
2841
+ * relay/provider logs. The id alone is therefore a materially weaker
2842
+ * gate than the HMAC per-mission token used on the phone webhook (which
2843
+ * only 46elks ever sees). So an emailed answer is accepted ONLY when its
2844
+ * `From` address matches the configured operator address (case-
2845
+ * insensitive, address-only).
2846
+ *
2847
+ * Returns `false` when no `operatorEmail` is configured — no operator
2848
+ * means nobody is trusted (fail closed), so the email-reply path is
2849
+ * simply inert on a deployment without one.
2850
+ *
2851
+ * NOTE: ultimate strength still depends on inbound SPF/DKIM rejecting a
2852
+ * spoofed `From`; this check closes the casual-leak path — a leaked or
2853
+ * forwarded subject token replied to from an arbitrary address.
2854
+ */
2855
+ declare function isOperatorReplySender(from: string | null | undefined, operatorEmail: string | null | undefined): boolean;
2856
+
2857
+ /**
2858
+ * Realtime voice bridge — wires the OpenAI Realtime API to a phone
2859
+ * carrier's realtime-media WebSocket so a phone mission can actually
2860
+ * *converse*.
2861
+ *
2862
+ * # Shape of the integration
2863
+ *
2864
+ * caller ⇄ carrier ⇄ (carrier media WebSocket) ⇄ AgenticMail
2865
+ * │
2866
+ * RealtimeVoiceBridge
2867
+ * │
2868
+ * (OpenAI Realtime WebSocket)
2869
+ * │
2870
+ * gpt-realtime
2871
+ *
2872
+ * The carrier streams the live call audio to AgenticMail as JSON frames
2873
+ * (base64 audio); AgenticMail relays them to OpenAI as
2874
+ * `input_audio_buffer.append`; OpenAI streams synthesised speech back
2875
+ * as `response.output_audio.delta`; AgenticMail relays that to the
2876
+ * carrier. Server-side VAD on the OpenAI session handles turn-taking —
2877
+ * no manual commit / response.create.
2878
+ *
2879
+ * # Provider-pluggable transport
2880
+ *
2881
+ * The carrier side speaks a provider-specific wire protocol — 46elks
2882
+ * (`hello`/`audio`/`bye`, linear PCM) and Twilio Media Streams
2883
+ * (`connected`/`start`/`media`/`stop`, G.711 µ-law). Those differences
2884
+ * are isolated behind a {@link RealtimeTransportAdapter}: the bridge
2885
+ * itself — OpenAI session lifecycle, function calling, barge-in,
2886
+ * transcript, teardown — is identical across providers and lives here
2887
+ * once. The bridge defaults to the 46elks adapter, so existing callers
2888
+ * that pass no `transport` keep their exact prior behaviour.
2889
+ *
2890
+ * # Memory injection — the whole point
2891
+ *
2892
+ * Before the OpenAI session starts, the agent's persistent memory is
2893
+ * rendered (`AgentMemoryManager.generateMemoryContext()`) and folded
2894
+ * into the Realtime session `instructions`. The model is told to treat
2895
+ * that block as *its own* long-term knowledge — so on the call it acts
2896
+ * with full continuity, as if it had always known those things.
2897
+ *
2898
+ * # Why this file is transport-agnostic
2899
+ *
2900
+ * `RealtimeVoiceBridge` never touches a socket. It takes two abstract
2901
+ * {@link RealtimeBridgePort}s (one per side) and is driven by
2902
+ * `handle*Message` / `handle*Open` / `handle*Close` calls. The real
2903
+ * WebSocket plumbing lives in `@agenticmail/api` (which has the `ws`
2904
+ * dependency); tests drive the bridge with in-memory fake ports. This
2905
+ * keeps `@agenticmail/core` dependency-free and the bridge logic fully
2906
+ * unit-testable without a live OpenAI key or a carrier websocket number.
2907
+ *
2908
+ * The exact OpenAI Realtime wire shapes below are the GA `gpt-realtime`
2909
+ * protocol (session config nested under `audio.input` / `audio.output`,
2910
+ * `format` as an object, `response.output_audio.delta` for output). The
2911
+ * legacy beta output event name `response.audio.delta` is also handled
2912
+ * defensively — some `gpt-realtime` deployments still emit it.
2913
+ */
2914
+
2915
+ /** OpenAI Realtime WebSocket base URL (model passed as `?model=`). */
2916
+ declare const OPENAI_REALTIME_URL = "wss://api.openai.com/v1/realtime";
2917
+ /** GA Realtime model. */
2918
+ declare const DEFAULT_REALTIME_MODEL = "gpt-realtime";
2919
+ /** Default GA Realtime voice. */
2920
+ declare const DEFAULT_REALTIME_VOICE = "marin";
2921
+ /** PCM sample rate (Hz) shared by 46elks `pcm_24000` and the default OpenAI session format. */
2922
+ declare const REALTIME_AUDIO_SAMPLE_RATE = 24000;
2923
+ /**
2924
+ * #46-H1 — hard ceiling on a single inbound audio frame, measured in
2925
+ * base64 characters. Realtime frames are tiny (20–100 ms of audio); a
2926
+ * frame larger than this comes from either a buggy or a hostile peer trying to
2927
+ * push an unbounded allocation through the bridge. Oversized frames are
2928
+ * dropped, never forwarded. ~256 KiB of base64 ≈ 4 s of 24 kHz PCM16 —
2929
+ * far above any legitimate realtime frame, so this never trims real
2930
+ * speech, it only fences off abuse.
2931
+ */
2932
+ declare const REALTIME_MAX_AUDIO_FRAME_BASE64: number;
2933
+ /**
2934
+ * Bridge-side safety net for a slow tool call (plan §6). The model side
2935
+ * keeps the line warm (it announces a hold and reassures the caller);
2936
+ * this is the floor under that — if a tool's `execute()` never settles
2937
+ * (a hung `ask_operator`, a wedged search) the bridge still answers the
2938
+ * model after this long so the call cannot be wedged forever. It is set
2939
+ * deliberately ABOVE `OPERATOR_QUERY_TIMEOUT_MS` (5 min) so the tool's
2940
+ * own graceful timeout sentinel normally wins the race; this only fires
2941
+ * if the executor itself hangs.
2942
+ */
2943
+ declare const REALTIME_TOOL_CALL_TIMEOUT_MS: number;
2944
+ interface RealtimeInstructionOptions {
2945
+ /** The concrete objective of this call. */
2946
+ task: string;
2947
+ /** Rendered agent memory block (from `generateMemoryContext()`). */
2948
+ memoryContext?: string;
2949
+ /** The agent's display name, used in the persona line. */
2950
+ agentName?: string;
2951
+ /** Override the default persona preamble. */
2952
+ persona?: string;
2953
+ /**
2954
+ * Natural-language tool-use guidance, appended as its own section.
2955
+ * Normally produced by `buildRealtimeToolGuidance()` and folded in
2956
+ * automatically by `buildRealtimeSessionConfig()` when tools are set.
2957
+ */
2958
+ toolGuidance?: string;
2959
+ }
2960
+ /**
2961
+ * Compose the Realtime session `instructions` string. The agent's
2962
+ * memory is presented as the model's *own* knowledge — not as external
2963
+ * notes — so the call feels continuous with everything the agent has
2964
+ * learned elsewhere.
2965
+ */
2966
+ declare function buildRealtimeInstructions(opts: RealtimeInstructionOptions): string;
2967
+ interface RealtimeSessionConfigOptions extends RealtimeInstructionOptions {
2968
+ /** OpenAI Realtime voice (default {@link DEFAULT_REALTIME_VOICE}). */
2969
+ voice?: string;
2970
+ /** OpenAI Realtime model (default {@link DEFAULT_REALTIME_MODEL}). */
2971
+ model?: string;
2972
+ /** Provide a fully-formed instruction string instead of composing one. */
2973
+ instructions?: string;
2974
+ /**
2975
+ * Function tools to declare on the session. When present they are
2976
+ * emitted under `session.tools` with a `tool_choice`, and (unless
2977
+ * `instructions` is overridden) tool-use guidance is folded into the
2978
+ * composed instructions automatically.
2979
+ */
2980
+ tools?: RealtimeToolDefinition[];
2981
+ /** `tool_choice` override — defaults to `'auto'` when tools are set. */
2982
+ toolChoice?: 'auto' | 'none' | 'required';
2983
+ /**
2984
+ * OpenAI Realtime audio format for the session's input AND output.
2985
+ * Defaults to linear PCM @ 24 kHz (`{ type: 'audio/pcm', rate: 24000 }`)
2986
+ * — the format a 46elks `pcm_24000` call needs. A Twilio call must
2987
+ * pass `{ type: 'audio/pcmu', rate: 8000 }` (G.711 µ-law @ 8 kHz) so
2988
+ * the OpenAI session speaks the carrier's native codec with NO
2989
+ * transcoding. Normally sourced from a {@link RealtimeTransportAdapter}'s
2990
+ * `openaiAudioFormat`.
2991
+ */
2992
+ audioFormat?: {
2993
+ type: string;
2994
+ rate?: number;
2995
+ };
2996
+ }
2997
+ /** Default OpenAI Realtime audio format — linear PCM @ 24 kHz (46elks). */
2998
+ declare const DEFAULT_REALTIME_AUDIO_FORMAT: {
2999
+ readonly type: "audio/pcm";
3000
+ readonly rate: 24000;
3001
+ };
3002
+ /**
3003
+ * Build the `session.update` client event for the GA `gpt-realtime`
3004
+ * API. Audio in/out default to PCM16 @ 24 kHz (matches 46elks
3005
+ * `pcm_24000`); a Twilio call passes `audioFormat: { type: 'audio/pcmu',
3006
+ * rate: 8000 }` so the session speaks G.711 µ-law natively. Turn-taking
3007
+ * is server-side VAD, and the agent's memory is folded into
3008
+ * `instructions`.
3009
+ *
3010
+ * When `tools` are supplied they are declared under `session.tools`
3011
+ * with a `tool_choice` (default `'auto'`), and — unless the caller
3012
+ * passed an explicit `instructions` string — natural-language tool-use
3013
+ * guidance is appended to the composed instructions so the model knows
3014
+ * to put the caller on hold before a slow tool (plan §6).
3015
+ *
3016
+ * > The `session.tools` / `tool_choice` field names follow the OpenAI
3017
+ * > Realtime function-calling protocol per the plan §3, and the
3018
+ * > `audio/pcmu` µ-law format token follows the OpenAI GA Realtime
3019
+ * > audio-format protocol; verify both against current OpenAI docs
3020
+ * > before the live smoke test.
3021
+ */
3022
+ declare function buildRealtimeSessionConfig(opts: RealtimeSessionConfigOptions): Record<string, unknown>;
3023
+ /** Build the `wss://…/v1/realtime?model=…` URL for a model. */
3024
+ declare function buildOpenAIRealtimeUrl(model?: string): string;
3025
+ /** One side of the bridge — a JSON message sink that can be closed. */
3026
+ interface RealtimeBridgePort {
3027
+ /** Send one JSON message to the peer. Must not throw. */
3028
+ send(message: Record<string, unknown>): void;
3029
+ /** Close the underlying connection. Must be idempotent. */
3030
+ close(): void;
3031
+ }
3032
+ interface RealtimeBridgeTranscriptEntry {
3033
+ source: 'system' | 'provider' | 'agent';
3034
+ text: string;
3035
+ metadata?: Record<string, unknown>;
3036
+ }
3037
+ interface RealtimeVoiceBridgeOptions {
3038
+ /**
3039
+ * Port to the carrier realtime-media side. Named `elks` for backward
3040
+ * compatibility (it predates the Twilio transport); `carrier` is the
3041
+ * provider-neutral alias and takes precedence if both are given.
3042
+ */
3043
+ elks?: RealtimeBridgePort;
3044
+ /** Provider-neutral alias for {@link elks} — the carrier media port. */
3045
+ carrier?: RealtimeBridgePort;
3046
+ /** Port to the OpenAI Realtime side. */
3047
+ openai: RealtimeBridgePort;
3048
+ /** `session.update` payload — sent to OpenAI once its socket opens. */
3049
+ sessionConfig: Record<string, unknown>;
3050
+ /**
3051
+ * Carrier transport adapter — encodes/decodes the provider's wire
3052
+ * protocol (46elks vs Twilio Media Streams). Defaults to the 46elks
3053
+ * adapter, so callers that omit it keep the original behaviour.
3054
+ */
3055
+ transport?: RealtimeTransportAdapter;
3056
+ /**
3057
+ * 46elks-only: audio format we ask 46elks to send us (default
3058
+ * `pcm_24000`). Ignored unless the default 46elks transport is used
3059
+ * and no explicit `transport` was supplied.
3060
+ */
3061
+ listenFormat?: ElksRealtimeAudioFormat;
3062
+ /**
3063
+ * 46elks-only: audio format we declare for the audio we send 46elks
3064
+ * (default `pcm_24000`). Ignored unless the default 46elks transport
3065
+ * is used and no explicit `transport` was supplied.
3066
+ */
3067
+ sendFormat?: ElksRealtimeAudioFormat;
3068
+ /** Per-frame base64 ceiling (default {@link REALTIME_MAX_AUDIO_FRAME_BASE64}). */
3069
+ maxAudioFrameBase64?: number;
3070
+ /**
3071
+ * Dispatches the model's function calls. When omitted the bridge has
3072
+ * no tools — a function call from the model is acknowledged with a
3073
+ * "no tools available" output rather than left to wedge the model.
3074
+ */
3075
+ toolExecutor?: ToolExecutor;
3076
+ /**
3077
+ * Bridge-side safety-net timeout for a single tool call (default
3078
+ * {@link REALTIME_TOOL_CALL_TIMEOUT_MS}). If the executor does not
3079
+ * settle within this window the bridge answers the model itself so
3080
+ * the call cannot be wedged by a hung tool.
3081
+ */
3082
+ maxToolCallMs?: number;
3083
+ /** Sink for transcript / lifecycle entries worth persisting on the mission. */
3084
+ onTranscript?: (entry: RealtimeBridgeTranscriptEntry) => void;
3085
+ /**
3086
+ * Called exactly once when the bridge has fully ended. `pendingToolCalls`
3087
+ * is how many tool calls were still in flight at teardown — non-zero
3088
+ * means the call dropped mid-tool (e.g. an unanswered `ask_operator`),
3089
+ * which is the signal the API layer uses to arm callback-on-disconnect
3090
+ * (plan §7).
3091
+ */
3092
+ onEnd?: (summary: {
3093
+ reason: string;
3094
+ pendingToolCalls: number;
3095
+ }) => void;
3096
+ }
3097
+ /**
3098
+ * Bridges a phone carrier's realtime-media connection to an OpenAI
3099
+ * Realtime connection. Provider-pluggable: the carrier wire protocol
3100
+ * (46elks vs Twilio Media Streams) is isolated in a
3101
+ * {@link RealtimeTransportAdapter}; the conversation logic here is
3102
+ * provider-neutral. The caller pumps raw messages in via
3103
+ * `handleCarrierMessage` / `handleOpenAIMessage` and connection
3104
+ * lifecycle via `handle*Open` / `handle*Close`. Every public method is
3105
+ * safe to call after the bridge has ended (they become no-ops).
3106
+ *
3107
+ * The legacy `handleElks*` method names remain as thin aliases for the
3108
+ * `handleCarrier*` methods so existing 46elks call sites and tests do
3109
+ * not break.
3110
+ */
3111
+ declare class RealtimeVoiceBridge {
3112
+ private readonly carrier;
3113
+ private readonly openai;
3114
+ private readonly sessionConfig;
3115
+ private readonly transport;
3116
+ private readonly maxAudioFrameBase64;
3117
+ private readonly toolExecutor?;
3118
+ private readonly maxToolCallMs;
3119
+ private readonly onTranscript?;
3120
+ private readonly onEnd?;
3121
+ /** Carrier `hello`/`start` received — the call leg is live. */
3122
+ private helloSeen;
3123
+ /** OpenAI socket open + `session.update` sent. */
3124
+ private openaiReady;
3125
+ /** Bridge has ended — all further input is ignored. */
3126
+ private ended;
3127
+ /** Carrier call id from the `hello` event (46elks `callid` / Twilio `callSid`). */
3128
+ private callId;
3129
+ /** Audio frames received before OpenAI was ready, flushed on open. */
3130
+ private readonly pendingAudio;
3131
+ /** Oversized-frame counter — reported once, not per frame. */
3132
+ private droppedFrames;
3133
+ private droppedFramesReported;
3134
+ /** Accumulated assistant speech transcript for the current response. */
3135
+ private assistantTranscript;
3136
+ /**
3137
+ * Function-call name keyed by `call_id`, captured from
3138
+ * `response.output_item.added`. The later `*.arguments.done` event is
3139
+ * not guaranteed to echo the tool name, so we remember it here.
3140
+ */
3141
+ private readonly toolCallNames;
3142
+ /** `call_id`s whose tool call is currently executing. */
3143
+ private readonly inFlightToolCalls;
3144
+ constructor(opts: RealtimeVoiceBridgeOptions);
3145
+ /** True once the bridge has ended. */
3146
+ get isEnded(): boolean;
3147
+ /** The carrier call id, once the `hello`/`start` event has been seen. */
3148
+ get currentCallId(): string;
3149
+ /** The carrier transport provider this bridge is running for. */
3150
+ get provider(): string;
3151
+ /** How many tool calls are executing right now. */
3152
+ get pendingToolCalls(): number;
3153
+ /** Call when the OpenAI socket opens — sends `session.update`. */
3154
+ handleOpenAIOpen(): void;
3155
+ /** Call when the OpenAI socket closes. */
3156
+ handleOpenAIClose(): void;
3157
+ /** Call when the OpenAI socket errors. */
3158
+ handleOpenAIError(err: unknown): void;
3159
+ /**
3160
+ * Call when the carrier media socket closes. The `onEnd` reason is
3161
+ * `<prefix>-closed`, where the prefix comes from the transport adapter
3162
+ * (`elks` for 46elks, `twilio` for Twilio) — so historical 46elks
3163
+ * reason strings (`elks-closed`) are preserved.
3164
+ */
3165
+ handleCarrierClose(): void;
3166
+ /** Call when the carrier media socket errors. */
3167
+ handleCarrierError(err: unknown): void;
3168
+ /** @deprecated 46elks-era alias for {@link handleCarrierClose}. */
3169
+ handleElksClose(): void;
3170
+ /** @deprecated 46elks-era alias for {@link handleCarrierError}. */
3171
+ handleElksError(err: unknown): void;
3172
+ /**
3173
+ * Feed one raw message from the carrier media socket. Accepts a JSON
3174
+ * string or an already-parsed object. The transport adapter
3175
+ * normalises the provider-specific frame; malformed frames throw out
3176
+ * of the adapter and are ignored here (the bridge is never torn down
3177
+ * for one bad frame).
3178
+ */
3179
+ handleCarrierMessage(raw: string | Record<string, unknown>): void;
3180
+ /** @deprecated 46elks-era alias for {@link handleCarrierMessage}. */
3181
+ handleElksMessage(raw: string | Record<string, unknown>): void;
3182
+ /** Relay caller audio to OpenAI, enforcing the per-frame size cap. */
3183
+ private forwardInboundAudio;
3184
+ /**
3185
+ * Feed one raw message from the OpenAI Realtime socket. Accepts a
3186
+ * JSON string or an already-parsed object. Unknown event types are
3187
+ * ignored.
3188
+ */
3189
+ handleOpenAIMessage(raw: string | Record<string, unknown>): void;
3190
+ /** Relay synthesised agent audio to the carrier, enforcing the size cap. */
3191
+ private forwardOutboundAudio;
3192
+ /**
3193
+ * Parse a `response.function_call_arguments.done` event and dispatch
3194
+ * the tool call. Resolves `name` from the event or the map captured
3195
+ * on `response.output_item.added`; parses `arguments` (a JSON string)
3196
+ * defensively. Always answers the model — an unknown name, missing
3197
+ * executor, or oversized fan-out each gets a model-readable output
3198
+ * rather than being dropped (a dropped `call_id` wedges the model,
3199
+ * which waits forever for its `function_call_output`).
3200
+ */
3201
+ private dispatchToolCall;
3202
+ /** Execute one tool call, racing the executor against the safety-net timeout. */
3203
+ private runToolCall;
3204
+ /**
3205
+ * Send a tool result back to OpenAI: a `function_call_output`
3206
+ * conversation item, then `response.create` so the model resumes
3207
+ * speaking with the result in hand.
3208
+ *
3209
+ * > `conversation.item.create` with `{ type: 'function_call_output',
3210
+ * > call_id, output }` followed by `response.create` is the OpenAI
3211
+ * > Realtime function-calling return path per the plan §3. Verify
3212
+ * > against current OpenAI docs before the live smoke test.
3213
+ */
3214
+ private answerToolCall;
3215
+ /**
3216
+ * End the bridge. Idempotent — the first call wins, later calls are
3217
+ * no-ops. Sends the carrier's end-of-call frame (if it has one — 46elks
3218
+ * `bye`; Twilio has none), closes both ports, fires `onEnd`.
3219
+ */
3220
+ end(reason: string): void;
3221
+ private noteDroppedFrame;
3222
+ private emitTranscript;
3223
+ private safeSend;
3224
+ }
3225
+
1740
3226
  declare const PHONE_REGION_SCOPES: readonly ["AT", "DE", "EU", "WORLD"];
1741
3227
  type PhoneRegionScope = typeof PHONE_REGION_SCOPES[number];
1742
3228
  declare const TELEPHONY_TRANSPORT_CAPABILITIES: readonly ["sms", "call_control", "realtime_media", "recording_supported"];
@@ -1839,7 +3325,9 @@ declare function validatePhoneMissionStart(input: unknown, transport: unknown, o
1839
3325
  allowPremiumOrSpecialNumbers?: boolean;
1840
3326
  }): PhoneMissionStartValidationResult;
1841
3327
 
1842
- type PhoneTransportProvider = '46elks';
3328
+ type PhoneTransportProvider = '46elks' | 'twilio';
3329
+ /** Providers that support starting outbound call-control missions. */
3330
+ declare const PHONE_CALL_CONTROL_PROVIDERS: readonly PhoneTransportProvider[];
1843
3331
  /**
1844
3332
  * Abuse / cost controls for the call-control surface. A phone mission
1845
3333
  * places a real, billed outbound call, so /calls/start needs hard limits
@@ -1881,6 +3369,29 @@ interface PhoneMissionTranscriptEntry {
1881
3369
  text: string;
1882
3370
  metadata?: Record<string, unknown>;
1883
3371
  }
3372
+ type OperatorQueryUrgency = 'normal' | 'high';
3373
+ /**
3374
+ * A question the voice agent put to its human operator mid-call via the
3375
+ * `ask_operator` tool (plan §4 / §5). Persisted on the mission under
3376
+ * `metadata.operatorQueries[]` and exposed by the operator-query API
3377
+ * endpoints. The bridge's `ask_operator` tool blocks polling this
3378
+ * record until `answer` is set or the hard timeout elapses.
3379
+ */
3380
+ interface PhoneOperatorQuery {
3381
+ /** `oq_<uuid>` — unique across all missions. */
3382
+ id: string;
3383
+ /** The question, sanitised + length-bounded. */
3384
+ question: string;
3385
+ /** One-line context on the call, if the agent supplied it. */
3386
+ callContext?: string;
3387
+ urgency: OperatorQueryUrgency;
3388
+ askedAt: string;
3389
+ /** The operator's answer — set once, never overwritten (idempotent). */
3390
+ answer?: string;
3391
+ answeredAt?: string;
3392
+ /** Channel the answer arrived on (e.g. `api`, `email`). */
3393
+ answeredVia?: string;
3394
+ }
1884
3395
  interface PhoneCallMission {
1885
3396
  id: string;
1886
3397
  agentId: string;
@@ -1951,6 +3462,52 @@ declare class PhoneManager {
1951
3462
  private authenticateWebhook;
1952
3463
  handleVoiceStartWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneWebhookResult;
1953
3464
  handleHangupWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneCallMission;
3465
+ /**
3466
+ * Handle Twilio's voice webhook — the `Url` Twilio fetches when the
3467
+ * outbound call connects. The mirror of {@link handleVoiceStartWebhook}
3468
+ * for Twilio: it authenticates the per-mission token, transitions the
3469
+ * mission to `connected`, and returns the TwiML to send back.
3470
+ *
3471
+ * `twiml` is a `<Connect><Stream>` document that wires the call's
3472
+ * audio to the realtime voice WebSocket — the same realtime path the
3473
+ * 46elks websocket-number uses. The route serves it with
3474
+ * `Content-Type: text/xml`.
3475
+ *
3476
+ * Like the 46elks handler this is terminal-state-guarded (#43-H5,
3477
+ * a late/replayed webhook cannot resurrect a finished mission) and
3478
+ * idempotent (a duplicate is acknowledged with the same TwiML but
3479
+ * changes nothing).
3480
+ */
3481
+ handleTwilioVoiceWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): {
3482
+ mission: PhoneCallMission;
3483
+ twiml: string;
3484
+ };
3485
+ /**
3486
+ * Handle Twilio's status callback — the `StatusCallback` Twilio POSTs
3487
+ * with the terminal call status. The mirror of
3488
+ * {@link handleHangupWebhook} for Twilio. Idempotent + terminal-state
3489
+ * guarded; records the reported `CallDuration` and accumulates cost
3490
+ * from `Price` when Twilio supplied it (Twilio reports the final
3491
+ * price asynchronously, so it may be absent on the first callback —
3492
+ * the duration ceiling / rate limit / concurrency cap remain the
3493
+ * preventive cost controls, #43-H2).
3494
+ */
3495
+ handleTwilioStatusWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneCallMission;
3496
+ /**
3497
+ * Build the TwiML for the Twilio voice webhook — a `<Connect><Stream>`
3498
+ * pointing at the realtime voice WebSocket. The `<Stream>` URL is
3499
+ * derived from `webhookBaseUrl` (https → wss); the per-mission token
3500
+ * (#43-H7) rides as both a `<Parameter>` and a query param so the
3501
+ * media socket can be matched to its mission.
3502
+ */
3503
+ private buildTwilioVoiceTwiML;
3504
+ /**
3505
+ * Read the call cost off a Twilio status callback (`Price`, a
3506
+ * negative or string number), add it to the mission's running total,
3507
+ * and flag a policy-cap breach (#43-H2). Twilio prices are reported
3508
+ * as a negative amount (a debit); we use the absolute value.
3509
+ */
3510
+ private buildTwilioCostMetadataPatch;
1954
3511
  /**
1955
3512
  * Read the call cost off a 46elks hangup payload, add it to the
1956
3513
  * mission's running total, and flag a policy-cap breach (#43-H2).
@@ -1961,7 +3518,117 @@ declare class PhoneManager {
1961
3518
  private buildCostMetadataPatch;
1962
3519
  private buildVoiceStartAction;
1963
3520
  cancelMission(agentId: string, missionId: string): PhoneCallMission;
3521
+ /**
3522
+ * Resolve a mission by the provider's call id (e.g. the 46elks `callid`).
3523
+ * The realtime voice bridge uses this to match an inbound 46elks
3524
+ * realtime-media WebSocket — whose `hello` frame carries `callid` —
3525
+ * back to the mission that placed the call, so the right agent's
3526
+ * memory and task can be loaded into the OpenAI Realtime session.
3527
+ */
3528
+ findMissionByProviderCallId(providerCallId: string, agentId?: string): PhoneCallMission | null;
3529
+ /**
3530
+ * Append transcript entries produced by the realtime voice bridge and
3531
+ * optionally transition the mission status. A mission already in a
3532
+ * terminal state keeps that state — a late bridge event must not
3533
+ * resurrect a completed/failed/cancelled mission (mirrors the
3534
+ * terminal-state guard on the webhook handlers). No-op if the mission
3535
+ * no longer exists.
3536
+ */
3537
+ recordRealtimeActivity(missionId: string, entries: PhoneMissionTranscriptEntry[], status?: PhoneMissionState): PhoneCallMission | null;
3538
+ /**
3539
+ * Record an operator query against a mission — the first step of the
3540
+ * `ask_operator` tool (plan §4). Returns the persisted query; the
3541
+ * bridge then polls {@link getOperatorQuery} for an answer. Throws on
3542
+ * an unknown mission or an empty question.
3543
+ */
3544
+ addOperatorQuery(missionId: string, input: {
3545
+ question: string;
3546
+ callContext?: string;
3547
+ urgency?: string;
3548
+ }): {
3549
+ mission: PhoneCallMission;
3550
+ query: PhoneOperatorQuery;
3551
+ };
3552
+ /** List the operator queries recorded on a mission. */
3553
+ listOperatorQueries(missionId: string, agentId?: string): PhoneOperatorQuery[];
3554
+ /** Read one operator query, or null if the mission/query is unknown. */
3555
+ getOperatorQuery(missionId: string, queryId: string, agentId?: string): PhoneOperatorQuery | null;
3556
+ /**
3557
+ * Resolve a mission + query by the query id alone — used by the
3558
+ * inbound email-reply hook, which only has the id parsed out of the
3559
+ * reply subject. A LIKE prefilter (id escaped so its `_`/`-` are
3560
+ * literal) narrows the scan; the match is then verified exactly.
3561
+ */
3562
+ findMissionByOperatorQueryId(queryId: string): {
3563
+ mission: PhoneCallMission;
3564
+ query: PhoneOperatorQuery;
3565
+ } | null;
3566
+ /**
3567
+ * Record the operator's answer to a query. Idempotent — the first
3568
+ * answer wins; a later answer for the same query returns the existing
3569
+ * record unchanged with `alreadyAnswered: true`, so a duplicate
3570
+ * (e.g. an email reply AND an API POST) cannot fight. Returns null if
3571
+ * the mission/query is unknown; throws on an empty answer.
3572
+ */
3573
+ answerOperatorQuery(missionId: string, queryId: string, answer: string, options?: {
3574
+ via?: string;
3575
+ agentId?: string;
3576
+ }): {
3577
+ mission: PhoneCallMission;
3578
+ query: PhoneOperatorQuery;
3579
+ alreadyAnswered: boolean;
3580
+ } | null;
3581
+ /**
3582
+ * Flag a mission for callback-on-disconnect: the call dropped while
3583
+ * an operator query was still unanswered, so once the operator
3584
+ * answers the API should dial the caller back. Returns the mission
3585
+ * unchanged (not flagged) if every query is already answered; null if
3586
+ * the mission is unknown.
3587
+ */
3588
+ flagCallbackPending(missionId: string): PhoneCallMission | null;
3589
+ /** Missions currently flagged for callback-on-disconnect. */
3590
+ findCallbackPendingMissions(agentId?: string): PhoneCallMission[];
3591
+ /**
3592
+ * Trigger a callback (plan §7) when a callback-pending mission now has
3593
+ * an answered query: re-dial the same number with a continuation task
3594
+ * carrying the operator's answer. Returns the (updated) original
3595
+ * mission + the new callback mission, or null if no callback is due.
3596
+ *
3597
+ * `callbackPending` is cleared BEFORE dialing so a concurrent second
3598
+ * answer cannot double-dial; if the dial throws it is restored so the
3599
+ * callback is not silently lost, and the error is rethrown.
3600
+ */
3601
+ triggerCallback(missionId: string, options?: StartPhoneCallOptions): Promise<{
3602
+ mission: PhoneCallMission;
3603
+ callbackMission: PhoneCallMission;
3604
+ } | null>;
1964
3605
  private build46ElksCallRequest;
3606
+ /**
3607
+ * Build the Twilio outbound-call request — the mirror of
3608
+ * {@link build46ElksCallRequest} for Twilio's Calls.json endpoint:
3609
+ *
3610
+ * POST https://api.twilio.com/2010-04-01/Accounts/{AccountSid}/Calls.json
3611
+ *
3612
+ * with an `application/x-www-form-urlencoded` body. `From`/`To` are
3613
+ * the numbers; `Url` is a TwiML webhook Twilio fetches when the call
3614
+ * connects — it points at our voice-start webhook, which returns the
3615
+ * `<Connect><Stream>` TwiML that wires the call's audio to the
3616
+ * realtime voice WebSocket. `StatusCallback` is Twilio's hangup-
3617
+ * equivalent — fired with the final call status (the analogue of the
3618
+ * 46elks `whenhangup`). `TimeLimit` caps the call duration, re-clamped
3619
+ * to the server ceiling (#43-H6) exactly as the 46elks `timeout` is.
3620
+ *
3621
+ * Both webhook URLs carry the per-mission HMAC token (#43-H7), never
3622
+ * the raw `webhookSecret`. The Twilio `AccountSid` is `config.username`
3623
+ * and the `AuthToken` is `config.password` (HTTP Basic on the request,
3624
+ * and the key Twilio signs `X-Twilio-Signature` with).
3625
+ *
3626
+ * > The Calls.json endpoint path, the `From`/`To`/`Url`/
3627
+ * > `StatusCallback`/`TimeLimit` body fields, and the `<Connect>
3628
+ * > <Stream>` TwiML are per Twilio's public Programmable Voice docs;
3629
+ * > verify against current docs before the live smoke test.
3630
+ */
3631
+ private buildTwilioCallRequest;
1965
3632
  private insertMission;
1966
3633
  private updateProviderCall;
1967
3634
  private updateMissionStatus;
@@ -1971,6 +3638,10 @@ declare function buildPhoneTransportConfig(input: {
1971
3638
  phoneNumber?: unknown;
1972
3639
  username?: unknown;
1973
3640
  password?: unknown;
3641
+ /** Twilio alias for {@link username} — the account SID. */
3642
+ accountSid?: unknown;
3643
+ /** Twilio alias for {@link password} — the account auth token. */
3644
+ authToken?: unknown;
1974
3645
  webhookBaseUrl?: unknown;
1975
3646
  webhookSecret?: unknown;
1976
3647
  apiUrl?: unknown;
@@ -2772,6 +4443,333 @@ declare class SetupManager {
2772
4443
  isInitialized(): boolean;
2773
4444
  }
2774
4445
 
4446
+ /** External binary a media operation depends on. */
4447
+ type MediaBinary = 'ffmpeg' | 'ffprobe' | 'imagemagick' | 'whisper' | 'python' | 'edge-tts';
4448
+ /**
4449
+ * Detection result for one external binary. `available: false` carries
4450
+ * an actionable `installHint` the agent can relay to the operator.
4451
+ */
4452
+ interface MediaCapability {
4453
+ /** Binary identifier. */
4454
+ binary: MediaBinary;
4455
+ /** Whether the binary was found and is runnable. */
4456
+ available: boolean;
4457
+ /** Version string when detectable. */
4458
+ version?: string;
4459
+ /** Resolved path / command used to invoke it. */
4460
+ command?: string;
4461
+ /** Human-readable description of what it powers. */
4462
+ description: string;
4463
+ /** Install instructions, present when `available` is false. */
4464
+ installHint?: string;
4465
+ }
4466
+ /** Aggregate media capability report — one entry per binary. */
4467
+ interface MediaCapabilityReport {
4468
+ /** Per-binary detection results. */
4469
+ capabilities: MediaCapability[];
4470
+ /** True when at least ffmpeg + ffprobe are present (the baseline). */
4471
+ ready: boolean;
4472
+ /** When the report was generated. */
4473
+ checkedAt: string;
4474
+ }
4475
+ /** Result envelope for an operation that produced an output file. */
4476
+ interface MediaFileResult {
4477
+ ok: true;
4478
+ /** Absolute path of the produced file. */
4479
+ filePath: string;
4480
+ /** Size of the produced file in bytes. */
4481
+ sizeBytes: number;
4482
+ /** Output container/format, when meaningful. */
4483
+ format?: string;
4484
+ /** Operation-specific extra fields. */
4485
+ [key: string]: unknown;
4486
+ }
4487
+ interface TtsGenerateOptions {
4488
+ /** Text to synthesise. */
4489
+ text: string;
4490
+ /** Voice preset name or a full Edge voice id. */
4491
+ voice?: string;
4492
+ /** Speaking rate, e.g. "+20%" / "-10%". */
4493
+ rate?: string;
4494
+ /** Pitch shift, e.g. "+5Hz" / "-10Hz". */
4495
+ pitch?: string;
4496
+ }
4497
+ type ImageAction = 'resize' | 'crop' | 'rotate' | 'convert' | 'compress' | 'text_overlay' | 'flip' | 'blur' | 'sharpen' | 'grayscale';
4498
+ interface ImageEditOptions {
4499
+ /** Absolute path to the input image. */
4500
+ input: string;
4501
+ /** Edit action to perform. */
4502
+ action: ImageAction;
4503
+ width?: number;
4504
+ height?: number;
4505
+ angle?: number;
4506
+ format?: string;
4507
+ quality?: number;
4508
+ text?: string;
4509
+ position?: string;
4510
+ fontSize?: number;
4511
+ fontColor?: string;
4512
+ blurRadius?: number;
4513
+ direction?: 'horizontal' | 'vertical';
4514
+ offsetX?: number;
4515
+ offsetY?: number;
4516
+ }
4517
+ type VideoAction = 'trim' | 'extract_frame' | 'extract_frames' | 'convert' | 'gif' | 'compress' | 'resize' | 'add_audio' | 'remove_audio' | 'speed' | 'color_grade' | 'transition' | 'text_overlay' | 'picture_in_picture' | 'split_screen' | 'ken_burns' | 'slow_motion' | 'watermark' | 'concatenate' | 'audio_mix' | 'auto_caption';
4518
+ interface VideoEditOptions {
4519
+ /** Absolute path to the input video (or image for ken_burns). */
4520
+ input: string;
4521
+ /** Edit action to perform. */
4522
+ action: VideoAction;
4523
+ start?: string;
4524
+ end?: string;
4525
+ duration?: string;
4526
+ timestamp?: string;
4527
+ interval?: number;
4528
+ format?: string;
4529
+ width?: number;
4530
+ height?: number;
4531
+ fps?: number;
4532
+ crf?: number;
4533
+ audioPath?: string;
4534
+ speedFactor?: number;
4535
+ secondInput?: string;
4536
+ transitionType?: string;
4537
+ transitionDuration?: number;
4538
+ text?: string;
4539
+ fontSize?: number;
4540
+ fontColor?: string;
4541
+ textPosition?: string;
4542
+ textBg?: string;
4543
+ textStart?: string;
4544
+ textEnd?: string;
4545
+ overlayOpacity?: number;
4546
+ overlayScale?: number;
4547
+ watermarkPosition?: string;
4548
+ watermarkPath?: string;
4549
+ pipWidth?: number;
4550
+ pipPosition?: string;
4551
+ splitDirection?: 'horizontal' | 'vertical';
4552
+ zoomDirection?: string;
4553
+ zoomDuration?: number;
4554
+ zoomFactor?: number;
4555
+ files?: string[];
4556
+ bgVolume?: string;
4557
+ fgVolume?: string;
4558
+ colorPreset?: string;
4559
+ lutPath?: string;
4560
+ captionColor?: string;
4561
+ captionFontSize?: number;
4562
+ /** Path to a whisper.cpp model file (.bin) — required for auto_caption. */
4563
+ whisperModel?: string;
4564
+ }
4565
+ type AudioAction = 'trim' | 'convert' | 'merge' | 'volume' | 'speed' | 'extract' | 'reverse' | 'fade';
4566
+ interface AudioEditOptions {
4567
+ /** Absolute path to the input audio (or video for `extract`). */
4568
+ input?: string;
4569
+ /** Edit action to perform. */
4570
+ action: AudioAction;
4571
+ start?: string;
4572
+ end?: string;
4573
+ duration?: string;
4574
+ format?: string;
4575
+ files?: string[];
4576
+ volume?: string;
4577
+ speedFactor?: number;
4578
+ fadeType?: 'in' | 'out' | 'both';
4579
+ fadeDuration?: number;
4580
+ }
4581
+ interface MediaStreamInfo {
4582
+ type?: string;
4583
+ codec?: string;
4584
+ width?: number;
4585
+ height?: number;
4586
+ duration?: string;
4587
+ bitRate?: string;
4588
+ sampleRate?: string;
4589
+ channels?: number;
4590
+ fps?: string;
4591
+ }
4592
+ interface MediaInfoResult {
4593
+ ok: true;
4594
+ file: string;
4595
+ format?: string;
4596
+ duration?: string;
4597
+ sizeBytes: number;
4598
+ bitRate?: string;
4599
+ streams: MediaStreamInfo[];
4600
+ }
4601
+ interface VideoUnderstandOptions {
4602
+ /** Absolute path to the input video. */
4603
+ input: string;
4604
+ /** Seconds between extracted frames (default 3). */
4605
+ frameInterval?: number;
4606
+ /** Maximum number of frames to extract (default 30). */
4607
+ maxFrames?: number;
4608
+ /** Path to a whisper.cpp model file (.bin) — enables transcription. */
4609
+ whisperModel?: string;
4610
+ }
4611
+ interface VideoTimelineEntry {
4612
+ timeSeconds: number;
4613
+ timeDisplay: string;
4614
+ framePath: string;
4615
+ spokenText: string;
4616
+ }
4617
+ interface VideoUnderstandResult {
4618
+ ok: true;
4619
+ video: string;
4620
+ duration: number;
4621
+ resolution: string;
4622
+ totalFramesExtracted: number;
4623
+ transcriptSegments: number;
4624
+ timeline: VideoTimelineEntry[];
4625
+ frameDir: string;
4626
+ hint: string;
4627
+ }
4628
+ interface VoiceCloneOptions {
4629
+ /** Text to speak in the cloned voice. */
4630
+ text: string;
4631
+ /** Absolute path to the reference audio sample (required). */
4632
+ refAudio: string;
4633
+ /** Transcript of the reference audio (required). */
4634
+ refText: string;
4635
+ /** Optional path to the Python interpreter with F5-TTS installed. */
4636
+ pythonBin?: string;
4637
+ /** Compute device passed to F5-TTS (default 'cpu'). */
4638
+ device?: string;
4639
+ }
4640
+
4641
+ interface MediaManagerOptions {
4642
+ /**
4643
+ * Directory media output files are written to. Created on first use.
4644
+ * Defaults to `<dataDir>/media` when a dataDir is given, otherwise to
4645
+ * `<os-tmp>/agenticmail-media`.
4646
+ */
4647
+ outputDir?: string;
4648
+ /** AgenticMail data directory — used to derive a default outputDir. */
4649
+ dataDir?: string;
4650
+ }
4651
+ declare class MediaManager {
4652
+ private readonly outputDir;
4653
+ constructor(options?: MediaManagerOptions);
4654
+ /** Ensure the output directory exists; returns it. */
4655
+ private ensureOutputDir;
4656
+ /** Build an output path inside the managed output dir. */
4657
+ private outPath;
4658
+ /** Build a sub-directory inside the managed output dir. */
4659
+ private outDir;
4660
+ /** Stat a produced file into a {@link MediaFileResult} envelope. */
4661
+ private fileResult;
4662
+ /** Run ffmpeg with an argument array. */
4663
+ private ffmpeg;
4664
+ /** Run ImageMagick with an argument array (handles magick/convert). */
4665
+ private magick;
4666
+ /** Run an `identify`-style probe via the ImageMagick binary. */
4667
+ private magickIdentify;
4668
+ /** Probe a media file with ffprobe, returning parsed JSON. */
4669
+ private ffprobe;
4670
+ /** Return the media binary capability report (graceful-degradation surface). */
4671
+ capabilities(opts?: {
4672
+ force?: boolean;
4673
+ }): MediaCapabilityReport;
4674
+ /** List the built-in Edge TTS voice presets. */
4675
+ listVoices(): {
4676
+ presets: Array<{
4677
+ name: string;
4678
+ full: string;
4679
+ }>;
4680
+ default: string;
4681
+ };
4682
+ /**
4683
+ * Synthesise speech with Edge TTS. node-edge-tts is an optional peer
4684
+ * dependency — when it is absent this throws a clear, actionable
4685
+ * error instead of crashing. The MP3 is transcoded to OGG/Opus when
4686
+ * ffmpeg is available (so it can be sent as a voice note); otherwise
4687
+ * the raw MP3 is returned.
4688
+ */
4689
+ ttsGenerate(opts: TtsGenerateOptions): Promise<MediaFileResult>;
4690
+ /** Edit an image with ImageMagick. */
4691
+ imageEdit(opts: ImageEditOptions): Promise<MediaFileResult>;
4692
+ /** Edit audio with ffmpeg. */
4693
+ audioEdit(opts: AudioEditOptions): Promise<MediaFileResult>;
4694
+ /** Probe a media file's metadata with ffprobe. */
4695
+ mediaInfo(input: string): Promise<MediaInfoResult>;
4696
+ /** Edit a video with ffmpeg (+ ImageMagick for caption rendering). */
4697
+ videoEdit(opts: VideoEditOptions): Promise<MediaFileResult>;
4698
+ private videoColorGrade;
4699
+ private videoTransition;
4700
+ private videoTextOverlay;
4701
+ private videoPictureInPicture;
4702
+ private videoSplitScreen;
4703
+ private videoKenBurns;
4704
+ private videoSlowMotion;
4705
+ private videoWatermark;
4706
+ private videoConcatenate;
4707
+ private videoAudioMix;
4708
+ /**
4709
+ * Burn dynamic word-chunked captions onto a video. Needs ffmpeg,
4710
+ * ImageMagick, and whisper.cpp (with a model file). Mirrors the
4711
+ * source MCP's CapCut-style caption renderer.
4712
+ */
4713
+ private videoAutoCaption;
4714
+ /**
4715
+ * Analyse a video — extract frames at intervals and (when a whisper
4716
+ * model is given) transcribe the audio — and return a merged
4717
+ * timeline of what is shown and said.
4718
+ */
4719
+ videoUnderstand(opts: VideoUnderstandOptions): Promise<VideoUnderstandResult>;
4720
+ /**
4721
+ * Synthesise speech in a reference voice with F5-TTS. Needs a Python
4722
+ * interpreter that has the `f5-tts` and `soundfile` packages. The
4723
+ * reference audio + transcript MUST be supplied by the caller — no
4724
+ * built-in voice profile. The Python is run via execFile with an
4725
+ * argument array; the script and its inputs are written to a temp
4726
+ * file and passed by path, so no caller value is interpolated into a
4727
+ * command line.
4728
+ */
4729
+ voiceClone(opts: VoiceCloneOptions): Promise<MediaFileResult>;
4730
+ /** Parse a whisper-produced SRT (located by stem) into timed segments. */
4731
+ private parseSrt;
4732
+ /** Unlink a file, swallowing any error (cleanup best-effort). */
4733
+ private tryUnlink;
4734
+ /** Remove the SRT(s) produced for a given stem. */
4735
+ private tryUnlinkSrt;
4736
+ /** Recursively remove a directory, swallowing errors. */
4737
+ private tryRmDir;
4738
+ }
4739
+
4740
+ /**
4741
+ * Detect whether a single binary is available. Result is cached; pass
4742
+ * `{ force: true }` to re-probe (e.g. after the operator installs it).
4743
+ */
4744
+ declare function detectBinary(binary: MediaBinary, opts?: {
4745
+ force?: boolean;
4746
+ }): MediaCapability;
4747
+ /**
4748
+ * Resolve the command name to invoke for a binary. Throws a clear,
4749
+ * actionable error if the binary is not available — callers use this
4750
+ * at the top of every media operation so a missing dependency fails
4751
+ * fast with an install hint instead of an opaque ENOENT deep inside
4752
+ * an execFile call.
4753
+ */
4754
+ declare function requireBinary(binary: MediaBinary): string;
4755
+ /**
4756
+ * Validate that a whisper.cpp model file exists on disk. whisper.cpp
4757
+ * needs an explicit model path; this surfaces a clear error rather
4758
+ * than letting the CLI fail cryptically.
4759
+ */
4760
+ declare function requireWhisperModel(modelPath: string | undefined): string;
4761
+ /**
4762
+ * Build the full media capability report — one entry per binary, plus
4763
+ * a `ready` flag (true once ffmpeg + ffprobe, the baseline, are both
4764
+ * present). Used by the health surface and the `media_capabilities`
4765
+ * tool so an agent can see what is available before attempting an op.
4766
+ */
4767
+ declare function getMediaCapabilities(opts?: {
4768
+ force?: boolean;
4769
+ }): MediaCapabilityReport;
4770
+ /** Clear the detection cache — used by tests and after a re-install. */
4771
+ declare function clearMediaCapabilityCache(): void;
4772
+
2775
4773
  /**
2776
4774
  * Stable thread-id derivation.
2777
4775
  *
@@ -3231,4 +5229,4 @@ declare class MemorySearchIndex {
3231
5229
  has(id: string): boolean;
3232
5230
  }
3233
5231
 
3234
- export { AGENT_ROLES, AccountManager, type AddressInfo, type Agent, AgentDeletionService, type AgentMemoryEntry, type AgentMemoryFields, AgentMemoryManager, type AgentMemoryOptions, type AgentMemoryRead, AgentMemoryStore, type AgentRole, AgenticMailClient, type AgenticMailClientOptions, type AgenticMailConfig, type ArchiveAndDeleteOptions, type ArchivedEmail, type Attachment, type AttachmentAdvisory, BRIDGE_OPERATOR_LIVE_WINDOW_MS, type BridgeMailContext, type BridgeWakeError, type BridgeWakePromptArgs, type BridgeWakeResult, type BridgeWakeRoute, type CachedMessage, CloudflareClient, type CreateAgentOptions, type CreateMemoryInput, DEFAULT_AGENT_NAME, DEFAULT_AGENT_ROLE, DEFAULT_SESSION_MAX_AGE_MS, DNSConfigurator, type Database, type DeletionReport, type DeletionSummary, DependencyChecker, DependencyInstaller, type DependencyStatus, type DnsRecord, type DnsSetupResult, type DomainInfo, DomainManager, type DomainModeConfig, type DomainPurchaseResult, DomainPurchaser, type DomainSearchResult, type DomainSetupResult, ELKS_REALTIME_AUDIO_FORMATS, type ElksRealtimeAudioFormat, type ElksRealtimeAudioMessage, type ElksRealtimeByeMessage, type ElksRealtimeHelloMessage, type ElksRealtimeInboundMessage, type ElksRealtimeOutboundMessage, type EmailEnvelope, type EmailRouteAction, type EmailRouteClass, type EmailRouteClassification, type EmailRouteInput, EmailSearchIndex, type FolderInfo, type GatewayConfig, GatewayManager, type GatewayManagerOptions, type GatewayMode, type GatewayStatus, type HostName, type HostSession, type HostSessionResumeMode, type InboundEmail, type InboundSmsEvent, type InboxEvent, type InboxExpungeEvent, type InboxFlagsEvent, type InboxNewEvent, InboxWatcher, type InboxWatcherOptions, type InstallProgress, type LinkAdvisory, type LocalSmtpConfig, MEMORY_CATEGORIES, MailReceiver, type MailReceiverOptions, MailSender, type MailSenderOptions, type MailboxInfo, type MemoryCategory, type MemoryImportance, type MemoryQueryOptions, MemorySearchIndex, type 
MemorySource, type MemoryStats, type OpenClawPhoneMissionPolicy, type OutboundCategory, type OutboundScanInput, type OutboundScanResult, type OutboundWarning, PHONE_MAX_CONCURRENT_MISSIONS, PHONE_MIN_WEBHOOK_SECRET_LENGTH, PHONE_MISSION_STATES, PHONE_RATE_LIMIT_PER_HOUR, PHONE_RATE_LIMIT_PER_MINUTE, PHONE_REGION_SCOPES, PHONE_SERVER_MAX_ATTEMPTS, PHONE_SERVER_MAX_CALL_DURATION_SECONDS, PHONE_SERVER_MAX_COST_PER_MISSION, PHONE_TASK_MAX_LENGTH, type ParsedAttachment, type ParsedEmail, type ParsedSms, PathTraversalError, type PhoneAlternativePolicy, type PhoneCallMission, type PhoneConfirmPolicy, PhoneManager, type PhoneMissionStartValidationResult, type PhoneMissionState, type PhoneMissionTranscriptEntry, type PhoneMissionValidationIssue, type PhoneMissionValidationResult, type PhoneNumberRisk, PhoneRateLimitError, type PhoneRegionScope, type PhoneTransportConfig, type PhoneTransportProfile, type PhoneTransportProvider, type PhoneTransportValidationResult, PhoneWebhookAuthError, type PhoneWebhookResult, type PlanBridgeWakeArgs, type PurchasedDomain, REDACTED, RELAY_PRESETS, RelayBridge, type RelayBridgeOptions, type RelayConfig, RelayGateway, type RelayProvider, type RelaySearchResult, type ResumeErrorClassificationOptions, SPAM_THRESHOLD, type SafeJoinOptions, type SanitizeDetection, type SanitizeResult, type SearchCriteria, type SearchableEmail, type SecurityAdvisory, type SendMailOptions, type SendResult, type SendResultWithRaw, type SendSmsInput, type SendSmsResult, ServiceManager, type ServiceStatus, type SetupConfig, SetupManager, type SetupResult, type Severity, type SmsConfig, SmsManager, type SmsMessage, SmsPoller, type SmsProvider, type SpamCategory, type SpamResult, type SpamRuleMatch, StalwartAdmin, type StalwartAdminOptions, type StalwartPrincipal, type StartPhoneCallOptions, type StartPhoneCallResult, type StartPhoneMissionInput, TELEPHONY_TRANSPORT_CAPABILITIES, type TelephonyTransportCapability, ThreadCache, type ThreadCacheEntry, type 
ThreadCacheOptions, type ThreadIdInput, type TunnelConfig, TunnelManager, UnsafeApiUrlError, type UpdateMemoryInput, type ValidatedPhoneMissionStart, WARNING_THRESHOLD, type WatcherOptions, assertWithinBase, bridgeWakeErrorMessage, bridgeWakeLastSeenAgeMs, buildApiUrl, buildElksAudioMessage, buildElksByeMessage, buildElksHandshakeMessages, buildElksInterruptMessage, buildElksListeningMessage, buildElksSendingMessage, buildInboundSecurityAdvisory, buildPhoneTransportConfig, classifyEmailRoute, classifyPhoneNumberRisk, classifyResumeError, closeDatabase, composeBridgeWakePrompt, createTestDatabase, debug, debugWarn, ensureDataDir, extractVerificationCode, flushTelemetry, forgetHostSession, getDatabase, getOperatorEmail, getSmsProvider, hostSessionStoragePath, inferPhoneRegion, isInternalEmail, isLoopbackMailHost, isPhoneRegionAllowed, isSessionFresh, isValidPhoneNumber, loadHostSession, mapProviderSmsStatus, normalizeAddress, normalizePhoneNumber, normalizeSubject, operatorPrefsStoragePath, parseElksRealtimeMessage, parseEmail, parseGoogleVoiceSms, planBridgeWake, recordToolCall, redactObject, redactPhoneTransportConfig, redactSecret, redactSmsConfig, resolveConfig, resolveTlsRejectUnauthorized, safeJoin, sanitizeEmail, saveConfig, saveHostSession, scanOutboundEmail, scoreEmail, setOperatorEmail, setTelemetryVersion, shouldSkipBridgeWakeForLiveOperator, startRelayBridge, stem, threadIdFor, tokenize, tryJoin, validateApiUrl, validatePhoneMissionPolicy, validatePhoneMissionStart, validatePhoneTransportProfile };
5232
+ export { AGENT_ROLES, ASK_OPERATOR_TOOL, AccountManager, type AddressInfo, type Agent, AgentDeletionService, type AgentMemoryEntry, type AgentMemoryFields, AgentMemoryManager, type AgentMemoryOptions, type AgentMemoryRead, AgentMemoryStore, type AgentRole, AgenticMailClient, type AgenticMailClientOptions, type AgenticMailConfig, type ArchiveAndDeleteOptions, type ArchivedEmail, type Attachment, type AttachmentAdvisory, type AudioAction, type AudioEditOptions, BRIDGE_OPERATOR_LIVE_WINDOW_MS, type BridgeMailContext, type BridgeWakeError, type BridgeWakePromptArgs, type BridgeWakeResult, type BridgeWakeRoute, type CachedMessage, CloudflareClient, type CreateAgentOptions, type CreateMemoryInput, DEFAULT_AGENT_NAME, DEFAULT_AGENT_ROLE, DEFAULT_REALTIME_AUDIO_FORMAT, DEFAULT_REALTIME_MODEL, DEFAULT_REALTIME_VOICE, DEFAULT_SESSION_MAX_AGE_MS, DEFAULT_WEB_SEARCH_ENDPOINT, DNSConfigurator, type Database, type DeletionReport, type DeletionSummary, DependencyChecker, DependencyInstaller, type DependencyStatus, type DnsRecord, type DnsSetupResult, type DomainInfo, DomainManager, type DomainModeConfig, type DomainPurchaseResult, DomainPurchaser, type DomainSearchResult, type DomainSetupResult, ELKS_REALTIME_AUDIO_FORMATS, ELKS_REALTIME_WS_PATH, type ElksRealtimeAudioFormat, type ElksRealtimeAudioMessage, type ElksRealtimeByeMessage, type ElksRealtimeHelloMessage, type ElksRealtimeInboundMessage, type ElksRealtimeOutboundMessage, ElksRealtimeTransport, type EmailEnvelope, type EmailRouteAction, type EmailRouteClass, type EmailRouteClassification, type EmailRouteInput, EmailSearchIndex, type FolderInfo, GET_DATETIME_TOOL, type GatewayConfig, GatewayManager, type GatewayManagerOptions, type GatewayMode, type GatewayStatus, type GetDatetimeOptions, type GetUpdatesOptions, type HostName, type HostSession, type HostSessionResumeMode, type ImageAction, type ImageEditOptions, type InboundEmail, type InboundSmsEvent, type InboxEvent, type InboxExpungeEvent, type InboxFlagsEvent, type 
InboxNewEvent, InboxWatcher, type InboxWatcherOptions, type InstallProgress, type LinkAdvisory, type LocalSmtpConfig, MEMORY_CATEGORIES, MailReceiver, type MailReceiverOptions, MailSender, type MailSenderOptions, type MailboxInfo, type MediaBinary, type MediaCapability, type MediaCapabilityReport, type MediaFileResult, type MediaInfoResult, MediaManager, type MediaManagerOptions, type MediaStreamInfo, type MemoryCategory, type MemoryImportance, type MemoryQueryOptions, type MemoryRecaller, MemorySearchIndex, type MemorySource, type MemoryStats, OPENAI_REALTIME_URL, OPERATOR_QUERY_POLL_INTERVAL_MS, OPERATOR_QUERY_SUBJECT_TAG, OPERATOR_QUERY_TIMEOUT_MS, OPERATOR_QUERY_TIMEOUT_SENTINEL, type OpenClawPhoneMissionPolicy, type OperatorQueryNotificationInput, type OperatorQueryPollOptions, type OperatorQueryUrgency, type OperatorReplyKind, type OutboundCategory, type OutboundScanInput, type OutboundScanResult, type OutboundWarning, PHONE_CALL_CONTROL_PROVIDERS, PHONE_MAX_CONCURRENT_MISSIONS, PHONE_MIN_WEBHOOK_SECRET_LENGTH, PHONE_MISSION_STATES, PHONE_RATE_LIMIT_PER_HOUR, PHONE_RATE_LIMIT_PER_MINUTE, PHONE_REGION_SCOPES, PHONE_SERVER_MAX_ATTEMPTS, PHONE_SERVER_MAX_CALL_DURATION_SECONDS, PHONE_SERVER_MAX_COST_PER_MISSION, PHONE_TASK_MAX_LENGTH, type ParsedAttachment, type ParsedEmail, type ParsedOperatorReply, type ParsedSms, type ParsedTelegramMessage, PathTraversalError, type PhoneAlternativePolicy, type PhoneCallMission, type PhoneConfirmPolicy, PhoneManager, type PhoneMissionStartValidationResult, type PhoneMissionState, type PhoneMissionTranscriptEntry, type PhoneMissionValidationIssue, type PhoneMissionValidationResult, type PhoneNumberRisk, type PhoneOperatorQuery, PhoneRateLimitError, type PhoneRegionScope, type PhoneTransportConfig, type PhoneTransportProfile, type PhoneTransportProvider, type PhoneTransportValidationResult, PhoneWebhookAuthError, type PhoneWebhookResult, type PlanBridgeWakeArgs, type PurchasedDomain, REALTIME_AUDIO_SAMPLE_RATE, 
REALTIME_MAX_AUDIO_FRAME_BASE64, REALTIME_TOOL_CALL_TIMEOUT_MS, REALTIME_TOOL_DEFINITIONS, RECALL_MEMORY_TOOL, REDACTED, RELAY_PRESETS, type RealtimeBridgePort, type RealtimeBridgeTranscriptEntry, type RealtimeInboundEvent, type RealtimeInstructionOptions, type RealtimeSessionConfigOptions, type RealtimeToolCall, type RealtimeToolDefinition, type RealtimeToolHandler, type RealtimeToolResult, type RealtimeTransportAdapter, type RealtimeTransportProvider, RealtimeVoiceBridge, type RealtimeVoiceBridgeOptions, RelayBridge, type RelayBridgeOptions, type RelayConfig, RelayGateway, type RelayProvider, type RelaySearchResult, type ResumeErrorClassificationOptions, SEARCH_EMAIL_TOOL, SPAM_THRESHOLD, type SafeJoinOptions, type SanitizeDetection, type SanitizeResult, type SearchCriteria, type SearchableEmail, type SecurityAdvisory, type SendMailOptions, type SendResult, type SendResultWithRaw, type SendSmsInput, type SendSmsResult, type SendTelegramMessageOptions, type SendTelegramMessageResult, ServiceManager, type ServiceStatus, type SetWebhookOptions, type SetupConfig, SetupManager, type SetupResult, type Severity, type SmsConfig, SmsManager, type SmsMessage, SmsPoller, type SmsProvider, type SpamCategory, type SpamResult, type SpamRuleMatch, StalwartAdmin, type StalwartAdminOptions, type StalwartPrincipal, type StartPhoneCallOptions, type StartPhoneCallResult, type StartPhoneMissionInput, TELEGRAM_API_BASE, TELEGRAM_CHUNK_SIZE, TELEGRAM_MESSAGE_LIMIT, TELEGRAM_MIN_WEBHOOK_SECRET_LENGTH, TELEGRAM_OPERATOR_QUERY_TAG, TELEGRAM_STOP_WORDS, TELEGRAM_WEBHOOK_SECRET_RE, TELEPHONY_TRANSPORT_CAPABILITIES, TWILIO_MEDIA_SAMPLE_RATE, TWILIO_REALTIME_WS_PATH, TelegramApiError, type TelegramApiOptions, type TelegramBotInfo, type TelegramChatType, type TelegramConfig, TelegramManager, type TelegramMessage, type TelegramMode, type TelephonyTransportCapability, ThreadCache, type ThreadCacheEntry, type ThreadCacheOptions, type ThreadIdInput, type ToolExecutor, type TtsGenerateOptions, type 
TunnelConfig, TunnelManager, type TwilioConnectedMessage, type TwilioMarkMessage, type TwilioMediaMessage, type TwilioRealtimeInboundMessage, type TwilioRealtimeOutboundMessage, TwilioRealtimeTransport, type TwilioStartMessage, type TwilioStopMessage, type TwilioStreamTwiMLOptions, UnsafeApiUrlError, type UpdateMemoryInput, type ValidatedPhoneMissionStart, type VideoAction, type VideoEditOptions, type VideoTimelineEntry, type VideoUnderstandOptions, type VideoUnderstandResult, type VoiceCloneOptions, WARNING_THRESHOLD, WEB_SEARCH_TOOL, WEB_SEARCH_UNTRUSTED_PREFIX, type WatcherOptions, type WebSearchOptions, assertWithinBase, bridgeWakeErrorMessage, bridgeWakeLastSeenAgeMs, buildApiUrl, buildElksAudioMessage, buildElksByeMessage, buildElksHandshakeMessages, buildElksInterruptMessage, buildElksListeningMessage, buildElksSendingMessage, buildInboundSecurityAdvisory, buildOpenAIRealtimeUrl, buildPhoneTransportConfig, buildRealtimeInstructions, buildRealtimeSessionConfig, buildRealtimeToolGuidance, buildTwilioClearMessage, buildTwilioMarkMessage, buildTwilioMediaMessage, buildTwilioSayTwiML, buildTwilioSignature, buildTwilioStreamTwiML, callTelegramApi, classifyEmailRoute, classifyPhoneNumberRisk, classifyResumeError, clearMediaCapabilityCache, closeDatabase, composeBridgeWakePrompt, createRealtimeTransport, createTestDatabase, createToolExecutor, debug, debugWarn, deleteTelegramWebhook, detectBinary, ensureDataDir, escapeXml, extractEmailAddress, extractVerificationCode, flushTelemetry, forgetHostSession, formatOperatorQueryTelegramMessage, getDatabase, getDatetime, getMediaCapabilities, getOperatorEmail, getSmsProvider, getTelegramChat, getTelegramMe, getTelegramUpdates, getTelegramWebhookInfo, hostSessionStoragePath, inferPhoneRegion, isInternalEmail, isLoopbackMailHost, isOperatorReplySender, isPhoneRegionAllowed, isSessionFresh, isTelegramChatAllowed, isTelegramStopCommand, isValidPhoneNumber, loadHostSession, mapProviderSmsStatus, nextTelegramOffset, 
normalizeAddress, normalizePhoneNumber, normalizeSubject, operatorPrefsStoragePath, operatorQuerySubject, parseElksRealtimeMessage, parseEmail, parseGoogleVoiceSms, parseOperatorQueryReply, parseTelegramOperatorReply, parseTelegramUpdate, parseTwilioRealtimeMessage, planBridgeWake, pollForOperatorAnswer, recallMemory, recordToolCall, redactBotToken, redactObject, redactPhoneTransportConfig, redactSecret, redactSmsConfig, redactTelegramConfig, requireBinary, requireWhisperModel, resolveConfig, resolveTlsRejectUnauthorized, safeJoin, sanitizeEmail, saveConfig, saveHostSession, scanOutboundEmail, scoreEmail, sendTelegramMessage, setOperatorEmail, setTelegramWebhook, setTelemetryVersion, shouldSkipBridgeWakeForLiveOperator, splitTelegramMessage, startRelayBridge, stem, stripTelegramMarkdown, threadIdFor, tokenize, tryJoin, validateApiUrl, validatePhoneMissionPolicy, validatePhoneMissionStart, validatePhoneTransportProfile, validateTwilioSignature, webSearch };