@agenticmail/core 0.9.15 → 0.9.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3590 -105
- package/dist/index.d.cts +1997 -2
- package/dist/index.d.ts +1997 -2
- package/dist/index.js +3585 -174
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -220,6 +220,15 @@ interface AgenticMailConfig {
|
|
|
220
220
|
* system event (visible in the web UI) but no email is sent.
|
|
221
221
|
*/
|
|
222
222
|
operatorEmail?: string;
|
|
223
|
+
/**
|
|
224
|
+
* OpenAI API key — used only by the realtime voice bridge to open an
|
|
225
|
+
* OpenAI Realtime (`gpt-realtime`) session for live phone calls. When
|
|
226
|
+
* unset, phone missions still place and track calls (call-control),
|
|
227
|
+
* but a realtime-media WebSocket (46elks or Twilio) cannot be bridged to a
|
|
228
|
+
* conversational model. Read from the `OPENAI_API_KEY` env var or
|
|
229
|
+
* `config.json`. Optional — no other feature depends on it.
|
|
230
|
+
*/
|
|
231
|
+
openaiApiKey?: string;
|
|
223
232
|
masterKey: string;
|
|
224
233
|
dataDir: string;
|
|
225
234
|
}
|
|
@@ -1692,6 +1701,396 @@ declare class SmsPoller {
|
|
|
1692
1701
|
private pollOnce;
|
|
1693
1702
|
}
|
|
1694
1703
|
|
|
1704
|
+
/**
|
|
1705
|
+
* Telegram Bot API client — dependency-free (global `fetch`, Node 22+).
|
|
1706
|
+
*
|
|
1707
|
+
* Ported and MERGED from two already-tuned internal sources (licensing
|
|
1708
|
+
* confirmed by Ope, 2026-05-19 — plan §13.5):
|
|
1709
|
+
* - enterprise `agent-tools/tools/messaging/telegram.ts`
|
|
1710
|
+
* (`tgApi`, `stripMarkdown`, webhook management)
|
|
1711
|
+
* - agent-harness `fola-lib/telegram-api.mjs`
|
|
1712
|
+
* (auto-splitting `sendMessage`, the long-poll timeout discipline)
|
|
1713
|
+
*
|
|
1714
|
+
* Host-specific pieces are intentionally dropped to fit the single-tenant
|
|
1715
|
+
* open-source product: the local Bot API server auto-detection
|
|
1716
|
+
* (`http://localhost:8081`) and the filesystem media-download paths are
|
|
1717
|
+
* Fola-host infrastructure, not channel logic.
|
|
1718
|
+
*
|
|
1719
|
+
* Secrets: the bot token rides in the request URL path. Every error this
|
|
1720
|
+
* module surfaces is scrubbed with {@link redactBotToken} so a token can
|
|
1721
|
+
* never reach a log line or an API response — matching the SMS/phone
|
|
1722
|
+
* credential-handling bar.
|
|
1723
|
+
*/
|
|
1724
|
+
/** Official Telegram Bot API base. */
|
|
1725
|
+
declare const TELEGRAM_API_BASE = "https://api.telegram.org";
|
|
1726
|
+
/** Telegram's hard per-message ceiling is 4096 characters. */
|
|
1727
|
+
declare const TELEGRAM_MESSAGE_LIMIT = 4096;
|
|
1728
|
+
/** We split below the hard limit, leaving headroom for safety. */
|
|
1729
|
+
declare const TELEGRAM_CHUNK_SIZE = 4000;
|
|
1730
|
+
/** A failed Telegram Bot API call. `description` is the provider message. */
|
|
1731
|
+
declare class TelegramApiError extends Error {
|
|
1732
|
+
readonly isTelegramApiError = true;
|
|
1733
|
+
readonly description: string;
|
|
1734
|
+
readonly errorCode?: number;
|
|
1735
|
+
constructor(method: string, description: string, errorCode?: number);
|
|
1736
|
+
}
|
|
1737
|
+
/**
|
|
1738
|
+
* Scrub bot tokens from any string before it can be logged or returned.
|
|
1739
|
+
*
|
|
1740
|
+
* A token looks like `<digits>:<35 url-safe chars>`. We redact both an
|
|
1741
|
+
* explicitly-known token (exact match) and anything matching the generic
|
|
1742
|
+
* token shape — so a token leaks neither when the caller knows it nor
|
|
1743
|
+
* when it is buried in, say, a `fetch` failure message carrying the URL.
|
|
1744
|
+
*/
|
|
1745
|
+
declare function redactBotToken(text: string, token?: string): string;
|
|
1746
|
+
interface TelegramApiOptions {
|
|
1747
|
+
/**
|
|
1748
|
+
* Long-poll requests (`getUpdates` with a non-zero `timeout`) need an
|
|
1749
|
+
* HTTP timeout longer than the server-side poll window, or the socket
|
|
1750
|
+
* is torn down before Telegram replies.
|
|
1751
|
+
*/
|
|
1752
|
+
longPoll?: boolean;
|
|
1753
|
+
}
|
|
1754
|
+
/**
|
|
1755
|
+
* POST a Telegram Bot API method with a JSON body and return `result`.
|
|
1756
|
+
* Throws {@link TelegramApiError} (token-scrubbed) on any failure.
|
|
1757
|
+
*/
|
|
1758
|
+
declare function callTelegramApi<T = unknown>(token: string, method: string, body?: Record<string, unknown>, options?: TelegramApiOptions): Promise<T>;
|
|
1759
|
+
/**
|
|
1760
|
+
* Strip Markdown so agent replies arrive as clean plain text. Telegram's
|
|
1761
|
+
* Markdown parse modes are bypassed entirely (no `parse_mode`) to avoid
|
|
1762
|
+
* formatting collisions on arbitrary agent output.
|
|
1763
|
+
*/
|
|
1764
|
+
declare function stripTelegramMarkdown(text: string): string;
|
|
1765
|
+
/**
|
|
1766
|
+
* Split text into <= `maxLen` chunks, cutting on a newline boundary when
|
|
1767
|
+
* one is reasonably close so messages do not break mid-word.
|
|
1768
|
+
*/
|
|
1769
|
+
declare function splitTelegramMessage(text: string, maxLen?: number): string[];
|
|
1770
|
+
interface SendTelegramMessageOptions {
|
|
1771
|
+
/** Reply to a specific message id in the chat. */
|
|
1772
|
+
replyToMessageId?: number;
|
|
1773
|
+
/** Deliver silently (no push notification). */
|
|
1774
|
+
disableNotification?: boolean;
|
|
1775
|
+
}
|
|
1776
|
+
interface SendTelegramMessageResult {
|
|
1777
|
+
/** message_id of each chunk Telegram accepted, in order. */
|
|
1778
|
+
messageIds: number[];
|
|
1779
|
+
/** Number of chunks the text was split into. */
|
|
1780
|
+
chunks: number;
|
|
1781
|
+
}
|
|
1782
|
+
/**
|
|
1783
|
+
* Send a text message, auto-splitting anything over the per-message
|
|
1784
|
+
* ceiling. The reply target (if any) is attached only to the first
|
|
1785
|
+
* chunk so a long reply still threads correctly.
|
|
1786
|
+
*/
|
|
1787
|
+
declare function sendTelegramMessage(token: string, chatId: string | number, text: string, options?: SendTelegramMessageOptions): Promise<SendTelegramMessageResult>;
|
|
1788
|
+
interface TelegramBotInfo {
|
|
1789
|
+
id: number;
|
|
1790
|
+
is_bot: boolean;
|
|
1791
|
+
username?: string;
|
|
1792
|
+
first_name?: string;
|
|
1793
|
+
}
|
|
1794
|
+
/** `getMe` — used to validate a token and capture the bot identity. */
|
|
1795
|
+
declare function getTelegramMe(token: string): Promise<TelegramBotInfo>;
|
|
1796
|
+
/** `getChat` — chat metadata (title, type, member count, ...). */
|
|
1797
|
+
declare function getTelegramChat(token: string, chatId: string | number): Promise<Record<string, unknown>>;
|
|
1798
|
+
interface GetUpdatesOptions {
|
|
1799
|
+
/** Max updates per call (1-100). */
|
|
1800
|
+
limit?: number;
|
|
1801
|
+
/** Long-poll window in seconds (0 = short poll). */
|
|
1802
|
+
timeoutSec?: number;
|
|
1803
|
+
}
|
|
1804
|
+
/** `getUpdates` long-poll — the poll-mode transport. */
|
|
1805
|
+
declare function getTelegramUpdates(token: string, offset: number, options?: GetUpdatesOptions): Promise<Array<Record<string, unknown>>>;
|
|
1806
|
+
interface SetWebhookOptions {
|
|
1807
|
+
/**
|
|
1808
|
+
* Shared secret echoed by Telegram in the
|
|
1809
|
+
* `X-Telegram-Bot-Api-Secret-Token` header on every webhook delivery.
|
|
1810
|
+
*/
|
|
1811
|
+
secretToken?: string;
|
|
1812
|
+
/** Drop updates that queued up while no webhook was set. */
|
|
1813
|
+
dropPendingUpdates?: boolean;
|
|
1814
|
+
}
|
|
1815
|
+
/** `setWebhook` — register the inbound webhook URL (webhook-mode transport). */
|
|
1816
|
+
declare function setTelegramWebhook(token: string, url: string, options?: SetWebhookOptions): Promise<boolean>;
|
|
1817
|
+
/** `deleteWebhook` — switch back to poll mode. */
|
|
1818
|
+
declare function deleteTelegramWebhook(token: string): Promise<boolean>;
|
|
1819
|
+
/** `getWebhookInfo` — current webhook status. */
|
|
1820
|
+
declare function getTelegramWebhookInfo(token: string): Promise<Record<string, unknown>>;
|
|
1821
|
+
|
|
1822
|
+
/**
|
|
1823
|
+
* Telegram update parsing — pure, dependency-free.
|
|
1824
|
+
*
|
|
1825
|
+
* Ported from the inbound-message handling in the agent-harness Fola
|
|
1826
|
+
* bridge (`fola-telegram-bridge.mjs` — the `formatPrompt` header fields
|
|
1827
|
+
* and the `STOP_WORDS` abort set), stripped of every Fola-host-specific detail
|
|
1828
|
+
* (session routing, the fola-claude prompt envelope, media downloads).
|
|
1829
|
+
*/
|
|
1830
|
+
type TelegramChatType = 'private' | 'group' | 'supergroup' | 'channel' | 'unknown';
|
|
1831
|
+
/** A normalized inbound Telegram text message. */
|
|
1832
|
+
interface ParsedTelegramMessage {
|
|
1833
|
+
/** Telegram `update_id` — drives the poll offset. */
|
|
1834
|
+
updateId: number;
|
|
1835
|
+
/** `message_id` within the chat. */
|
|
1836
|
+
messageId: number;
|
|
1837
|
+
/** Chat id as a string (Telegram ids are 64-bit; strings avoid loss). */
|
|
1838
|
+
chatId: string;
|
|
1839
|
+
chatType: TelegramChatType;
|
|
1840
|
+
chatTitle?: string;
|
|
1841
|
+
/** Sender id as a string; falls back to the chat id for channel posts. */
|
|
1842
|
+
fromId: string;
|
|
1843
|
+
fromName: string;
|
|
1844
|
+
fromUsername?: string;
|
|
1845
|
+
/** Message text (or media caption). Always non-empty for a parsed result. */
|
|
1846
|
+
text: string;
|
|
1847
|
+
/** `message_id` of the message this one replies to, if any. */
|
|
1848
|
+
replyToMessageId?: number;
|
|
1849
|
+
/** Text of the replied-to message, when Telegram included it. */
|
|
1850
|
+
replyToText?: string;
|
|
1851
|
+
/** ISO-8601 timestamp derived from the Telegram `date` epoch. */
|
|
1852
|
+
date: string;
|
|
1853
|
+
}
|
|
1854
|
+
/**
|
|
1855
|
+
* Normalize a raw Telegram update into a {@link ParsedTelegramMessage},
|
|
1856
|
+
* or `null` when it carries no usable text (callback queries, service
|
|
1857
|
+
* messages, edits with no text, non-text media without a caption).
|
|
1858
|
+
* Handles both `message` and `channel_post` envelopes.
|
|
1859
|
+
*/
|
|
1860
|
+
declare function parseTelegramUpdate(update: unknown): ParsedTelegramMessage | null;
|
|
1861
|
+
/**
|
|
1862
|
+
* Words that, sent alone (case-insensitive, optional trailing
|
|
1863
|
+
* punctuation), signal "abort whatever you are doing for me". Kept
|
|
1864
|
+
* deliberately narrow — anything longer or ambiguous is a normal
|
|
1865
|
+
* message. Ported verbatim from the Fola bridge.
|
|
1866
|
+
*/
|
|
1867
|
+
declare const TELEGRAM_STOP_WORDS: ReadonlySet<string>;
|
|
1868
|
+
/** True when `text` is a bare stop command. */
|
|
1869
|
+
declare function isTelegramStopCommand(text: string): boolean;
|
|
1870
|
+
/**
|
|
1871
|
+
* Compute the next `getUpdates` offset from a batch: one past the
|
|
1872
|
+
* highest `update_id` seen. Advancing the offset is what acknowledges
|
|
1873
|
+
* updates to Telegram, so this must run on the raw batch even if
|
|
1874
|
+
* individual updates fail to parse.
|
|
1875
|
+
*/
|
|
1876
|
+
declare function nextTelegramOffset(currentOffset: number, updates: Array<{
|
|
1877
|
+
update_id?: unknown;
|
|
1878
|
+
}>): number;
|
|
1879
|
+
|
|
1880
|
+
/**
|
|
1881
|
+
* Telegram Manager — per-agent Telegram bot configuration + message log.
|
|
1882
|
+
*
|
|
1883
|
+
* Modeled on the existing {@link import('../sms/manager.js').SmsManager} — the
|
|
1884
|
+
* closest existing channel — so the hardening bar matches:
|
|
1885
|
+
* - Config lives in agent metadata under the `telegram` key.
|
|
1886
|
+
* - Credential fields (`botToken`, `webhookSecret`) are encrypted at
|
|
1887
|
+
* rest with the same AES-256-GCM scheme (`encryptSecret`) and
|
|
1888
|
+
* redacted on every read that leaves the process.
|
|
1889
|
+
* - Inbound/outbound messages are stored in a `telegram_messages` table.
|
|
1890
|
+
*
|
|
1891
|
+
* Single-tenant: there is no org / multi-tenant scoping and nothing is
|
|
1892
|
+
* hardcoded — every bot token, chat id and operator identity is
|
|
1893
|
+
* per-agent config supplied by the user.
|
|
1894
|
+
*/
|
|
1895
|
+
|
|
1896
|
+
/** Transport mode: long-poll `getUpdates`, or a registered webhook. */
|
|
1897
|
+
type TelegramMode = 'poll' | 'webhook';
|
|
1898
|
+
interface TelegramConfig {
|
|
1899
|
+
/** Whether the Telegram channel is active for this agent. */
|
|
1900
|
+
enabled: boolean;
|
|
1901
|
+
/** Bot API token from @BotFather. SECRET — encrypted at rest, redacted on read. */
|
|
1902
|
+
botToken: string;
|
|
1903
|
+
/** Bot @username (non-secret, for display). Captured from `getMe` at setup. */
|
|
1904
|
+
botUsername?: string;
|
|
1905
|
+
/** Numeric bot id (non-secret). Captured from `getMe` at setup. */
|
|
1906
|
+
botId?: number;
|
|
1907
|
+
/**
|
|
1908
|
+
* Chat ids permitted to message the agent. EMPTY = nobody (fail-closed,
|
|
1909
|
+
* same posture as the Fola bridge). The operator chat is always allowed.
|
|
1910
|
+
*/
|
|
1911
|
+
allowedChatIds: string[];
|
|
1912
|
+
/** Chat that receives `ask_operator` notifications + can approve (plan §13.4). */
|
|
1913
|
+
operatorChatId?: string;
|
|
1914
|
+
/** Inbound transport. */
|
|
1915
|
+
mode: TelegramMode;
|
|
1916
|
+
/** Public webhook URL (webhook mode only). */
|
|
1917
|
+
webhookUrl?: string;
|
|
1918
|
+
/**
|
|
1919
|
+
* Shared secret echoed by Telegram in the
|
|
1920
|
+
* `X-Telegram-Bot-Api-Secret-Token` header. SECRET — encrypted at
|
|
1921
|
+
* rest, redacted on read. Doubles as the webhook routing key.
|
|
1922
|
+
*/
|
|
1923
|
+
webhookSecret?: string;
|
|
1924
|
+
/** Persisted `getUpdates` offset (poll mode). */
|
|
1925
|
+
pollOffset?: number;
|
|
1926
|
+
/** When the channel was configured. */
|
|
1927
|
+
configuredAt: string;
|
|
1928
|
+
}
|
|
1929
|
+
interface TelegramMessage {
|
|
1930
|
+
id: string;
|
|
1931
|
+
agentId: string;
|
|
1932
|
+
direction: 'inbound' | 'outbound';
|
|
1933
|
+
chatId: string;
|
|
1934
|
+
/** Telegram `message_id` (may be absent for a failed outbound attempt). */
|
|
1935
|
+
telegramMessageId?: number;
|
|
1936
|
+
/** Sender id (inbound only). */
|
|
1937
|
+
fromId?: string;
|
|
1938
|
+
text: string;
|
|
1939
|
+
status: 'received' | 'sent' | 'failed' | 'pending';
|
|
1940
|
+
createdAt: string;
|
|
1941
|
+
metadata?: Record<string, unknown>;
|
|
1942
|
+
}
|
|
1943
|
+
/** Telegram's `secret_token` charset, and our minimum entropy floor. */
|
|
1944
|
+
declare const TELEGRAM_WEBHOOK_SECRET_RE: RegExp;
|
|
1945
|
+
declare const TELEGRAM_MIN_WEBHOOK_SECRET_LENGTH = 16;
|
|
1946
|
+
/**
|
|
1947
|
+
* Redact the credential fields of a Telegram config for any value that
|
|
1948
|
+
* leaves the process (API responses, logs). The bot token is collapsed
|
|
1949
|
+
* to `***` entirely — never partially shown — matching the SMS bar and
|
|
1950
|
+
* the plan §13.5 rule "no token ever in a log line or API response".
|
|
1951
|
+
*/
|
|
1952
|
+
declare function redactTelegramConfig(config: TelegramConfig): TelegramConfig;
|
|
1953
|
+
/**
|
|
1954
|
+
* Allow-list gate for INBOUND messages. A chat may talk to the agent
|
|
1955
|
+
* only if it is on `allowedChatIds` OR it is the configured operator
|
|
1956
|
+
* chat. An empty allow-list admits nobody except the operator chat — fail closed.
|
|
1957
|
+
*/
|
|
1958
|
+
declare function isTelegramChatAllowed(config: TelegramConfig, chatId: string): boolean;
|
|
1959
|
+
declare class TelegramManager {
|
|
1960
|
+
private db;
|
|
1961
|
+
private encryptionKey?;
|
|
1962
|
+
private initialized;
|
|
1963
|
+
/**
|
|
1964
|
+
* Optional master key used to encrypt Telegram credentials at rest
|
|
1965
|
+
* (the same AES-256-GCM scheme SMS/phone use). When absent (tests, or
|
|
1966
|
+
* a deployment with no master key) configs are stored as-is and reads
|
|
1967
|
+
* tolerate plaintext — upgrades and downgrades both stay safe.
|
|
1968
|
+
*/
|
|
1969
|
+
constructor(db: Database, encryptionKey?: string | undefined);
|
|
1970
|
+
private ensureTable;
|
|
1971
|
+
/** Encrypt the credential fields of a config before persisting. */
|
|
1972
|
+
private encryptConfig;
|
|
1973
|
+
/** Decrypt the credential fields of a config after loading. */
|
|
1974
|
+
private decryptConfig;
|
|
1975
|
+
/** Normalize a stored/loaded config object, defaulting missing fields. */
|
|
1976
|
+
private normalizeConfig;
|
|
1977
|
+
/** Get the Telegram config from agent metadata (credentials decrypted). */
|
|
1978
|
+
getConfig(agentId: string): TelegramConfig | null;
|
|
1979
|
+
/** Save the Telegram config to agent metadata (credentials encrypted). */
|
|
1980
|
+
saveConfig(agentId: string, config: TelegramConfig): void;
|
|
1981
|
+
/** Remove the Telegram config from agent metadata. */
|
|
1982
|
+
removeConfig(agentId: string): void;
|
|
1983
|
+
/** Persist a new poll offset without touching the rest of the config. */
|
|
1984
|
+
updatePollOffset(agentId: string, offset: number): void;
|
|
1985
|
+
/**
|
|
1986
|
+
* Resolve the agent that owns a webhook secret. Used to authenticate +
|
|
1987
|
+
* route an inbound Telegram webhook delivery: a webhook carries no bot
|
|
1988
|
+
* identity, so the `X-Telegram-Bot-Api-Secret-Token` header is the
|
|
1989
|
+
* routing key. The comparison is constant-time, and a non-match
|
|
1990
|
+
* returns `null` so the route can answer with a single uniform 403
|
|
1991
|
+
* (no enumeration oracle — same posture as the SMS webhook).
|
|
1992
|
+
*/
|
|
1993
|
+
findAgentByWebhookSecret(secret: string): {
|
|
1994
|
+
agentId: string;
|
|
1995
|
+
config: TelegramConfig;
|
|
1996
|
+
} | null;
|
|
1997
|
+
/** True if an inbound message with this Telegram id is already stored. */
|
|
1998
|
+
inboundMessageExists(agentId: string, chatId: string, telegramMessageId: number): boolean;
|
|
1999
|
+
/** Record an inbound Telegram message. */
|
|
2000
|
+
recordInbound(agentId: string, input: {
|
|
2001
|
+
chatId: string;
|
|
2002
|
+
telegramMessageId: number;
|
|
2003
|
+
fromId?: string;
|
|
2004
|
+
text: string;
|
|
2005
|
+
createdAt?: string;
|
|
2006
|
+
}, metadata?: Record<string, unknown>): TelegramMessage;
|
|
2007
|
+
/** Record an outbound Telegram message attempt. */
|
|
2008
|
+
recordOutbound(agentId: string, input: {
|
|
2009
|
+
chatId: string;
|
|
2010
|
+
text: string;
|
|
2011
|
+
telegramMessageId?: number;
|
|
2012
|
+
status?: TelegramMessage['status'];
|
|
2013
|
+
}, metadata?: Record<string, unknown>): TelegramMessage;
|
|
2014
|
+
/** Update the status (+ optional metadata) of a stored message. */
|
|
2015
|
+
updateStatus(id: string, status: TelegramMessage['status'], metadata?: Record<string, unknown>): void;
|
|
2016
|
+
/** List stored Telegram messages for an agent, newest first. */
|
|
2017
|
+
listMessages(agentId: string, opts?: {
|
|
2018
|
+
direction?: 'inbound' | 'outbound';
|
|
2019
|
+
chatId?: string;
|
|
2020
|
+
limit?: number;
|
|
2021
|
+
offset?: number;
|
|
2022
|
+
}): TelegramMessage[];
|
|
2023
|
+
}
|
|
2024
|
+
|
|
2025
|
+
/**
|
|
2026
|
+
* Telegram ⇄ `ask_operator` bridge (plan §13.4 / §13.5) — pure helpers.
|
|
2027
|
+
*
|
|
2028
|
+
* The realtime voice agent's `ask_operator` tool records an *operator
|
|
2029
|
+
* query* on the phone mission; the API exposes it through the
|
|
2030
|
+
* operator-query endpoints. Email is the channel-agnostic default
|
|
2031
|
+
* notifier (plan §5). This module makes Telegram a first-class
|
|
2032
|
+
* notification + approval channel WITHOUT duplicating any of that
|
|
2033
|
+
* machinery — it only:
|
|
2034
|
+
*
|
|
2035
|
+
* 1. {@link formatOperatorQueryTelegramMessage} — renders the
|
|
2036
|
+
* notification text the operator receives.
|
|
2037
|
+
* 2. {@link parseTelegramOperatorReply} — parses the operator's
|
|
2038
|
+
* Telegram reply back into a `{ queryId, answer }`.
|
|
2039
|
+
*
|
|
2040
|
+
* The caller feeds the parsed result straight into the SAME
|
|
2041
|
+
* `PhoneManager.answerOperatorQuery` the inbound email-reply hook uses.
|
|
2042
|
+
*/
|
|
2043
|
+
/**
|
|
2044
|
+
* Token embedded in a notification so the operator's reply can be
|
|
2045
|
+
* matched back to a query. Kept short — the operator may see it.
|
|
2046
|
+
*/
|
|
2047
|
+
declare const TELEGRAM_OPERATOR_QUERY_TAG = "AMQ";
|
|
2048
|
+
interface OperatorQueryNotificationInput {
|
|
2049
|
+
queryId: string;
|
|
2050
|
+
question: string;
|
|
2051
|
+
callContext?: string;
|
|
2052
|
+
urgency?: string;
|
|
2053
|
+
missionId?: string;
|
|
2054
|
+
}
|
|
2055
|
+
/**
|
|
2056
|
+
* Render the Telegram message body for an `ask_operator` notification.
|
|
2057
|
+
* The trailing `[AMQ <id>]` token lets a reply be matched to the query
|
|
2058
|
+
* even when the operator does not use Telegram's native reply gesture.
|
|
2059
|
+
*/
|
|
2060
|
+
declare function formatOperatorQueryTelegramMessage(input: OperatorQueryNotificationInput): string;
|
|
2061
|
+
/** A `kind` of `approve`/`deny` is a decision; `answer` is free-form. */
|
|
2062
|
+
type OperatorReplyKind = 'answer' | 'approve' | 'deny';
|
|
2063
|
+
interface ParsedOperatorReply {
|
|
2064
|
+
/**
|
|
2065
|
+
* Query id when the reply names one — explicitly (`/answer <id>`,
|
|
2066
|
+
* an inline `oq_…` token) or implicitly (a native Telegram reply
|
|
2067
|
+
* quoting a `[AMQ <id>]`-tagged notification). `undefined` when the
|
|
2068
|
+
* reply is a bare message and the caller must resolve the target.
|
|
2069
|
+
*/
|
|
2070
|
+
queryId?: string;
|
|
2071
|
+
/** The answer text to record against the query. Always non-empty. */
|
|
2072
|
+
answer: string;
|
|
2073
|
+
kind: OperatorReplyKind;
|
|
2074
|
+
}
|
|
2075
|
+
/**
|
|
2076
|
+
* Parse an operator's Telegram message into a {@link ParsedOperatorReply},
|
|
2077
|
+
* or `null` when it carries no usable answer.
|
|
2078
|
+
*
|
|
2079
|
+
* Recognized forms (most explicit first):
|
|
2080
|
+
* - `/answer <queryId> <text>`
|
|
2081
|
+
* - `/approve [<queryId>] [note]` · `/deny [<queryId>] [note]`
|
|
2082
|
+
* - a plain message — `queryId` is taken from a quoted tagged
|
|
2083
|
+
* notification (`replyToText`) or an inline `oq_…` / `[AMQ …]` token,
|
|
2084
|
+
* and is otherwise left `undefined` for the caller to resolve.
|
|
2085
|
+
*
|
|
2086
|
+
* The `@botname` suffix Telegram appends to commands in groups
|
|
2087
|
+
* (`/approve@mybot`) is tolerated.
|
|
2088
|
+
*/
|
|
2089
|
+
declare function parseTelegramOperatorReply(input: {
|
|
2090
|
+
text: string;
|
|
2091
|
+
replyToText?: string;
|
|
2092
|
+
}): ParsedOperatorReply | null;
|
|
2093
|
+
|
|
1695
2094
|
declare const ELKS_REALTIME_AUDIO_FORMATS: readonly ["ulaw", "pcm_16000", "pcm_24000", "wav"];
|
|
1696
2095
|
type ElksRealtimeAudioFormat = typeof ELKS_REALTIME_AUDIO_FORMATS[number];
|
|
1697
2096
|
interface ElksRealtimeHelloMessage {
|
|
@@ -1737,6 +2136,1090 @@ declare function buildElksHandshakeMessages(options?: {
|
|
|
1737
2136
|
sendFormat?: ElksRealtimeAudioFormat;
|
|
1738
2137
|
}): ElksRealtimeOutboundMessage[];
|
|
1739
2138
|
|
|
2139
|
+
/**
|
|
2140
|
+
* Twilio Media Streams wire protocol.
|
|
2141
|
+
*
|
|
2142
|
+
* The realtime-voice counterpart of `realtime.ts` (the 46elks protocol).
|
|
2143
|
+
* Twilio connects a call's audio to a WebSocket via a TwiML
|
|
2144
|
+
* `<Connect><Stream url="wss://…"/></Connect>`; over that socket Twilio
|
|
2145
|
+
* speaks JSON messages keyed by an `event` field.
|
|
2146
|
+
*
|
|
2147
|
+
* Inbound (Twilio → us):
|
|
2148
|
+
* - `{ event: "connected", protocol, version }`
|
|
2149
|
+
* - `{ event: "start", start: { streamSid, callSid, accountSid,
|
|
2150
|
+
* mediaFormat, tracks, customParameters } }`
|
|
2151
|
+
* - `{ event: "media", media: { payload: "<base64 µ-law>", track,
|
|
2152
|
+
* chunk, timestamp } }`
|
|
2153
|
+
* - `{ event: "stop", stop: { accountSid, callSid } }`
|
|
2154
|
+
* - `{ event: "mark", mark: { name } }`
|
|
2155
|
+
*
|
|
2156
|
+
* Outbound (us → Twilio) — every outbound frame must echo the
|
|
2157
|
+
* `streamSid` Twilio handed us in the `start` event:
|
|
2158
|
+
* - `{ event: "media", streamSid, media: { payload: "<base64 µ-law>" } }`
|
|
2159
|
+
* - `{ event: "mark", streamSid, mark: { name } }`
|
|
2160
|
+
* - `{ event: "clear", streamSid }` — flush buffered playback
|
|
2161
|
+
* (barge-in / interrupt; the analogue of 46elks `interrupt`).
|
|
2162
|
+
*
|
|
2163
|
+
* The audio is G.711 µ-law, 8 kHz, mono, base64 — unlike 46elks which
|
|
2164
|
+
* uses linear PCM. OpenAI's GA Realtime API speaks µ-law natively
|
|
2165
|
+
* (`{ type: "audio/pcmu" }` @ 8 kHz), so on a Twilio call the bridge
|
|
2166
|
+
* does NO transcoding end to end.
|
|
2167
|
+
*
|
|
2168
|
+
* > The Media Streams message shapes above follow Twilio's public
|
|
2169
|
+
* > `<Stream>` documentation. Verify against current docs before the
|
|
2170
|
+
* > live smoke-test (same discipline as the 46elks / OpenAI wire notes).
|
|
2171
|
+
*/
|
|
2172
|
+
/** µ-law payload sample rate Twilio Media Streams uses, in Hz. */
|
|
2173
|
+
declare const TWILIO_MEDIA_SAMPLE_RATE = 8000;
|
|
2174
|
+
/** The `connected` handshake frame — first frame Twilio sends. */
|
|
2175
|
+
interface TwilioConnectedMessage {
|
|
2176
|
+
event: 'connected';
|
|
2177
|
+
protocol?: string;
|
|
2178
|
+
version?: string;
|
|
2179
|
+
[key: string]: unknown;
|
|
2180
|
+
}
|
|
2181
|
+
/**
|
|
2182
|
+
* The `start` frame — carries the `streamSid` every outbound frame must
|
|
2183
|
+
* echo, plus the `callSid` we resolve the phone mission from. Twilio
|
|
2184
|
+
* sends exactly one `start` per stream, right after `connected`.
|
|
2185
|
+
*/
|
|
2186
|
+
interface TwilioStartMessage {
|
|
2187
|
+
event: 'start';
|
|
2188
|
+
/** Stream identifier — required on every outbound media/mark/clear. */
|
|
2189
|
+
streamSid: string;
|
|
2190
|
+
/** The call SID — matches the `sid` from the Calls.json response. */
|
|
2191
|
+
callSid: string;
|
|
2192
|
+
accountSid?: string;
|
|
2193
|
+
mediaFormat?: {
|
|
2194
|
+
encoding?: string;
|
|
2195
|
+
sampleRate?: number;
|
|
2196
|
+
channels?: number;
|
|
2197
|
+
};
|
|
2198
|
+
tracks?: string[];
|
|
2199
|
+
/** `<Parameter>` values declared inside the TwiML `<Stream>`. */
|
|
2200
|
+
customParameters?: Record<string, string>;
|
|
2201
|
+
[key: string]: unknown;
|
|
2202
|
+
}
|
|
2203
|
+
/** A `media` frame — one small base64 µ-law audio chunk. */
|
|
2204
|
+
interface TwilioMediaMessage {
|
|
2205
|
+
event: 'media';
|
|
2206
|
+
/** Base64-encoded G.711 µ-law, 8 kHz mono. */
|
|
2207
|
+
payload: string;
|
|
2208
|
+
/** Inbound only: which leg the audio is from (`inbound`/`outbound`). */
|
|
2209
|
+
track?: string;
|
|
2210
|
+
}
|
|
2211
|
+
/** The `stop` frame — Twilio has stopped streaming this call's audio. */
|
|
2212
|
+
interface TwilioStopMessage {
|
|
2213
|
+
event: 'stop';
|
|
2214
|
+
callSid?: string;
|
|
2215
|
+
[key: string]: unknown;
|
|
2216
|
+
}
|
|
2217
|
+
/** A `mark` frame — a playback checkpoint echoed back when reached. */
|
|
2218
|
+
interface TwilioMarkMessage {
|
|
2219
|
+
event: 'mark';
|
|
2220
|
+
name: string;
|
|
2221
|
+
}
|
|
2222
|
+
type TwilioRealtimeInboundMessage = TwilioConnectedMessage | TwilioStartMessage | TwilioMediaMessage | TwilioStopMessage | TwilioMarkMessage;
|
|
2223
|
+
type TwilioRealtimeOutboundMessage = {
|
|
2224
|
+
event: 'media';
|
|
2225
|
+
streamSid: string;
|
|
2226
|
+
media: {
|
|
2227
|
+
payload: string;
|
|
2228
|
+
};
|
|
2229
|
+
} | {
|
|
2230
|
+
event: 'mark';
|
|
2231
|
+
streamSid: string;
|
|
2232
|
+
mark: {
|
|
2233
|
+
name: string;
|
|
2234
|
+
};
|
|
2235
|
+
} | {
|
|
2236
|
+
event: 'clear';
|
|
2237
|
+
streamSid: string;
|
|
2238
|
+
};
|
|
2239
|
+
/**
|
|
2240
|
+
* Parse one raw inbound Twilio Media Streams frame. Accepts a JSON
|
|
2241
|
+
* string or an already-parsed object. Throws on a malformed/unknown
|
|
2242
|
+
* frame — the caller (the bridge) swallows the throw and ignores the
|
|
2243
|
+
* frame, never tearing the call down for one bad message.
|
|
2244
|
+
*/
|
|
2245
|
+
declare function parseTwilioRealtimeMessage(input: unknown): TwilioRealtimeInboundMessage;
|
|
2246
|
+
/**
|
|
2247
|
+
* Build an outbound `media` frame — synthesised agent audio for Twilio
|
|
2248
|
+
* to play to the caller. `data` may be a base64 string or raw bytes.
|
|
2249
|
+
* `streamSid` is the id Twilio handed us in the `start` event.
|
|
2250
|
+
*/
|
|
2251
|
+
declare function buildTwilioMediaMessage(streamSid: string, data: string | Uint8Array): TwilioRealtimeOutboundMessage;
|
|
2252
|
+
/**
|
|
2253
|
+
* Build a `clear` frame — tells Twilio to flush any audio still
|
|
2254
|
+
* buffered for playback. This is how barge-in / interrupt works on a
|
|
2255
|
+
* Twilio call: when the caller starts talking the agent must stop
|
|
2256
|
+
* mid-sentence, so we drop everything queued. (46elks calls this
|
|
2257
|
+
* `interrupt`; Twilio calls it `clear`.)
|
|
2258
|
+
*/
|
|
2259
|
+
declare function buildTwilioClearMessage(streamSid: string): TwilioRealtimeOutboundMessage;
|
|
2260
|
+
/**
|
|
2261
|
+
* Build a `mark` frame — a named playback checkpoint. Twilio echoes the
|
|
2262
|
+
* mark back (as an inbound `mark` event) once the audio queued before
|
|
2263
|
+
* it has actually been played to the caller. Useful for end-of-turn
|
|
2264
|
+
* detection; the bridge does not require it but exposes the builder for
|
|
2265
|
+
* parity with the protocol surface.
|
|
2266
|
+
*/
|
|
2267
|
+
declare function buildTwilioMarkMessage(streamSid: string, name: string): TwilioRealtimeOutboundMessage;
|
|
2268
|
+
|
|
2269
|
+
/**
|
|
2270
|
+
* Twilio webhook helpers — request-signature validation and TwiML
|
|
2271
|
+
* generation.
|
|
2272
|
+
*
|
|
2273
|
+
* This is the call-control counterpart of the 46elks webhook handling
|
|
2274
|
+
* in `manager.ts`. Twilio secures every webhook it sends with an
|
|
2275
|
+
* `X-Twilio-Signature` header; we answer Twilio's voice webhook with
|
|
2276
|
+
* TwiML (an XML document) that connects the call's audio to the
|
|
2277
|
+
* realtime voice WebSocket.
|
|
2278
|
+
*
|
|
2279
|
+
* Everything here is pure (no I/O, no sockets) so it is fully
|
|
2280
|
+
* unit-testable and keeps `@agenticmail/core` dependency-light.
|
|
2281
|
+
*/
|
|
2282
|
+
/**
|
|
2283
|
+
* Compute the Twilio request signature for a webhook request.
|
|
2284
|
+
*
|
|
2285
|
+
* Twilio's scheme (per its Security docs): take the full request URL,
|
|
2286
|
+
* then for an `application/x-www-form-urlencoded` POST append every
|
|
2287
|
+
* POST parameter — sorted by key — as `key` immediately followed by
|
|
2288
|
+
* `value`, with no separators. HMAC-SHA1 that string keyed by the
|
|
2289
|
+
* account `AuthToken`, and base64-encode the digest. Twilio sends the
|
|
2290
|
+
* result in the `X-Twilio-Signature` header.
|
|
2291
|
+
*
|
|
2292
|
+
* > Verify the exact construction against Twilio's current Security
|
|
2293
|
+
* > documentation before the live smoke-test.
|
|
2294
|
+
*
|
|
2295
|
+
* @param authToken the Twilio account AuthToken (the signing key)
|
|
2296
|
+
* @param url the full URL Twilio requested, exactly as configured
|
|
2297
|
+
* @param params the POST body parameters (form-encoded fields)
|
|
2298
|
+
*/
|
|
2299
|
+
declare function buildTwilioSignature(authToken: string, url: string, params?: Record<string, string>): string;
|
|
2300
|
+
/**
|
|
2301
|
+
* Validate an `X-Twilio-Signature` header against the request, timing-
|
|
2302
|
+
* safe. Fails closed: a missing/empty signature or token, or any
|
|
2303
|
+
* mismatch, returns `false` — never throws. The comparison is constant-
|
|
2304
|
+
* time so a caller cannot probe the expected signature byte by byte.
|
|
2305
|
+
*/
|
|
2306
|
+
declare function validateTwilioSignature(authToken: string, url: string, params: Record<string, string>, providedSignature: string): boolean;
|
|
2307
|
+
/**
|
|
2308
|
+
* XML-escape a string for safe inclusion in a TwiML attribute or text
|
|
2309
|
+
* node. TwiML is XML, so any value we interpolate (a websocket URL with
|
|
2310
|
+
* query params, a `<Parameter>` value) must have its metacharacters
|
|
2311
|
+
* escaped or a stray `&`/`"` would produce malformed XML.
|
|
2312
|
+
*/
|
|
2313
|
+
declare function escapeXml(value: string): string;
|
|
2314
|
+
interface TwilioStreamTwiMLOptions {
|
|
2315
|
+
/** The `wss://…` URL Twilio should open a Media Stream to. */
|
|
2316
|
+
streamUrl: string;
|
|
2317
|
+
/**
|
|
2318
|
+
* `<Parameter>` name/value pairs nested in the `<Stream>` — Twilio
|
|
2319
|
+
* echoes them back inside the media-stream `start` event's
|
|
2320
|
+
* `customParameters`. Used to carry the mission id / token so the
|
|
2321
|
+
* media socket can be matched to its phone mission even though the
|
|
2322
|
+
* `<Stream>` URL itself is fixed.
|
|
2323
|
+
*/
|
|
2324
|
+
parameters?: Record<string, string>;
|
|
2325
|
+
}
|
|
2326
|
+
/**
|
|
2327
|
+
* Build the TwiML returned from the Twilio voice webhook: a
|
|
2328
|
+
* `<Connect><Stream>` document that hands the live call audio to our
|
|
2329
|
+
* realtime voice WebSocket.
|
|
2330
|
+
*
|
|
2331
|
+
* <?xml version="1.0" encoding="UTF-8"?>
|
|
2332
|
+
* <Response>
|
|
2333
|
+
* <Connect>
|
|
2334
|
+
* <Stream url="wss://host/path">
|
|
2335
|
+
* <Parameter name="missionId" value="…"/>
|
|
2336
|
+
* </Stream>
|
|
2337
|
+
* </Connect>
|
|
2338
|
+
* </Response>
|
|
2339
|
+
*
|
|
2340
|
+
* `<Connect><Stream>` (as opposed to `<Start><Stream>`) makes the
|
|
2341
|
+
* stream bidirectional and keeps the call alive for its duration — the
|
|
2342
|
+
* call ends when the stream ends. Every interpolated value is
|
|
2343
|
+
* XML-escaped.
|
|
2344
|
+
*
|
|
2345
|
+
* > The `<Connect><Stream>` / `<Parameter>` TwiML verbs are per
|
|
2346
|
+
* > Twilio's public Media Streams docs; verify against current docs
|
|
2347
|
+
* > before the live smoke-test.
|
|
2348
|
+
*/
|
|
2349
|
+
declare function buildTwilioStreamTwiML(opts: TwilioStreamTwiMLOptions): string;
|
|
2350
|
+
/**
|
|
2351
|
+
* Build a minimal `<Response><Say>…</Say></Response>` TwiML document —
|
|
2352
|
+
* the fallback Twilio voice response when the realtime voice runtime
|
|
2353
|
+
* cannot be connected (e.g. no OpenAI key). Mirrors the 46elks
|
|
2354
|
+
* voice-start `play` action.
|
|
2355
|
+
*/
|
|
2356
|
+
declare function buildTwilioSayTwiML(message: string): string;
|
|
2357
|
+
|
|
2358
|
+
/**
|
|
2359
|
+
* Realtime voice WebSocket endpoint paths.
|
|
2360
|
+
*
|
|
2361
|
+
* These are the URL paths a phone carrier's media socket connects to.
|
|
2362
|
+
* They live in `@agenticmail/core` (rather than only in the API
|
|
2363
|
+
* package) because the {@link PhoneManager} needs the Twilio path to
|
|
2364
|
+
* build the `<Stream url>` inside the TwiML it returns from the Twilio
|
|
2365
|
+
* voice webhook. The API package mounts its WebSocket servers on the
|
|
2366
|
+
* same constants, so the path is defined exactly once.
|
|
2367
|
+
*/
|
|
2368
|
+
/**
|
|
2369
|
+
* Path the 46elks websocket-number's `websocket_url` points at. 46elks
|
|
2370
|
+
* resolves the mission from the `hello` frame's `callid`, so the path
|
|
2371
|
+
* carries no mission identity itself.
|
|
2372
|
+
*/
|
|
2373
|
+
declare const ELKS_REALTIME_WS_PATH = "/api/agenticmail/calls/realtime";
|
|
2374
|
+
/**
|
|
2375
|
+
* Path a Twilio `<Connect><Stream>` connects to. The mission id + token
|
|
2376
|
+
* ride as query params (and as `<Parameter>` values in the TwiML), so
|
|
2377
|
+
* the media socket can be matched to its mission before the Twilio
|
|
2378
|
+
* `start` frame even arrives.
|
|
2379
|
+
*/
|
|
2380
|
+
// NOTE(review): the JSDoc above says the mission id + token ride as query
// params, but `TwilioStreamTwiMLOptions.parameters` describes the `<Stream>`
// URL as fixed, and Twilio's `<Stream>` documentation states that the `url`
// attribute does not support query strings (custom `<Parameter>`s are the
// supported channel, delivered in the `start` frame). Confirm which mechanism
// is actually used and align the two comments.
declare const TWILIO_REALTIME_WS_PATH = "/api/agenticmail/calls/twilio-stream";
|
|
2381
|
+
|
|
2382
|
+
/**
|
|
2383
|
+
* Realtime transport adapter — the provider-pluggable seam of the
|
|
2384
|
+
* realtime voice bridge.
|
|
2385
|
+
*
|
|
2386
|
+
* `RealtimeVoiceBridge` is transport-agnostic on the *socket* side
|
|
2387
|
+
* (it only ever sees abstract {@link RealtimeBridgePort}s), but the
|
|
2388
|
+
* *messages* on the carrier side are provider-specific: 46elks speaks
|
|
2389
|
+
* `hello`/`audio`/`bye` JSON over linear PCM, while Twilio Media Streams
|
|
2390
|
+
* speaks `connected`/`start`/`media`/`stop` JSON over G.711 µ-law.
|
|
2391
|
+
*
|
|
2392
|
+
* A {@link RealtimeTransportAdapter} captures exactly those differences:
|
|
2393
|
+
* - how to interpret one inbound carrier frame ({@link parseInbound}),
|
|
2394
|
+
* - how to build the outbound control frames (handshake / audio /
|
|
2395
|
+
* interrupt / bye),
|
|
2396
|
+
* - and which OpenAI Realtime audio format the carrier's audio needs.
|
|
2397
|
+
*
|
|
2398
|
+
* Everything else — the OpenAI session lifecycle, function calling,
|
|
2399
|
+
* barge-in handling, transcript accumulation, teardown — is identical
|
|
2400
|
+
* across providers and lives once in the bridge. This is the
|
|
2401
|
+
* "generalise the bridge" design: one bridge, one OpenAI side, a thin
|
|
2402
|
+
* per-provider adapter, no duplicated conversation logic.
|
|
2403
|
+
*
|
|
2404
|
+
* Adapters are pure (no sockets, no I/O), so the bridge stays fully
|
|
2405
|
+
* unit-testable and `@agenticmail/core` stays dependency-light.
|
|
2406
|
+
*/
|
|
2407
|
+
|
|
2408
|
+
/** Provider identifier — kept in lockstep with `PhoneTransportProvider`. */
|
|
2409
|
+
type RealtimeTransportProvider = '46elks' | 'twilio';
|
|
2410
|
+
/**
|
|
2411
|
+
* A normalised inbound carrier event. Provider message shapes are
|
|
2412
|
+
* collapsed into this small, bridge-facing vocabulary so the bridge
|
|
2413
|
+
* never has to branch on the provider:
|
|
2414
|
+
* - `hello` — the call leg is live; carries the provider call id.
|
|
2415
|
+
* (46elks `hello`; Twilio `start`. Twilio's `connected`
|
|
2416
|
+
* frame is internal handshake noise → `ignore`.)
|
|
2417
|
+
* - `audio` — one inbound audio frame (base64), to relay to OpenAI.
|
|
2418
|
+
* - `bye` — the caller side ended the call.
|
|
2419
|
+
* - `ignore` — a known-but-uninteresting frame (e.g. Twilio `mark`).
|
|
2420
|
+
*/
|
|
2421
|
+
type RealtimeInboundEvent = {
|
|
2422
|
+
kind: 'hello';
|
|
2423
|
+
callId: string;
|
|
2424
|
+
from?: string;
|
|
2425
|
+
to?: string;
|
|
2426
|
+
} | {
|
|
2427
|
+
kind: 'audio';
|
|
2428
|
+
data: string;
|
|
2429
|
+
} | {
|
|
2430
|
+
kind: 'bye';
|
|
2431
|
+
reason?: string;
|
|
2432
|
+
message?: string;
|
|
2433
|
+
} | {
|
|
2434
|
+
kind: 'ignore';
|
|
2435
|
+
};
|
|
2436
|
+
/**
|
|
2437
|
+
* The provider-specific seam the bridge drives. One instance per call.
|
|
2438
|
+
* Adapters that need per-call state (Twilio's `streamSid`) are
|
|
2439
|
+
* stateful — {@link parseInbound} latches that state from the `hello`
|
|
2440
|
+
* frame and the outbound builders read it back.
|
|
2441
|
+
*/
|
|
2442
|
+
interface RealtimeTransportAdapter {
|
|
2443
|
+
/** Provider this adapter speaks for — used only for log/transcript text. */
|
|
2444
|
+
readonly provider: RealtimeTransportProvider;
|
|
2445
|
+
/**
|
|
2446
|
+
* Prefix for the carrier-side `onEnd` reason strings (`<prefix>-bye`,
|
|
2447
|
+
* `<prefix>-closed`, `<prefix>-error`). The 46elks adapter keeps the
|
|
2448
|
+
* historical `elks` prefix so existing call sites + tests that match
|
|
2449
|
+
* on `elks-bye` / `elks-closed` stay correct; Twilio uses `twilio`.
|
|
2450
|
+
*/
|
|
2451
|
+
readonly endReasonPrefix: string;
|
|
2452
|
+
/**
|
|
2453
|
+
* The OpenAI Realtime audio format the carrier's audio requires.
|
|
2454
|
+
* 46elks linear PCM → `audio/pcm` @ 24 kHz; Twilio G.711 µ-law →
|
|
2455
|
+
* `audio/pcmu` @ 8 kHz. Used by `buildRealtimeSessionConfig` so the
|
|
2456
|
+
* OpenAI session in/out format matches the carrier with no transcode.
|
|
2457
|
+
*/
|
|
2458
|
+
readonly openaiAudioFormat: {
|
|
2459
|
+
type: string;
|
|
2460
|
+
rate?: number;
|
|
2461
|
+
};
|
|
2462
|
+
/**
|
|
2463
|
+
* Normalise one raw inbound carrier frame. Throws on a malformed
|
|
2464
|
+
* frame (the bridge catches the throw and ignores that frame). A
|
|
2465
|
+
* well-formed but uninteresting frame returns `{ kind: 'ignore' }`.
|
|
2466
|
+
*/
|
|
2467
|
+
parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
|
|
2468
|
+
/**
|
|
2469
|
+
* Frames to send the carrier the moment the call leg is live (right
|
|
2470
|
+
* after the `hello` event). 46elks needs a `listening`+`sending`
|
|
2471
|
+
* format handshake; Twilio needs nothing, so this is `[]`.
|
|
2472
|
+
*/
|
|
2473
|
+
buildHandshake(): Record<string, unknown>[];
|
|
2474
|
+
/** Build one outbound audio frame (synthesised agent speech, base64). */
|
|
2475
|
+
buildAudio(base64: string): Record<string, unknown>;
|
|
2476
|
+
/**
|
|
2477
|
+
* Build the barge-in frame — tells the carrier to drop buffered
|
|
2478
|
+
* playback so the agent stops mid-sentence. 46elks `interrupt`;
|
|
2479
|
+
* Twilio `clear`.
|
|
2480
|
+
*/
|
|
2481
|
+
buildInterrupt(): Record<string, unknown>;
|
|
2482
|
+
/**
|
|
2483
|
+
* Build the end-of-call frame for the carrier, or `null` if the
|
|
2484
|
+
* carrier has none (Twilio has no client-initiated stream-stop frame;
|
|
2485
|
+
* the bridge just closes the socket).
|
|
2486
|
+
*/
|
|
2487
|
+
buildBye(): Record<string, unknown> | null;
|
|
2488
|
+
}
|
|
2489
|
+
/**
|
|
2490
|
+
* The 46elks realtime-media adapter — the original transport, behaviour
|
|
2491
|
+
* unchanged. Audio is linear PCM, so the OpenAI session uses
|
|
2492
|
+
* `audio/pcm` @ 24 kHz (matching 46elks `pcm_24000`).
|
|
2493
|
+
*/
|
|
2494
|
+
declare class ElksRealtimeTransport implements RealtimeTransportAdapter {
|
|
2495
|
+
private readonly listenFormat;
|
|
2496
|
+
private readonly sendFormat;
|
|
2497
|
+
readonly provider: "46elks";
|
|
2498
|
+
readonly endReasonPrefix = "elks";
|
|
2499
|
+
// NOTE(review): declared without a `rate` field, although the class JSDoc
// says the session uses `audio/pcm` @ 24 kHz and the adapter interface allows
// `rate?: number` — confirm the 24 kHz rate is supplied elsewhere.
readonly openaiAudioFormat: {
|
|
2500
|
+
type: string;
|
|
2501
|
+
};
|
|
2502
|
+
constructor(listenFormat?: ElksRealtimeAudioFormat, sendFormat?: ElksRealtimeAudioFormat);
|
|
2503
|
+
parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
|
|
2504
|
+
buildHandshake(): Record<string, unknown>[];
|
|
2505
|
+
buildAudio(base64: string): Record<string, unknown>;
|
|
2506
|
+
buildInterrupt(): Record<string, unknown>;
|
|
2507
|
+
buildBye(): Record<string, unknown>;
|
|
2508
|
+
}
|
|
2509
|
+
/**
|
|
2510
|
+
* The Twilio Media Streams adapter. Audio is G.711 µ-law @ 8 kHz, which
|
|
2511
|
+
* the OpenAI GA Realtime API speaks natively (`audio/pcmu`) — so the
|
|
2512
|
+
* bridge does NO transcoding for a Twilio call.
|
|
2513
|
+
*
|
|
2514
|
+
* Stateful per call: Twilio assigns a `streamSid` in the `start` frame
|
|
2515
|
+
* that EVERY outbound frame must echo. {@link parseInbound} latches it
|
|
2516
|
+
* on `start`; the outbound builders read it back. Building an outbound
|
|
2517
|
+
* frame before `start` throws — but the bridge never does that (it only
|
|
2518
|
+
* sends audio after the `hello` event, which `start` produces).
|
|
2519
|
+
*/
|
|
2520
|
+
declare class TwilioRealtimeTransport implements RealtimeTransportAdapter {
|
|
2521
|
+
readonly provider: "twilio";
|
|
2522
|
+
readonly endReasonPrefix = "twilio";
|
|
2523
|
+
readonly openaiAudioFormat: {
|
|
2524
|
+
type: string;
|
|
2525
|
+
};
|
|
2526
|
+
/** Latched from the Twilio `start` frame; required on every outbound. */
|
|
2527
|
+
private streamSid;
|
|
2528
|
+
/** The active `streamSid`, once the `start` frame has been seen. */
|
|
2529
|
+
// NOTE(review): typed as always `string`; what it yields before the `start`
// frame has been seen (empty string vs. a throw) is not visible from this
// declaration — confirm against the implementation before relying on it.
get currentStreamSid(): string;
|
|
2530
|
+
parseInbound(raw: string | Record<string, unknown>): RealtimeInboundEvent;
|
|
2531
|
+
buildHandshake(): Record<string, unknown>[];
|
|
2532
|
+
buildAudio(base64: string): Record<string, unknown>;
|
|
2533
|
+
buildInterrupt(): Record<string, unknown>;
|
|
2534
|
+
buildBye(): Record<string, unknown> | null;
|
|
2535
|
+
}
|
|
2536
|
+
/** Construct the transport adapter for a provider. */
|
|
2537
|
+
declare function createRealtimeTransport(provider: RealtimeTransportProvider): RealtimeTransportAdapter;
|
|
2538
|
+
|
|
2539
|
+
/**
|
|
2540
|
+
* Realtime voice tools — the tool-using layer on top of the v0.9.52
|
|
2541
|
+
* audio bridge (see `realtime-bridge.ts`).
|
|
2542
|
+
*
|
|
2543
|
+
* # What this file is
|
|
2544
|
+
*
|
|
2545
|
+
* v0.9.52 wired a 46elks ⇄ OpenAI Realtime audio bridge that could only
|
|
2546
|
+
* *converse*. v0.9.53 turns the voice agent into something that can
|
|
2547
|
+
* *act* on a live call: ask the human operator a question, look things
|
|
2548
|
+
* up, tell the time. The enabler is OpenAI Realtime **function
|
|
2549
|
+
* calling** — tools declared in `session.tools`, called by the model
|
|
2550
|
+
* mid-call, dispatched here, and answered back over the wire.
|
|
2551
|
+
*
|
|
2552
|
+
* This module is the transport-agnostic half of that:
|
|
2553
|
+
* - {@link RealtimeToolDefinition} — the JSON-schema tool shapes the
|
|
2554
|
+
* bridge declares to OpenAI.
|
|
2555
|
+
* - {@link ToolExecutor} — the interface the bridge dispatches calls
|
|
2556
|
+
* into; {@link createToolExecutor} builds one from a handler map.
|
|
2557
|
+
* - The *pure / fast* tool implementations ({@link getDatetime},
|
|
2558
|
+
* {@link recallMemory}, {@link webSearch}) — no sockets, no DB
|
|
2559
|
+
* handles passed in directly, fully unit-testable.
|
|
2560
|
+
* - {@link pollForOperatorAnswer} — the poll loop `ask_operator` uses
|
|
2561
|
+
* to block (with an injectable clock + sleep) until the operator
|
|
2562
|
+
* answers or the hard timeout elapses.
|
|
2563
|
+
* - {@link parseOperatorQueryReply} — parses an operator's *email*
|
|
2564
|
+
* reply back into a query answer (the channel-agnostic default
|
|
2565
|
+
* notifier path, see the plan §5).
|
|
2566
|
+
*
|
|
2567
|
+
* The *side-effecting* wiring — actually recording an operator query on
|
|
2568
|
+
* a mission, sending the notification email, calling a web-search API —
|
|
2569
|
+
* lives in `@agenticmail/api`'s `realtime-ws.ts`, which builds a
|
|
2570
|
+
* {@link ToolExecutor} per connection. Keeping that split means
|
|
2571
|
+
* `@agenticmail/core` stays dependency-light and every piece here is
|
|
2572
|
+
* testable with fakes.
|
|
2573
|
+
*
|
|
2574
|
+
* > NOTE on OpenAI Realtime GA event/field names: the function-calling
|
|
2575
|
+
* > wire shapes (`session.tools`, `tool_choice`, `function_call_output`,
|
|
2576
|
+
* > `response.function_call_arguments.done`) follow the plan. Any name
|
|
2577
|
+
* > that could not be verified against the v0.9.52 code is flagged with
|
|
2578
|
+
* > a comment in `realtime-bridge.ts` — verify against current OpenAI
|
|
2579
|
+
* > docs before the live smoke test (same discipline as v0.9.52's
|
|
2580
|
+
* > `response.output_audio.delta` vs legacy `response.audio.delta`).
|
|
2581
|
+
*/
|
|
2582
|
+
/**
|
|
2583
|
+
* A function tool as declared in the OpenAI Realtime `session.tools`
|
|
2584
|
+
* array. The shape is the GA `gpt-realtime` function-tool schema:
|
|
2585
|
+
* `{ type: 'function', name, description, parameters: <JSON Schema> }`.
|
|
2586
|
+
*/
|
|
2587
|
+
interface RealtimeToolDefinition {
|
|
2588
|
+
type: 'function';
|
|
2589
|
+
name: string;
|
|
2590
|
+
description: string;
|
|
2591
|
+
parameters: {
|
|
2592
|
+
type: 'object';
|
|
2593
|
+
properties: Record<string, unknown>;
|
|
2594
|
+
required?: string[];
|
|
2595
|
+
additionalProperties?: boolean;
|
|
2596
|
+
};
|
|
2597
|
+
}
|
|
2598
|
+
/** A function call parsed off the wire and ready to dispatch. */
|
|
2599
|
+
interface RealtimeToolCall {
|
|
2600
|
+
/** OpenAI `call_id` — echoed back on the `function_call_output`. */
|
|
2601
|
+
callId: string;
|
|
2602
|
+
/** The tool name the model chose. */
|
|
2603
|
+
name: string;
|
|
2604
|
+
/** Parsed `arguments` object (`{}` if the model sent none / invalid). */
|
|
2605
|
+
arguments: Record<string, unknown>;
|
|
2606
|
+
}
|
|
2607
|
+
/** The result of executing a tool — `output` is a string for the model. */
|
|
2608
|
+
interface RealtimeToolResult {
|
|
2609
|
+
/** Sent back verbatim as the `function_call_output` `output` field. */
|
|
2610
|
+
output: string;
|
|
2611
|
+
}
|
|
2612
|
+
/**
|
|
2613
|
+
* Dispatches a model tool call to its implementation. The bridge holds
|
|
2614
|
+
* one of these (injected) and calls {@link execute} for every
|
|
2615
|
+
* function call; it must always resolve (never reject) — a failing
|
|
2616
|
+
* tool resolves to an `output` the model can read and recover from.
|
|
2617
|
+
*/
|
|
2618
|
+
interface ToolExecutor {
|
|
2619
|
+
execute(call: RealtimeToolCall): Promise<RealtimeToolResult>;
|
|
2620
|
+
}
|
|
2621
|
+
/**
|
|
2622
|
+
* One tool's implementation. Receives the parsed arguments + the raw
|
|
2623
|
+
* call. May return a plain string or a value that is JSON-stringified.
|
|
2624
|
+
* It MAY throw — {@link createToolExecutor} catches and turns a thrown
|
|
2625
|
+
* error into a model-readable output, so a buggy tool never wedges the
|
|
2626
|
+
* call.
|
|
2627
|
+
*/
|
|
2628
|
+
type RealtimeToolHandler = (args: Record<string, unknown>, call: RealtimeToolCall) => Promise<string | Record<string, unknown>> | string | Record<string, unknown>;
|
|
2629
|
+
/**
|
|
2630
|
+
* Hard ceiling on how long `ask_operator` blocks waiting for an answer.
|
|
2631
|
+
* Per Ope (2026-05-19): a human caller will hold ~5 minutes; past that
|
|
2632
|
+
* the graceful path is a callback (plan §7), not an indefinite hold.
|
|
2633
|
+
*/
|
|
2634
|
+
declare const OPERATOR_QUERY_TIMEOUT_MS: number;
|
|
2635
|
+
/** How often `ask_operator` re-checks the query record for an answer. */
|
|
2636
|
+
declare const OPERATOR_QUERY_POLL_INTERVAL_MS = 3000;
|
|
2637
|
+
/**
|
|
2638
|
+
* Returned to the model as the `ask_operator` tool output when the
|
|
2639
|
+
* operator did not answer in time. Phrased as an instruction the model
|
|
2640
|
+
* can act on — it must NOT invent an answer, it should tell the caller
|
|
2641
|
+
* a follow-up / callback is coming (the bridge + API handle the actual
|
|
2642
|
+
* callback-on-disconnect, see the plan §7).
|
|
2643
|
+
*/
|
|
2644
|
+
declare const OPERATOR_QUERY_TIMEOUT_SENTINEL: string;
|
|
2645
|
+
/**
|
|
2646
|
+
* Subject-line tag used by the email notifier. The query id is embedded
|
|
2647
|
+
* so an operator's *reply* can be parsed straight back into an answer
|
|
2648
|
+
* ({@link parseOperatorQueryReply}) — the channel-agnostic default path.
|
|
2649
|
+
*/
|
|
2650
|
+
declare const OPERATOR_QUERY_SUBJECT_TAG = "AgenticMail Operator Query";
|
|
2651
|
+
/**
|
|
2652
|
+
* Phase 1 keystone — human-in-the-loop. The model calls this when it
|
|
2653
|
+
* needs information, a decision, or approval it does not have. It can
|
|
2654
|
+
* take minutes to answer, so the model is instructed (see
|
|
2655
|
+
* {@link buildRealtimeToolGuidance}) to put the caller on hold first.
|
|
2656
|
+
*/
|
|
2657
|
+
declare const ASK_OPERATOR_TOOL: RealtimeToolDefinition;
|
|
2658
|
+
/** Phase 2 — a web search. Returns the top results as text. */
|
|
2659
|
+
declare const WEB_SEARCH_TOOL: RealtimeToolDefinition;
|
|
2660
|
+
/** Phase 2 — searches the agent's own persistent memory. */
|
|
2661
|
+
declare const RECALL_MEMORY_TOOL: RealtimeToolDefinition;
|
|
2662
|
+
/** Phase 2 — the current date/time, for resolving "tomorrow" etc. */
|
|
2663
|
+
declare const GET_DATETIME_TOOL: RealtimeToolDefinition;
|
|
2664
|
+
/**
|
|
2665
|
+
* Phase 2 (optional) — searches the agent's AgenticMail inbox. Defined
|
|
2666
|
+
* here so the schema is canonical, but NOT wired into the default
|
|
2667
|
+
* executor in `realtime-ws.ts` (it needs IMAP access); a deployment can
|
|
2668
|
+
* opt in. Kept in the file so the tool surface is documented in one place.
|
|
2669
|
+
*/
|
|
2670
|
+
declare const SEARCH_EMAIL_TOOL: RealtimeToolDefinition;
|
|
2671
|
+
/** Every tool defined in this module, keyed by name. */
|
|
2672
|
+
declare const REALTIME_TOOL_DEFINITIONS: Record<string, RealtimeToolDefinition>;
|
|
2673
|
+
/**
|
|
2674
|
+
* Build the natural-language guidance appended to the Realtime session
|
|
2675
|
+
* `instructions` when tools are present. This is the *model-side* half
|
|
2676
|
+
* of "keep the line warm" (plan §6): the model announces a hold before
|
|
2677
|
+
* a slow tool and reassures the caller while it waits. The bridge-side
|
|
2678
|
+
* safety net (the tool-call timeout) is in `realtime-bridge.ts`.
|
|
2679
|
+
*/
|
|
2680
|
+
declare function buildRealtimeToolGuidance(tools: readonly RealtimeToolDefinition[]): string;
|
|
2681
|
+
/**
|
|
2682
|
+
* Build a {@link ToolExecutor} from a `name → handler` map. The
|
|
2683
|
+
* executor:
|
|
2684
|
+
* - resolves an unknown tool name to a model-readable "not available"
|
|
2685
|
+
* output (never rejects),
|
|
2686
|
+
* - catches a thrown / rejected handler and turns it into a
|
|
2687
|
+
* model-readable failure output,
|
|
2688
|
+
* - JSON-stringifies a non-string handler return.
|
|
2689
|
+
*
|
|
2690
|
+
* So a single buggy or missing tool can never crash the bridge or
|
|
2691
|
+
* wedge the call — the model just gets an output it can recover from.
|
|
2692
|
+
*/
|
|
2693
|
+
declare function createToolExecutor(handlers: Record<string, RealtimeToolHandler>): ToolExecutor;
|
|
2694
|
+
interface GetDatetimeOptions {
|
|
2695
|
+
/** IANA timezone; defaults to UTC. */
|
|
2696
|
+
timezone?: string;
|
|
2697
|
+
/** Clock override for tests. */
|
|
2698
|
+
now?: Date;
|
|
2699
|
+
}
|
|
2700
|
+
/**
|
|
2701
|
+
* `get_datetime` — current date/time as a human-readable line plus the
|
|
2702
|
+
* exact ISO timestamp. Pure: an injectable clock makes it deterministic
|
|
2703
|
+
* in tests. An invalid timezone falls back to UTC rather than throwing.
|
|
2704
|
+
*/
|
|
2705
|
+
declare function getDatetime(options?: GetDatetimeOptions): string;
|
|
2706
|
+
/**
|
|
2707
|
+
* Minimal structural interface for the bit of `AgentMemoryManager`
|
|
2708
|
+
* {@link recallMemory} needs. Declared here so this module does not
|
|
2709
|
+
* have to import the full memory manager — `AgentMemoryManager`
|
|
2710
|
+
* satisfies it structurally.
|
|
2711
|
+
*/
|
|
2712
|
+
interface MemoryRecaller {
|
|
2713
|
+
recall(agentId: string, query: string, limit?: number): Promise<Array<{
|
|
2714
|
+
title: string;
|
|
2715
|
+
content: string;
|
|
2716
|
+
}>>;
|
|
2717
|
+
}
|
|
2718
|
+
/**
|
|
2719
|
+
* `recall_memory` — query the agent's own persistent memory. Returns a
|
|
2720
|
+
* compact numbered list, or a clear "nothing found" line so the model
|
|
2721
|
+
* does not stall on an empty result.
|
|
2722
|
+
*/
|
|
2723
|
+
declare function recallMemory(memory: MemoryRecaller, agentId: string, query: string, limit?: number): Promise<string>;
|
|
2724
|
+
interface WebSearchOptions {
|
|
2725
|
+
/**
|
|
2726
|
+
* Search endpoint. Defaults to the DuckDuckGo HTML endpoint
|
|
2727
|
+
* ({@link DEFAULT_WEB_SEARCH_ENDPOINT}) — free, no API key, per the
|
|
2728
|
+
* scope decision (plan §13.1). Overridable for tests / a mirror.
|
|
2729
|
+
*/
|
|
2730
|
+
endpoint?: string;
|
|
2731
|
+
/** `fetch` override for tests. */
|
|
2732
|
+
fetchFn?: typeof fetch;
|
|
2733
|
+
/** Max results to fold into the output (default 5, capped at 10). */
|
|
2734
|
+
maxResults?: number;
|
|
2735
|
+
}
|
|
2736
|
+
/**
|
|
2737
|
+
* Default web-search endpoint — DuckDuckGo's keyless HTML results page.
|
|
2738
|
+
* Chosen because it needs no API key or account (plan §13.1: "web_search
|
|
2739
|
+
* → DuckDuckGo only, free, no key").
|
|
2740
|
+
*/
|
|
2741
|
+
declare const DEFAULT_WEB_SEARCH_ENDPOINT = "https://html.duckduckgo.com/html/";
|
|
2742
|
+
/**
|
|
2743
|
+
* Untrusted-content marker prefixed to every non-empty `web_search`
|
|
2744
|
+
* result block (v0.9.53 security review).
|
|
2745
|
+
*
|
|
2746
|
+
* Web-search results are scraped page titles + snippets — attacker-
|
|
2747
|
+
* controllable text that the model consumes verbatim as a
|
|
2748
|
+
* `function_call_output`. A page that ranks for the caller's query can
|
|
2749
|
+
* plant instructions in its `<title>` or snippet; without an explicit
|
|
2750
|
+
* delimiter the model may read them as commands rather than data — the
|
|
2751
|
+
* classic search-result prompt-injection vector. This marker tells the
|
|
2752
|
+
* model the block is strictly reference data and that any instructions
|
|
2753
|
+
* inside it must be ignored.
|
|
2754
|
+
*
|
|
2755
|
+
* (The enterprise web-search tool wrapped results the same way via
|
|
2756
|
+
* `wrapWebContent` / `externalContent.untrusted`; the fresh DuckDuckGo
|
|
2757
|
+
* helper here re-establishes that defense without copying the file.)
|
|
2758
|
+
*/
|
|
2759
|
+
declare const WEB_SEARCH_UNTRUSTED_PREFIX: string;
|
|
2760
|
+
/**
|
|
2761
|
+
* `web_search` — a keyless DuckDuckGo lookup returning the top results
|
|
2762
|
+
* as text. Reimplemented fresh as a small HTML-scrape helper (plan
|
|
2763
|
+
* §13.1 / the host's provenance note: do not copy the enterprise
|
|
2764
|
+
* file). No API key is needed, so unlike the other tools this one is
|
|
2765
|
+
* always available.
|
|
2766
|
+
*
|
|
2767
|
+
* Fails *soft*: a non-OK response, an unreadable body, or a thrown
|
|
2768
|
+
* fetch all resolve to a model-readable line rather than throwing — a
|
|
2769
|
+
* search outage must never kill a live call.
|
|
2770
|
+
*
|
|
2771
|
+
* > DuckDuckGo's HTML page is an unversioned scrape target, not a
|
|
2772
|
+
* > documented API: the `result__a` / `result__snippet` selectors and
|
|
2773
|
+
* > the `/l/?uddg=` redirect wrapper below match the endpoint today;
|
|
2774
|
+
* > verify them before the live smoke test (same "verify the wire
|
|
2775
|
+
* > shape" discipline as the OpenAI Realtime event names).
|
|
2776
|
+
*/
|
|
2777
|
+
declare function webSearch(query: string, options?: WebSearchOptions): Promise<string>;
|
|
2778
|
+
interface OperatorQueryPollOptions {
|
|
2779
|
+
/** Hard timeout (default {@link OPERATOR_QUERY_TIMEOUT_MS}). */
|
|
2780
|
+
timeoutMs?: number;
|
|
2781
|
+
/** Poll interval (default {@link OPERATOR_QUERY_POLL_INTERVAL_MS}). */
|
|
2782
|
+
pollIntervalMs?: number;
|
|
2783
|
+
/** Clock override for tests (returns ms). */
|
|
2784
|
+
now?: () => number;
|
|
2785
|
+
/** Sleep override for tests. */
|
|
2786
|
+
sleep?: (ms: number) => Promise<void>;
|
|
2787
|
+
/** Abort handle — when `aborted` flips true the poll resolves null. */
|
|
2788
|
+
signal?: {
|
|
2789
|
+
readonly aborted: boolean;
|
|
2790
|
+
};
|
|
2791
|
+
}
|
|
2792
|
+
/**
|
|
2793
|
+
* Poll `readAnswer` until it yields a non-empty answer or the timeout
|
|
2794
|
+
* elapses. Returns the trimmed answer, or `null` on timeout / abort.
|
|
2795
|
+
*
|
|
2796
|
+
* This is the loop `ask_operator` runs to *block* the tool call while
|
|
2797
|
+
* it waits for the human. The clock + sleep are injectable so tests run
|
|
2798
|
+
* the full timeout path in microseconds. The `signal` lets the caller
|
|
2799
|
+
* abandon the wait early — e.g. the call dropped, so there is no point
|
|
2800
|
+
* polling (the unanswered query then drives callback-on-disconnect).
|
|
2801
|
+
*/
|
|
2802
|
+
declare function pollForOperatorAnswer(readAnswer: () => Promise<string | null | undefined> | string | null | undefined, options?: OperatorQueryPollOptions): Promise<string | null>;
|
|
2803
|
+
/**
|
|
2804
|
+
* Build the notification email subject for an operator query. The query
|
|
2805
|
+
* id is embedded in a `[tag id]` token so the operator's reply (subject
|
|
2806
|
+
* `Re: [tag id] …`) can be parsed straight back into an answer.
|
|
2807
|
+
*/
|
|
2808
|
+
declare function operatorQuerySubject(queryId: string, callContext?: string): string;
|
|
2809
|
+
/**
|
|
2810
|
+
* Parse an operator's email reply into `{ queryId, answer }`, or `null`
|
|
2811
|
+
* if the email is not an operator-query reply (no id token in the
|
|
2812
|
+
* subject) or carries no usable answer text.
|
|
2813
|
+
*
|
|
2814
|
+
* This is the pure half of the channel-agnostic notifier (plan §5): the
|
|
2815
|
+
* default notifier emails the operator; their reply lands back in the
|
|
2816
|
+
* agent's inbox; the inbound mail hook runs this and posts the answer
|
|
2817
|
+
* to the same query record the HTTP endpoint writes to.
|
|
2818
|
+
*/
|
|
2819
|
+
declare function parseOperatorQueryReply(input: {
|
|
2820
|
+
subject?: string;
|
|
2821
|
+
text?: string;
|
|
2822
|
+
}): {
|
|
2823
|
+
queryId: string;
|
|
2824
|
+
answer: string;
|
|
2825
|
+
} | null;
|
|
2826
|
+
/**
|
|
2827
|
+
* Extract the bare email address from a `From`-style value, lowercased
|
|
2828
|
+
* and trimmed. Accepts `"Name" <addr@host>`, `<addr@host>`, or a plain
|
|
2829
|
+
* `addr@host`; returns `''` for an empty / unusable input.
|
|
2830
|
+
*/
|
|
2831
|
+
declare function extractEmailAddress(value: string | null | undefined): string;
|
|
2832
|
+
/**
|
|
2833
|
+
* Fail-closed sender check for the operator email-reply answer path
|
|
2834
|
+
* (plan §5; added in the v0.9.53 security review).
|
|
2835
|
+
*
|
|
2836
|
+
* An operator-query answer arriving by email is gated by the query id
|
|
2837
|
+
* embedded in the subject — an unguessable 122-bit v4 UUID, but one that
|
|
2838
|
+
* rides in *plaintext* subject lines through quoting, forwarding, and
|
|
2839
|
+
* relay/provider logs. The id alone is therefore a materially weaker
|
|
2840
|
+
* gate than the HMAC per-mission token used on the phone webhook (which
|
|
2841
|
+
* only the phone carrier ever sees). So an emailed answer is accepted ONLY when its
|
|
2842
|
+
* `From` address matches the configured operator address (case-
|
|
2843
|
+
* insensitive, address-only).
|
|
2844
|
+
*
|
|
2845
|
+
* Returns `false` when no `operatorEmail` is configured — no operator
|
|
2846
|
+
* means nobody is trusted (fail closed), so the email-reply path is
|
|
2847
|
+
* simply inert on a deployment without one.
|
|
2848
|
+
*
|
|
2849
|
+
* NOTE: ultimate strength still depends on inbound SPF/DKIM rejecting a
|
|
2850
|
+
* spoofed `From`; this check closes the casual-leak path — a leaked or
|
|
2851
|
+
* forwarded subject token replied to from an arbitrary address.
|
|
2852
|
+
*/
|
|
2853
|
+
declare function isOperatorReplySender(from: string | null | undefined, operatorEmail: string | null | undefined): boolean;
|
|
2854
|
+
|
|
2855
|
+
/**
|
|
2856
|
+
* Realtime voice bridge — wires the OpenAI Realtime API to a phone
|
|
2857
|
+
* carrier's realtime-media WebSocket so a phone mission can actually
|
|
2858
|
+
* *converse*.
|
|
2859
|
+
*
|
|
2860
|
+
* # Shape of the integration
|
|
2861
|
+
*
|
|
2862
|
+
* caller ⇄ carrier ⇄ (carrier media WebSocket) ⇄ AgenticMail
|
|
2863
|
+
* │
|
|
2864
|
+
* RealtimeVoiceBridge
|
|
2865
|
+
* │
|
|
2866
|
+
* (OpenAI Realtime WebSocket)
|
|
2867
|
+
* │
|
|
2868
|
+
* gpt-realtime
|
|
2869
|
+
*
|
|
2870
|
+
* The carrier streams the live call audio to AgenticMail as JSON frames
|
|
2871
|
+
* (base64 audio); AgenticMail relays them to OpenAI as
|
|
2872
|
+
* `input_audio_buffer.append`; OpenAI streams synthesised speech back
|
|
2873
|
+
* as `response.output_audio.delta`; AgenticMail relays that to the
|
|
2874
|
+
* carrier. Server-side VAD on the OpenAI session handles turn-taking —
|
|
2875
|
+
* no manual commit / response.create.
|
|
2876
|
+
*
|
|
2877
|
+
* # Provider-pluggable transport
|
|
2878
|
+
*
|
|
2879
|
+
* The carrier side speaks a provider-specific wire protocol — 46elks
|
|
2880
|
+
* (`hello`/`audio`/`bye`, linear PCM) and Twilio Media Streams
|
|
2881
|
+
* (`connected`/`start`/`media`/`stop`, G.711 µ-law). Those differences
|
|
2882
|
+
* are isolated behind a {@link RealtimeTransportAdapter}: the bridge
|
|
2883
|
+
* itself — OpenAI session lifecycle, function calling, barge-in,
|
|
2884
|
+
* transcript, teardown — is identical across providers and lives here
|
|
2885
|
+
* once. The bridge defaults to the 46elks adapter, so existing callers
|
|
2886
|
+
* that pass no `transport` keep their exact prior behaviour.
|
|
2887
|
+
*
|
|
2888
|
+
* # Memory injection — the whole point
|
|
2889
|
+
*
|
|
2890
|
+
* Before the OpenAI session starts, the agent's persistent memory is
|
|
2891
|
+
* rendered (`AgentMemoryManager.generateMemoryContext()`) and folded
|
|
2892
|
+
* into the Realtime session `instructions`. The model is told to treat
|
|
2893
|
+
* that block as *its own* long-term knowledge — so on the call it acts
|
|
2894
|
+
* with full continuity, as if it had always known those things.
|
|
2895
|
+
*
|
|
2896
|
+
* # Why this file is transport-agnostic
|
|
2897
|
+
*
|
|
2898
|
+
* `RealtimeVoiceBridge` never touches a socket. It takes two abstract
|
|
2899
|
+
* {@link RealtimeBridgePort}s (one per side) and is driven by
|
|
2900
|
+
* `handle*Message` / `handle*Open` / `handle*Close` calls. The real
|
|
2901
|
+
* WebSocket plumbing lives in `@agenticmail/api` (which has the `ws`
|
|
2902
|
+
* dependency); tests drive the bridge with in-memory fake ports. This
|
|
2903
|
+
* keeps `@agenticmail/core` dependency-free and the bridge logic fully
|
|
2904
|
+
* unit-testable without a live OpenAI key or a carrier websocket number.
|
|
2905
|
+
*
|
|
2906
|
+
* The exact OpenAI Realtime wire shapes below are the GA `gpt-realtime`
|
|
2907
|
+
* protocol (session config nested under `audio.input` / `audio.output`,
|
|
2908
|
+
* `format` as an object, `response.output_audio.delta` for output). The
|
|
2909
|
+
* legacy beta output event name `response.audio.delta` is also handled
|
|
2910
|
+
* defensively — some `gpt-realtime` deployments still emit it.
|
|
2911
|
+
*/
|
|
2912
|
+
|
|
2913
|
+
/** OpenAI Realtime WebSocket base URL (model passed as `?model=`). */
|
|
2914
|
+
declare const OPENAI_REALTIME_URL = "wss://api.openai.com/v1/realtime";
|
|
2915
|
+
/** GA Realtime model. */
|
|
2916
|
+
declare const DEFAULT_REALTIME_MODEL = "gpt-realtime";
|
|
2917
|
+
/** Default GA Realtime voice. */
|
|
2918
|
+
declare const DEFAULT_REALTIME_VOICE = "marin";
|
|
2919
|
+
/** PCM sample rate shared by 46elks `pcm_24000` and the OpenAI session. */
|
|
2920
|
+
declare const REALTIME_AUDIO_SAMPLE_RATE = 24000;
|
|
2921
|
+
/**
|
|
2922
|
+
* #46-H1 — hard ceiling on a single inbound audio frame, measured in
|
|
2923
|
+
* base64 characters. Realtime frames are tiny (20–100 ms of audio); a
|
|
2924
|
+
* frame larger than this is either a buggy or a hostile peer trying to
|
|
2925
|
+
* push an unbounded allocation through the bridge. Oversized frames are
|
|
2926
|
+
* dropped, never forwarded. ~256 KiB of base64 ≈ 4 s of 24 kHz PCM16 —
|
|
2927
|
+
* far above any legitimate realtime frame, so this never trims real
|
|
2928
|
+
* speech, it only fences off abuse.
|
|
2929
|
+
*/
|
|
2930
|
+
declare const REALTIME_MAX_AUDIO_FRAME_BASE64: number;
|
|
2931
|
+
/**
|
|
2932
|
+
* Bridge-side safety net for a slow tool call (plan §6). The model-side
|
|
2933
|
+
* keeps the line warm (it announces a hold and reassures the caller);
|
|
2934
|
+
* this is the floor under that — if a tool's `execute()` never settles
|
|
2935
|
+
* (a hung `ask_operator`, a wedged search) the bridge still answers the
|
|
2936
|
+
* model after this long so the call cannot be wedged forever. It is set
|
|
2937
|
+
* deliberately ABOVE `OPERATOR_QUERY_TIMEOUT_MS` (5 min) so the tool's
|
|
2938
|
+
* own graceful timeout sentinel normally wins the race; this only fires
|
|
2939
|
+
* if the executor itself hangs.
|
|
2940
|
+
*/
|
|
2941
|
+
declare const REALTIME_TOOL_CALL_TIMEOUT_MS: number;
|
|
2942
|
+
interface RealtimeInstructionOptions {
|
|
2943
|
+
/** The concrete objective of this call. */
|
|
2944
|
+
task: string;
|
|
2945
|
+
/** Rendered agent memory block (from `generateMemoryContext()`). */
|
|
2946
|
+
memoryContext?: string;
|
|
2947
|
+
/** The agent's display name, used in the persona line. */
|
|
2948
|
+
agentName?: string;
|
|
2949
|
+
/** Override the default persona preamble. */
|
|
2950
|
+
persona?: string;
|
|
2951
|
+
/**
|
|
2952
|
+
* Natural-language tool-use guidance, appended as its own section.
|
|
2953
|
+
* Normally produced by `buildRealtimeToolGuidance()` and folded in
|
|
2954
|
+
* automatically by `buildRealtimeSessionConfig()` when tools are set.
|
|
2955
|
+
*/
|
|
2956
|
+
toolGuidance?: string;
|
|
2957
|
+
}
|
|
2958
|
+
/**
|
|
2959
|
+
* Compose the Realtime session `instructions` string. The agent's
|
|
2960
|
+
* memory is presented as the model's *own* knowledge — not as external
|
|
2961
|
+
* notes — so the call feels continuous with everything the agent has
|
|
2962
|
+
* learned elsewhere.
|
|
2963
|
+
*/
|
|
2964
|
+
declare function buildRealtimeInstructions(opts: RealtimeInstructionOptions): string;
|
|
2965
|
+
interface RealtimeSessionConfigOptions extends RealtimeInstructionOptions {
|
|
2966
|
+
/** OpenAI Realtime voice (default {@link DEFAULT_REALTIME_VOICE}). */
|
|
2967
|
+
voice?: string;
|
|
2968
|
+
/** OpenAI Realtime model (default {@link DEFAULT_REALTIME_MODEL}). */
|
|
2969
|
+
model?: string;
|
|
2970
|
+
/** Provide a fully-formed instruction string instead of composing one. */
|
|
2971
|
+
instructions?: string;
|
|
2972
|
+
/**
|
|
2973
|
+
* Function tools to declare on the session. When present they are
|
|
2974
|
+
* emitted under `session.tools` with a `tool_choice`, and (unless
|
|
2975
|
+
* `instructions` is overridden) tool-use guidance is folded into the
|
|
2976
|
+
* composed instructions automatically.
|
|
2977
|
+
*/
|
|
2978
|
+
tools?: RealtimeToolDefinition[];
|
|
2979
|
+
/** `tool_choice` override — defaults to `'auto'` when tools are set. */
|
|
2980
|
+
toolChoice?: 'auto' | 'none' | 'required';
|
|
2981
|
+
/**
|
|
2982
|
+
* OpenAI Realtime audio format for the session's input AND output.
|
|
2983
|
+
* Defaults to linear PCM @ 24 kHz (`{ type: 'audio/pcm', rate: 24000 }`)
|
|
2984
|
+
* — the format a 46elks `pcm_24000` call needs. A Twilio call must
|
|
2985
|
+
* pass `{ type: 'audio/pcmu', rate: 8000 }` (G.711 µ-law @ 8 kHz) so
|
|
2986
|
+
* the OpenAI session speaks the carrier's native codec with NO
|
|
2987
|
+
* transcoding. Normally sourced from a {@link RealtimeTransportAdapter}'s
|
|
2988
|
+
* `openaiAudioFormat`.
|
|
2989
|
+
*/
|
|
2990
|
+
audioFormat?: {
|
|
2991
|
+
type: string;
|
|
2992
|
+
rate?: number;
|
|
2993
|
+
};
|
|
2994
|
+
}
|
|
2995
|
+
/** Default OpenAI Realtime audio format — linear PCM (`audio/pcm`), as used for 46elks. The constant declares only `type`; the 24 kHz rate is not carried on it. */
|
|
2996
|
+
declare const DEFAULT_REALTIME_AUDIO_FORMAT: {
|
|
2997
|
+
readonly type: "audio/pcm";
|
|
2998
|
+
};
|
|
2999
|
+
/**
|
|
3000
|
+
* Build the `session.update` client event for the GA `gpt-realtime`
|
|
3001
|
+
* API. Audio in/out default to PCM16 @ 24 kHz (matches 46elks
|
|
3002
|
+
* `pcm_24000`); a Twilio call passes `audioFormat: { type: 'audio/pcmu',
|
|
3003
|
+
* rate: 8000 }` so the session speaks G.711 µ-law natively. Turn-taking
|
|
3004
|
+
* is server-side VAD, and the agent's memory is folded into
|
|
3005
|
+
* `instructions`.
|
|
3006
|
+
*
|
|
3007
|
+
* When `tools` are supplied they are declared under `session.tools`
|
|
3008
|
+
* with a `tool_choice` (default `'auto'`), and — unless the caller
|
|
3009
|
+
* passed an explicit `instructions` string — natural-language tool-use
|
|
3010
|
+
* guidance is appended to the composed instructions so the model knows
|
|
3011
|
+
* to put the caller on hold before a slow tool (plan §6).
|
|
3012
|
+
*
|
|
3013
|
+
* > The `session.tools` / `tool_choice` field names follow the OpenAI
|
|
3014
|
+
* > Realtime function-calling protocol per the plan §3, and the
|
|
3015
|
+
* > `audio/pcmu` µ-law format token follows the OpenAI GA Realtime
|
|
3016
|
+
* > audio-format protocol; verify both against current OpenAI docs
|
|
3017
|
+
* > before the live smoke test.
|
|
3018
|
+
*/
|
|
3019
|
+
declare function buildRealtimeSessionConfig(opts: RealtimeSessionConfigOptions): Record<string, unknown>;
|
|
3020
|
+
/** Build the `wss://…/v1/realtime?model=…` URL for a model. */
|
|
3021
|
+
declare function buildOpenAIRealtimeUrl(model?: string): string;
|
|
3022
|
+
/** One side of the bridge — a JSON message sink that can be closed. */
|
|
3023
|
+
interface RealtimeBridgePort {
|
|
3024
|
+
/** Send one JSON message to the peer. Must not throw. */
|
|
3025
|
+
send(message: Record<string, unknown>): void;
|
|
3026
|
+
/** Close the underlying connection. Must be idempotent. */
|
|
3027
|
+
close(): void;
|
|
3028
|
+
}
|
|
3029
|
+
interface RealtimeBridgeTranscriptEntry {
|
|
3030
|
+
source: 'system' | 'provider' | 'agent';
|
|
3031
|
+
text: string;
|
|
3032
|
+
metadata?: Record<string, unknown>;
|
|
3033
|
+
}
|
|
3034
|
+
interface RealtimeVoiceBridgeOptions {
|
|
3035
|
+
/**
|
|
3036
|
+
* Port to the carrier realtime-media side. Named `elks` for backward
|
|
3037
|
+
* compatibility (it predates the Twilio transport); `carrier` is the
|
|
3038
|
+
* provider-neutral alias and takes precedence if both are given.
|
|
3039
|
+
*/
|
|
3040
|
+
elks?: RealtimeBridgePort;
|
|
3041
|
+
/** Provider-neutral alias for {@link elks} — the carrier media port. */
|
|
3042
|
+
carrier?: RealtimeBridgePort;
|
|
3043
|
+
/** Port to the OpenAI Realtime side. */
|
|
3044
|
+
openai: RealtimeBridgePort;
|
|
3045
|
+
/** `session.update` payload — sent to OpenAI once its socket opens. */
|
|
3046
|
+
sessionConfig: Record<string, unknown>;
|
|
3047
|
+
/**
|
|
3048
|
+
* Carrier transport adapter — encodes/decodes the provider's wire
|
|
3049
|
+
* protocol (46elks vs Twilio Media Streams). Defaults to the 46elks
|
|
3050
|
+
* adapter, so callers that omit it keep the original behaviour.
|
|
3051
|
+
*/
|
|
3052
|
+
transport?: RealtimeTransportAdapter;
|
|
3053
|
+
/**
|
|
3054
|
+
* 46elks-only: audio format we ask 46elks to send us (default
|
|
3055
|
+
* `pcm_24000`). Ignored unless the default 46elks transport is used
|
|
3056
|
+
* and no explicit `transport` was supplied.
|
|
3057
|
+
*/
|
|
3058
|
+
listenFormat?: ElksRealtimeAudioFormat;
|
|
3059
|
+
/**
|
|
3060
|
+
* 46elks-only: audio format we declare for the audio we send 46elks
|
|
3061
|
+
* (default `pcm_24000`). Ignored unless the default 46elks transport
|
|
3062
|
+
* is used and no explicit `transport` was supplied.
|
|
3063
|
+
*/
|
|
3064
|
+
sendFormat?: ElksRealtimeAudioFormat;
|
|
3065
|
+
/** Per-frame base64 ceiling (default {@link REALTIME_MAX_AUDIO_FRAME_BASE64}). */
|
|
3066
|
+
maxAudioFrameBase64?: number;
|
|
3067
|
+
/**
|
|
3068
|
+
* Dispatches the model's function calls. When omitted the bridge has
|
|
3069
|
+
* no tools — a function call from the model is acknowledged with a
|
|
3070
|
+
* "no tools available" output rather than left to wedge the model.
|
|
3071
|
+
*/
|
|
3072
|
+
toolExecutor?: ToolExecutor;
|
|
3073
|
+
/**
|
|
3074
|
+
* Bridge-side safety-net timeout for a single tool call (default
|
|
3075
|
+
* {@link REALTIME_TOOL_CALL_TIMEOUT_MS}). If the executor does not
|
|
3076
|
+
* settle within this window the bridge answers the model itself so
|
|
3077
|
+
* the call cannot be wedged by a hung tool.
|
|
3078
|
+
*/
|
|
3079
|
+
maxToolCallMs?: number;
|
|
3080
|
+
/** Sink for transcript / lifecycle entries worth persisting on the mission. */
|
|
3081
|
+
onTranscript?: (entry: RealtimeBridgeTranscriptEntry) => void;
|
|
3082
|
+
/**
|
|
3083
|
+
* Called exactly once when the bridge has fully ended. `pendingToolCalls`
|
|
3084
|
+
* is how many tool calls were still in flight at teardown — non-zero
|
|
3085
|
+
* means the call dropped mid-tool (e.g. an unanswered `ask_operator`),
|
|
3086
|
+
* which is the signal the API layer uses to arm callback-on-disconnect
|
|
3087
|
+
* (plan §7).
|
|
3088
|
+
*/
|
|
3089
|
+
onEnd?: (summary: {
|
|
3090
|
+
reason: string;
|
|
3091
|
+
pendingToolCalls: number;
|
|
3092
|
+
}) => void;
|
|
3093
|
+
}
|
|
3094
|
+
/**
|
|
3095
|
+
* Bridges a phone carrier's realtime-media connection to an OpenAI
|
|
3096
|
+
* Realtime connection. Provider-pluggable: the carrier wire protocol
|
|
3097
|
+
* (46elks vs Twilio Media Streams) is isolated in a
|
|
3098
|
+
* {@link RealtimeTransportAdapter}; the conversation logic here is
|
|
3099
|
+
* provider-neutral. The caller pumps raw messages in via
|
|
3100
|
+
* `handleCarrierMessage` / `handleOpenAIMessage` and connection
|
|
3101
|
+
* lifecycle via `handle*Open` / `handle*Close`. Every public method is
|
|
3102
|
+
* safe to call after the bridge has ended (they become no-ops).
|
|
3103
|
+
*
|
|
3104
|
+
* The legacy `handleElks*` method names remain as thin aliases for the
|
|
3105
|
+
* `handleCarrier*` methods so existing 46elks call sites and tests do
|
|
3106
|
+
* not break.
|
|
3107
|
+
*/
|
|
3108
|
+
declare class RealtimeVoiceBridge {
|
|
3109
|
+
private readonly carrier;
|
|
3110
|
+
private readonly openai;
|
|
3111
|
+
private readonly sessionConfig;
|
|
3112
|
+
private readonly transport;
|
|
3113
|
+
private readonly maxAudioFrameBase64;
|
|
3114
|
+
private readonly toolExecutor?;
|
|
3115
|
+
private readonly maxToolCallMs;
|
|
3116
|
+
private readonly onTranscript?;
|
|
3117
|
+
private readonly onEnd?;
|
|
3118
|
+
/** Carrier `hello`/`start` received — the call leg is live. */
|
|
3119
|
+
private helloSeen;
|
|
3120
|
+
/** OpenAI socket open + `session.update` sent. */
|
|
3121
|
+
private openaiReady;
|
|
3122
|
+
/** Bridge has ended — all further input is ignored. */
|
|
3123
|
+
private ended;
|
|
3124
|
+
/** Carrier call id from the `hello` event (46elks `callid` / Twilio `callSid`). */
|
|
3125
|
+
private callId;
|
|
3126
|
+
/** Audio frames received before OpenAI was ready, flushed on open. */
|
|
3127
|
+
private readonly pendingAudio;
|
|
3128
|
+
/** Oversized-frame counter — reported once, not per frame. */
|
|
3129
|
+
private droppedFrames;
|
|
3130
|
+
private droppedFramesReported;
|
|
3131
|
+
/** Accumulated assistant speech transcript for the current response. */
|
|
3132
|
+
private assistantTranscript;
|
|
3133
|
+
/**
|
|
3134
|
+
* Function-call name keyed by `call_id`, captured from
|
|
3135
|
+
* `response.output_item.added`. The later `*.arguments.done` event is
|
|
3136
|
+
* not guaranteed to echo the tool name, so we remember it here.
|
|
3137
|
+
*/
|
|
3138
|
+
private readonly toolCallNames;
|
|
3139
|
+
/** `call_id`s whose tool call is currently executing. */
|
|
3140
|
+
private readonly inFlightToolCalls;
|
|
3141
|
+
constructor(opts: RealtimeVoiceBridgeOptions);
|
|
3142
|
+
/** True once the bridge has ended. */
|
|
3143
|
+
get isEnded(): boolean;
|
|
3144
|
+
/** The carrier call id, once the `hello`/`start` event has been seen. */
|
|
3145
|
+
get currentCallId(): string;
|
|
3146
|
+
/** The carrier transport provider this bridge is running for. */
|
|
3147
|
+
get provider(): string;
|
|
3148
|
+
/** How many tool calls are executing right now. */
|
|
3149
|
+
get pendingToolCalls(): number;
|
|
3150
|
+
/** Call when the OpenAI socket opens — sends `session.update`. */
|
|
3151
|
+
handleOpenAIOpen(): void;
|
|
3152
|
+
/** Call when the OpenAI socket closes. */
|
|
3153
|
+
handleOpenAIClose(): void;
|
|
3154
|
+
/** Call when the OpenAI socket errors. */
|
|
3155
|
+
handleOpenAIError(err: unknown): void;
|
|
3156
|
+
/**
|
|
3157
|
+
* Call when the carrier media socket closes. The `onEnd` reason is
|
|
3158
|
+
* `<prefix>-closed`, where the prefix comes from the transport adapter
|
|
3159
|
+
* (`elks` for 46elks, `twilio` for Twilio) — so historical 46elks
|
|
3160
|
+
* reason strings (`elks-closed`) are preserved.
|
|
3161
|
+
*/
|
|
3162
|
+
handleCarrierClose(): void;
|
|
3163
|
+
/** Call when the carrier media socket errors. */
|
|
3164
|
+
handleCarrierError(err: unknown): void;
|
|
3165
|
+
/** @deprecated 46elks-era alias for {@link handleCarrierClose}. */
|
|
3166
|
+
handleElksClose(): void;
|
|
3167
|
+
/** @deprecated 46elks-era alias for {@link handleCarrierError}. */
|
|
3168
|
+
handleElksError(err: unknown): void;
|
|
3169
|
+
/**
|
|
3170
|
+
* Feed one raw message from the carrier media socket. Accepts a JSON
|
|
3171
|
+
* string or an already-parsed object. The transport adapter
|
|
3172
|
+
* normalises the provider-specific frame; malformed frames throw out
|
|
3173
|
+
* of the adapter and are ignored here (the bridge is never torn down
|
|
3174
|
+
* for one bad frame).
|
|
3175
|
+
*/
|
|
3176
|
+
handleCarrierMessage(raw: string | Record<string, unknown>): void;
|
|
3177
|
+
/** @deprecated 46elks-era alias for {@link handleCarrierMessage}. */
|
|
3178
|
+
handleElksMessage(raw: string | Record<string, unknown>): void;
|
|
3179
|
+
/** Relay caller audio to OpenAI, enforcing the per-frame size cap. */
|
|
3180
|
+
private forwardInboundAudio;
|
|
3181
|
+
/**
|
|
3182
|
+
* Feed one raw message from the OpenAI Realtime socket. Accepts a
|
|
3183
|
+
* JSON string or an already-parsed object. Unknown event types are
|
|
3184
|
+
* ignored.
|
|
3185
|
+
*/
|
|
3186
|
+
handleOpenAIMessage(raw: string | Record<string, unknown>): void;
|
|
3187
|
+
/** Relay synthesised agent audio to the carrier, enforcing the size cap. */
|
|
3188
|
+
private forwardOutboundAudio;
|
|
3189
|
+
/**
|
|
3190
|
+
* Parse a `response.function_call_arguments.done` event and dispatch
|
|
3191
|
+
* the tool call. Resolves `name` from the event or the map captured
|
|
3192
|
+
* on `response.output_item.added`; parses `arguments` (a JSON string)
|
|
3193
|
+
* defensively. Always answers the model — an unknown name, missing
|
|
3194
|
+
* executor, or oversized fan-out each gets a model-readable output
|
|
3195
|
+
* rather than being dropped (a dropped `call_id` wedges the model,
|
|
3196
|
+
* which waits forever for its `function_call_output`).
|
|
3197
|
+
*/
|
|
3198
|
+
private dispatchToolCall;
|
|
3199
|
+
/** Execute one tool call, racing the executor against the safety-net timeout. */
|
|
3200
|
+
private runToolCall;
|
|
3201
|
+
/**
|
|
3202
|
+
* Send a tool result back to OpenAI: a `function_call_output`
|
|
3203
|
+
* conversation item, then `response.create` so the model resumes
|
|
3204
|
+
* speaking with the result in hand.
|
|
3205
|
+
*
|
|
3206
|
+
* > `conversation.item.create` with `{ type: 'function_call_output',
|
|
3207
|
+
* > call_id, output }` followed by `response.create` is the OpenAI
|
|
3208
|
+
* > Realtime function-calling return path per the plan §3. Verify
|
|
3209
|
+
* > against current OpenAI docs before the live smoke test.
|
|
3210
|
+
*/
|
|
3211
|
+
private answerToolCall;
|
|
3212
|
+
/**
|
|
3213
|
+
* End the bridge. Idempotent — the first call wins, later calls are
|
|
3214
|
+
* no-ops. Sends the carrier's end-of-call frame (if it has one — 46elks
|
|
3215
|
+
* `bye`; Twilio has none), closes both ports, fires `onEnd`.
|
|
3216
|
+
*/
|
|
3217
|
+
end(reason: string): void;
|
|
3218
|
+
private noteDroppedFrame;
|
|
3219
|
+
private emitTranscript;
|
|
3220
|
+
private safeSend;
|
|
3221
|
+
}
|
|
3222
|
+
|
|
1740
3223
|
declare const PHONE_REGION_SCOPES: readonly ["AT", "DE", "EU", "WORLD"];
|
|
1741
3224
|
type PhoneRegionScope = typeof PHONE_REGION_SCOPES[number];
|
|
1742
3225
|
declare const TELEPHONY_TRANSPORT_CAPABILITIES: readonly ["sms", "call_control", "realtime_media", "recording_supported"];
|
|
@@ -1839,7 +3322,9 @@ declare function validatePhoneMissionStart(input: unknown, transport: unknown, o
|
|
|
1839
3322
|
allowPremiumOrSpecialNumbers?: boolean;
|
|
1840
3323
|
}): PhoneMissionStartValidationResult;
|
|
1841
3324
|
|
|
1842
|
-
type PhoneTransportProvider = '46elks';
|
|
3325
|
+
type PhoneTransportProvider = '46elks' | 'twilio';
|
|
3326
|
+
/** Providers that support starting outbound call-control missions. */
|
|
3327
|
+
declare const PHONE_CALL_CONTROL_PROVIDERS: readonly PhoneTransportProvider[];
|
|
1843
3328
|
/**
|
|
1844
3329
|
* Abuse / cost controls for the call-control surface. A phone mission
|
|
1845
3330
|
* places a real, billed outbound call, so /calls/start needs hard limits
|
|
@@ -1881,6 +3366,29 @@ interface PhoneMissionTranscriptEntry {
|
|
|
1881
3366
|
text: string;
|
|
1882
3367
|
metadata?: Record<string, unknown>;
|
|
1883
3368
|
}
|
|
3369
|
+
type OperatorQueryUrgency = 'normal' | 'high';
|
|
3370
|
+
/**
|
|
3371
|
+
* A question the voice agent put to its human operator mid-call via the
|
|
3372
|
+
* `ask_operator` tool (plan §4 / §5). Persisted on the mission under
|
|
3373
|
+
* `metadata.operatorQueries[]` and exposed by the operator-query API
|
|
3374
|
+
* endpoints. The bridge's `ask_operator` tool blocks, polling this
|
|
3375
|
+
* record until `answer` is set or the hard timeout elapses.
|
|
3376
|
+
*/
|
|
3377
|
+
interface PhoneOperatorQuery {
|
|
3378
|
+
/** `oq_<uuid>` — unique across all missions. */
|
|
3379
|
+
id: string;
|
|
3380
|
+
/** The question, sanitised + length-bounded. */
|
|
3381
|
+
question: string;
|
|
3382
|
+
/** One-line context on the call, if the agent supplied it. */
|
|
3383
|
+
callContext?: string;
|
|
3384
|
+
urgency: OperatorQueryUrgency;
|
|
3385
|
+
askedAt: string;
|
|
3386
|
+
/** The operator's answer — set once, never overwritten (idempotent). */
|
|
3387
|
+
answer?: string;
|
|
3388
|
+
answeredAt?: string;
|
|
3389
|
+
/** Channel the answer arrived on (e.g. `api`, `email`). */
|
|
3390
|
+
answeredVia?: string;
|
|
3391
|
+
}
|
|
1884
3392
|
interface PhoneCallMission {
|
|
1885
3393
|
id: string;
|
|
1886
3394
|
agentId: string;
|
|
@@ -1951,6 +3459,52 @@ declare class PhoneManager {
|
|
|
1951
3459
|
private authenticateWebhook;
|
|
1952
3460
|
handleVoiceStartWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneWebhookResult;
|
|
1953
3461
|
handleHangupWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneCallMission;
|
|
3462
|
+
/**
|
|
3463
|
+
* Handle Twilio's voice webhook — the `Url` Twilio fetches when the
|
|
3464
|
+
* outbound call connects. The mirror of {@link handleVoiceStartWebhook}
|
|
3465
|
+
* for Twilio: it authenticates the per-mission token, transitions the
|
|
3466
|
+
* mission to `connected`, and returns the TwiML to send back.
|
|
3467
|
+
*
|
|
3468
|
+
* `twiml` is a `<Connect><Stream>` document that wires the call's
|
|
3469
|
+
* audio to the realtime voice WebSocket — the same realtime path the
|
|
3470
|
+
* 46elks websocket-number uses. The route serves it with
|
|
3471
|
+
* `Content-Type: text/xml`.
|
|
3472
|
+
*
|
|
3473
|
+
* Like the 46elks handler this is terminal-state-guarded (#43-H5,
|
|
3474
|
+
* a late/replayed webhook cannot resurrect a finished mission) and
|
|
3475
|
+
* idempotent (a duplicate is acknowledged with the same TwiML but
|
|
3476
|
+
* changes nothing).
|
|
3477
|
+
*/
|
|
3478
|
+
handleTwilioVoiceWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): {
|
|
3479
|
+
mission: PhoneCallMission;
|
|
3480
|
+
twiml: string;
|
|
3481
|
+
};
|
|
3482
|
+
/**
|
|
3483
|
+
* Handle Twilio's status callback — the `StatusCallback` Twilio POSTs
|
|
3484
|
+
* with the terminal call status. The mirror of
|
|
3485
|
+
* {@link handleHangupWebhook} for Twilio. Idempotent + terminal-state
|
|
3486
|
+
* guarded; records the reported `CallDuration` and accumulates cost
|
|
3487
|
+
* from `Price` when Twilio supplied it (Twilio reports the final
|
|
3488
|
+
* price asynchronously, so it may be absent on the first callback —
|
|
3489
|
+
* the duration ceiling / rate limit / concurrency cap remain the
|
|
3490
|
+
* preventive cost controls, #43-H2).
|
|
3491
|
+
*/
|
|
3492
|
+
handleTwilioStatusWebhook(missionId: string, providedToken: string, payload?: Record<string, unknown>): PhoneCallMission;
|
|
3493
|
+
/**
|
|
3494
|
+
* Build the TwiML for the Twilio voice webhook — a `<Connect><Stream>`
|
|
3495
|
+
* pointing at the realtime voice WebSocket. The `<Stream>` URL is
|
|
3496
|
+
* derived from `webhookBaseUrl` (https → wss); the per-mission token
|
|
3497
|
+
* (#43-H7) rides as both a `<Parameter>` and a query param so the
|
|
3498
|
+
* media socket can be matched to its mission.
|
|
3499
|
+
*/
|
|
3500
|
+
private buildTwilioVoiceTwiML;
|
|
3501
|
+
/**
|
|
3502
|
+
* Read the call cost off a Twilio status callback (`Price`, a negative
|
|
3503
|
+
* number or a numeric string), add it to the mission's running total,
|
|
3504
|
+
* and flag a policy-cap breach (#43-H2). Twilio prices are reported
|
|
3505
|
+
* as a negative amount (a debit); we use the absolute value.
|
|
3506
|
+
*/
|
|
3507
|
+
private buildTwilioCostMetadataPatch;
|
|
1954
3508
|
/**
|
|
1955
3509
|
* Read the call cost off a 46elks hangup payload, add it to the
|
|
1956
3510
|
* mission's running total, and flag a policy-cap breach (#43-H2).
|
|
@@ -1961,7 +3515,117 @@ declare class PhoneManager {
|
|
|
1961
3515
|
private buildCostMetadataPatch;
|
|
1962
3516
|
private buildVoiceStartAction;
|
|
1963
3517
|
cancelMission(agentId: string, missionId: string): PhoneCallMission;
|
|
3518
|
+
/**
|
|
3519
|
+
* Resolve a mission by the provider's call id (e.g. the 46elks `callid`).
|
|
3520
|
+
* The realtime voice bridge uses this to match an inbound 46elks
|
|
3521
|
+
* realtime-media WebSocket — whose `hello` frame carries `callid` —
|
|
3522
|
+
* back to the mission that placed the call, so the right agent's
|
|
3523
|
+
* memory and task can be loaded into the OpenAI Realtime session.
|
|
3524
|
+
*/
|
|
3525
|
+
findMissionByProviderCallId(providerCallId: string, agentId?: string): PhoneCallMission | null;
|
|
3526
|
+
/**
|
|
3527
|
+
* Append transcript entries produced by the realtime voice bridge and
|
|
3528
|
+
* optionally transition the mission status. A mission already in a
|
|
3529
|
+
* terminal state keeps that state — a late bridge event must not
|
|
3530
|
+
* resurrect a completed/failed/cancelled mission (mirrors the
|
|
3531
|
+
* terminal-state guard on the webhook handlers). No-op if the mission
|
|
3532
|
+
* no longer exists.
|
|
3533
|
+
*/
|
|
3534
|
+
recordRealtimeActivity(missionId: string, entries: PhoneMissionTranscriptEntry[], status?: PhoneMissionState): PhoneCallMission | null;
|
|
3535
|
+
/**
|
|
3536
|
+
* Record an operator query against a mission — the first step of the
|
|
3537
|
+
* `ask_operator` tool (plan §4). Returns the persisted query; the
|
|
3538
|
+
* bridge then polls {@link getOperatorQuery} for an answer. Throws on
|
|
3539
|
+
* an unknown mission or an empty question.
|
|
3540
|
+
*/
|
|
3541
|
+
addOperatorQuery(missionId: string, input: {
|
|
3542
|
+
question: string;
|
|
3543
|
+
callContext?: string;
|
|
3544
|
+
urgency?: string;
|
|
3545
|
+
}): {
|
|
3546
|
+
mission: PhoneCallMission;
|
|
3547
|
+
query: PhoneOperatorQuery;
|
|
3548
|
+
};
|
|
3549
|
+
/** List the operator queries recorded on a mission. */
|
|
3550
|
+
listOperatorQueries(missionId: string, agentId?: string): PhoneOperatorQuery[];
|
|
3551
|
+
/** Read one operator query, or null if the mission/query is unknown. */
|
|
3552
|
+
getOperatorQuery(missionId: string, queryId: string, agentId?: string): PhoneOperatorQuery | null;
|
|
3553
|
+
/**
|
|
3554
|
+
* Resolve a mission + query by the query id alone — used by the
|
|
3555
|
+
* inbound email-reply hook, which only has the id parsed out of the
|
|
3556
|
+
* reply subject. A LIKE prefilter (id escaped so its `_`/`-` are
|
|
3557
|
+
* literal) narrows the scan; the match is then verified exactly.
|
|
3558
|
+
*/
|
|
3559
|
+
findMissionByOperatorQueryId(queryId: string): {
|
|
3560
|
+
mission: PhoneCallMission;
|
|
3561
|
+
query: PhoneOperatorQuery;
|
|
3562
|
+
} | null;
|
|
3563
|
+
/**
|
|
3564
|
+
* Record the operator's answer to a query. Idempotent — the first
|
|
3565
|
+
* answer wins; a later answer for the same query returns the existing
|
|
3566
|
+
* record unchanged with `alreadyAnswered: true`, so a duplicate
|
|
3567
|
+
* (e.g. an email reply AND an API POST) cannot fight. Returns null if
|
|
3568
|
+
* the mission/query is unknown; throws on an empty answer.
|
|
3569
|
+
*/
|
|
3570
|
+
answerOperatorQuery(missionId: string, queryId: string, answer: string, options?: {
|
|
3571
|
+
via?: string;
|
|
3572
|
+
agentId?: string;
|
|
3573
|
+
}): {
|
|
3574
|
+
mission: PhoneCallMission;
|
|
3575
|
+
query: PhoneOperatorQuery;
|
|
3576
|
+
alreadyAnswered: boolean;
|
|
3577
|
+
} | null;
|
|
3578
|
+
/**
|
|
3579
|
+
* Flag a mission for callback-on-disconnect: the call dropped while
|
|
3580
|
+
* an operator query was still unanswered, so once the operator
|
|
3581
|
+
* answers the API should dial the caller back. Returns the mission
|
|
3582
|
+
* unchanged (not flagged) if every query is already answered; null if
|
|
3583
|
+
* the mission is unknown.
|
|
3584
|
+
*/
|
|
3585
|
+
flagCallbackPending(missionId: string): PhoneCallMission | null;
|
|
3586
|
+
/** Missions currently flagged for callback-on-disconnect. */
|
|
3587
|
+
findCallbackPendingMissions(agentId?: string): PhoneCallMission[];
|
|
3588
|
+
/**
|
|
3589
|
+
* Trigger a callback (plan §7) when a callback-pending mission now has
|
|
3590
|
+
* an answered query: re-dial the same number with a continuation task
|
|
3591
|
+
* carrying the operator's answer. Returns the (updated) original
|
|
3592
|
+
* mission + the new callback mission, or null if no callback is due.
|
|
3593
|
+
*
|
|
3594
|
+
* `callbackPending` is cleared BEFORE dialing so a concurrent second
|
|
3595
|
+
* answer cannot double-dial; if the dial throws it is restored so the
|
|
3596
|
+
* callback is not silently lost, and the error is rethrown.
|
|
3597
|
+
*/
|
|
3598
|
+
triggerCallback(missionId: string, options?: StartPhoneCallOptions): Promise<{
|
|
3599
|
+
mission: PhoneCallMission;
|
|
3600
|
+
callbackMission: PhoneCallMission;
|
|
3601
|
+
} | null>;
|
|
1964
3602
|
private build46ElksCallRequest;
|
|
3603
|
+
/**
|
|
3604
|
+
* Build the Twilio outbound-call request — the mirror of
|
|
3605
|
+
* {@link build46ElksCallRequest} for Twilio's Calls.json endpoint:
|
|
3606
|
+
*
|
|
3607
|
+
* POST https://api.twilio.com/2010-04-01/Accounts/{AccountSid}/Calls.json
|
|
3608
|
+
*
|
|
3609
|
+
* with an `application/x-www-form-urlencoded` body. `From`/`To` are
|
|
3610
|
+
* the numbers; `Url` is a TwiML webhook Twilio fetches when the call
|
|
3611
|
+
* connects — it points at our voice-start webhook, which returns the
|
|
3612
|
+
* `<Connect><Stream>` TwiML that wires the call's audio to the
|
|
3613
|
+
* realtime voice WebSocket. `StatusCallback` is Twilio's hangup-
|
|
3614
|
+
* equivalent — fired with the final call status (the analogue of the
|
|
3615
|
+
* 46elks `whenhangup`). `TimeLimit` caps the call duration, re-clamped
|
|
3616
|
+
* to the server ceiling (#43-H6) exactly as the 46elks `timeout` is.
|
|
3617
|
+
*
|
|
3618
|
+
* Both webhook URLs carry the per-mission HMAC token (#43-H7), never
|
|
3619
|
+
* the raw `webhookSecret`. The Twilio `AccountSid` is `config.username`
|
|
3620
|
+
* and the `AuthToken` is `config.password` (HTTP Basic on the request,
|
|
3621
|
+
* and the key Twilio signs `X-Twilio-Signature` with).
|
|
3622
|
+
*
|
|
3623
|
+
* > The Calls.json endpoint path, the `From`/`To`/`Url`/
|
|
3624
|
+
* > `StatusCallback`/`TimeLimit` body fields, and the `<Connect>
|
|
3625
|
+
* > <Stream>` TwiML are per Twilio's public Programmable Voice docs;
|
|
3626
|
+
* > verify against current docs before the live smoke test.
|
|
3627
|
+
*/
|
|
3628
|
+
private buildTwilioCallRequest;
|
|
1965
3629
|
private insertMission;
|
|
1966
3630
|
private updateProviderCall;
|
|
1967
3631
|
private updateMissionStatus;
|
|
@@ -1971,6 +3635,10 @@ declare function buildPhoneTransportConfig(input: {
|
|
|
1971
3635
|
phoneNumber?: unknown;
|
|
1972
3636
|
username?: unknown;
|
|
1973
3637
|
password?: unknown;
|
|
3638
|
+
/** Twilio alias for {@link username} — the account SID. */
|
|
3639
|
+
accountSid?: unknown;
|
|
3640
|
+
/** Twilio alias for {@link password} — the account auth token. */
|
|
3641
|
+
authToken?: unknown;
|
|
1974
3642
|
webhookBaseUrl?: unknown;
|
|
1975
3643
|
webhookSecret?: unknown;
|
|
1976
3644
|
apiUrl?: unknown;
|
|
@@ -2772,6 +4440,333 @@ declare class SetupManager {
|
|
|
2772
4440
|
isInitialized(): boolean;
|
|
2773
4441
|
}
|
|
2774
4442
|
|
|
4443
|
+
/** External binary a media operation depends on. */
|
|
4444
|
+
type MediaBinary = 'ffmpeg' | 'ffprobe' | 'imagemagick' | 'whisper' | 'python' | 'edge-tts';
|
|
4445
|
+
/**
|
|
4446
|
+
* Detection result for one external binary. `available: false` carries
|
|
4447
|
+
* an actionable `installHint` the agent can relay to the operator.
|
|
4448
|
+
*/
|
|
4449
|
+
interface MediaCapability {
|
|
4450
|
+
/** Binary identifier. */
|
|
4451
|
+
binary: MediaBinary;
|
|
4452
|
+
/** Whether the binary was found and is runnable. */
|
|
4453
|
+
available: boolean;
|
|
4454
|
+
/** Version string when detectable. */
|
|
4455
|
+
version?: string;
|
|
4456
|
+
/** Resolved path / command used to invoke it. */
|
|
4457
|
+
command?: string;
|
|
4458
|
+
/** Human-readable description of what it powers. */
|
|
4459
|
+
description: string;
|
|
4460
|
+
/** Install instructions, present when `available` is false. */
|
|
4461
|
+
installHint?: string;
|
|
4462
|
+
}
|
|
4463
|
+
/** Aggregate media capability report — one entry per binary. */
|
|
4464
|
+
interface MediaCapabilityReport {
|
|
4465
|
+
/** Per-binary detection results. */
|
|
4466
|
+
capabilities: MediaCapability[];
|
|
4467
|
+
/** True when at least ffmpeg + ffprobe are present (the baseline). */
|
|
4468
|
+
ready: boolean;
|
|
4469
|
+
/** When the report was generated. */
|
|
4470
|
+
checkedAt: string;
|
|
4471
|
+
}
|
|
4472
|
+
/** Result envelope for an operation that produced an output file. */
|
|
4473
|
+
interface MediaFileResult {
|
|
4474
|
+
ok: true;
|
|
4475
|
+
/** Absolute path of the produced file. */
|
|
4476
|
+
filePath: string;
|
|
4477
|
+
/** Size of the produced file in bytes. */
|
|
4478
|
+
sizeBytes: number;
|
|
4479
|
+
/** Output container/format, when meaningful. */
|
|
4480
|
+
format?: string;
|
|
4481
|
+
/** Operation-specific extra fields. */
|
|
4482
|
+
[key: string]: unknown;
|
|
4483
|
+
}
|
|
4484
|
+
interface TtsGenerateOptions {
|
|
4485
|
+
/** Text to synthesise. */
|
|
4486
|
+
text: string;
|
|
4487
|
+
/** Voice preset name or a full Edge voice id. */
|
|
4488
|
+
voice?: string;
|
|
4489
|
+
/** Speaking rate, e.g. "+20%" / "-10%". */
|
|
4490
|
+
rate?: string;
|
|
4491
|
+
/** Pitch shift, e.g. "+5Hz" / "-10Hz". */
|
|
4492
|
+
pitch?: string;
|
|
4493
|
+
}
|
|
4494
|
+
type ImageAction = 'resize' | 'crop' | 'rotate' | 'convert' | 'compress' | 'text_overlay' | 'flip' | 'blur' | 'sharpen' | 'grayscale';
|
|
4495
|
+
interface ImageEditOptions {
|
|
4496
|
+
/** Absolute path to the input image. */
|
|
4497
|
+
input: string;
|
|
4498
|
+
/** Edit action to perform. */
|
|
4499
|
+
action: ImageAction;
|
|
4500
|
+
width?: number;
|
|
4501
|
+
height?: number;
|
|
4502
|
+
angle?: number;
|
|
4503
|
+
format?: string;
|
|
4504
|
+
quality?: number;
|
|
4505
|
+
text?: string;
|
|
4506
|
+
position?: string;
|
|
4507
|
+
fontSize?: number;
|
|
4508
|
+
fontColor?: string;
|
|
4509
|
+
blurRadius?: number;
|
|
4510
|
+
direction?: 'horizontal' | 'vertical';
|
|
4511
|
+
offsetX?: number;
|
|
4512
|
+
offsetY?: number;
|
|
4513
|
+
}
|
|
4514
|
+
type VideoAction = 'trim' | 'extract_frame' | 'extract_frames' | 'convert' | 'gif' | 'compress' | 'resize' | 'add_audio' | 'remove_audio' | 'speed' | 'color_grade' | 'transition' | 'text_overlay' | 'picture_in_picture' | 'split_screen' | 'ken_burns' | 'slow_motion' | 'watermark' | 'concatenate' | 'audio_mix' | 'auto_caption';
|
|
4515
|
+
interface VideoEditOptions {
|
|
4516
|
+
/** Absolute path to the input video (or image for ken_burns). */
|
|
4517
|
+
input: string;
|
|
4518
|
+
/** Edit action to perform. */
|
|
4519
|
+
action: VideoAction;
|
|
4520
|
+
start?: string;
|
|
4521
|
+
end?: string;
|
|
4522
|
+
duration?: string;
|
|
4523
|
+
timestamp?: string;
|
|
4524
|
+
interval?: number;
|
|
4525
|
+
format?: string;
|
|
4526
|
+
width?: number;
|
|
4527
|
+
height?: number;
|
|
4528
|
+
fps?: number;
|
|
4529
|
+
crf?: number;
|
|
4530
|
+
audioPath?: string;
|
|
4531
|
+
speedFactor?: number;
|
|
4532
|
+
secondInput?: string;
|
|
4533
|
+
transitionType?: string;
|
|
4534
|
+
transitionDuration?: number;
|
|
4535
|
+
text?: string;
|
|
4536
|
+
fontSize?: number;
|
|
4537
|
+
fontColor?: string;
|
|
4538
|
+
textPosition?: string;
|
|
4539
|
+
textBg?: string;
|
|
4540
|
+
textStart?: string;
|
|
4541
|
+
textEnd?: string;
|
|
4542
|
+
overlayOpacity?: number;
|
|
4543
|
+
overlayScale?: number;
|
|
4544
|
+
watermarkPosition?: string;
|
|
4545
|
+
watermarkPath?: string;
|
|
4546
|
+
pipWidth?: number;
|
|
4547
|
+
pipPosition?: string;
|
|
4548
|
+
splitDirection?: 'horizontal' | 'vertical';
|
|
4549
|
+
zoomDirection?: string;
|
|
4550
|
+
zoomDuration?: number;
|
|
4551
|
+
zoomFactor?: number;
|
|
4552
|
+
files?: string[];
|
|
4553
|
+
bgVolume?: string;
|
|
4554
|
+
fgVolume?: string;
|
|
4555
|
+
colorPreset?: string;
|
|
4556
|
+
lutPath?: string;
|
|
4557
|
+
captionColor?: string;
|
|
4558
|
+
captionFontSize?: number;
|
|
4559
|
+
/** Path to a whisper.cpp model file (.bin) — required for auto_caption. */
|
|
4560
|
+
whisperModel?: string;
|
|
4561
|
+
}
|
|
4562
|
+
type AudioAction = 'trim' | 'convert' | 'merge' | 'volume' | 'speed' | 'extract' | 'reverse' | 'fade';
|
|
4563
|
+
interface AudioEditOptions {
|
|
4564
|
+
/** Absolute path to the input audio (or video for `extract`). */
|
|
4565
|
+
input?: string;
|
|
4566
|
+
/** Edit action to perform. */
|
|
4567
|
+
action: AudioAction;
|
|
4568
|
+
start?: string;
|
|
4569
|
+
end?: string;
|
|
4570
|
+
duration?: string;
|
|
4571
|
+
format?: string;
|
|
4572
|
+
files?: string[];
|
|
4573
|
+
volume?: string;
|
|
4574
|
+
speedFactor?: number;
|
|
4575
|
+
fadeType?: 'in' | 'out' | 'both';
|
|
4576
|
+
fadeDuration?: number;
|
|
4577
|
+
}
|
|
4578
|
+
interface MediaStreamInfo {
|
|
4579
|
+
type?: string;
|
|
4580
|
+
codec?: string;
|
|
4581
|
+
width?: number;
|
|
4582
|
+
height?: number;
|
|
4583
|
+
duration?: string;
|
|
4584
|
+
bitRate?: string;
|
|
4585
|
+
sampleRate?: string;
|
|
4586
|
+
channels?: number;
|
|
4587
|
+
fps?: string;
|
|
4588
|
+
}
|
|
4589
|
+
interface MediaInfoResult {
|
|
4590
|
+
ok: true;
|
|
4591
|
+
file: string;
|
|
4592
|
+
format?: string;
|
|
4593
|
+
duration?: string;
|
|
4594
|
+
sizeBytes: number;
|
|
4595
|
+
bitRate?: string;
|
|
4596
|
+
streams: MediaStreamInfo[];
|
|
4597
|
+
}
|
|
4598
|
+
interface VideoUnderstandOptions {
|
|
4599
|
+
/** Absolute path to the input video. */
|
|
4600
|
+
input: string;
|
|
4601
|
+
/** Seconds between extracted frames (default 3). */
|
|
4602
|
+
frameInterval?: number;
|
|
4603
|
+
/** Maximum number of frames to extract (default 30). */
|
|
4604
|
+
maxFrames?: number;
|
|
4605
|
+
/** Path to a whisper.cpp model file (.bin) — enables transcription. */
|
|
4606
|
+
whisperModel?: string;
|
|
4607
|
+
}
|
|
4608
|
+
interface VideoTimelineEntry {
|
|
4609
|
+
timeSeconds: number;
|
|
4610
|
+
timeDisplay: string;
|
|
4611
|
+
framePath: string;
|
|
4612
|
+
spokenText: string;
|
|
4613
|
+
}
|
|
4614
|
+
interface VideoUnderstandResult {
|
|
4615
|
+
ok: true;
|
|
4616
|
+
video: string;
|
|
4617
|
+
duration: number;
|
|
4618
|
+
resolution: string;
|
|
4619
|
+
totalFramesExtracted: number;
|
|
4620
|
+
transcriptSegments: number;
|
|
4621
|
+
timeline: VideoTimelineEntry[];
|
|
4622
|
+
frameDir: string;
|
|
4623
|
+
hint: string;
|
|
4624
|
+
}
|
|
4625
|
+
interface VoiceCloneOptions {
|
|
4626
|
+
/** Text to speak in the cloned voice. */
|
|
4627
|
+
text: string;
|
|
4628
|
+
/** Absolute path to the reference audio sample (required). */
|
|
4629
|
+
refAudio: string;
|
|
4630
|
+
/** Transcript of the reference audio (required). */
|
|
4631
|
+
refText: string;
|
|
4632
|
+
/** Optional path to the Python interpreter with F5-TTS installed. */
|
|
4633
|
+
pythonBin?: string;
|
|
4634
|
+
/** Compute device passed to F5-TTS (default 'cpu'). */
|
|
4635
|
+
device?: string;
|
|
4636
|
+
}
|
|
4637
|
+
|
|
4638
|
+
interface MediaManagerOptions {
|
|
4639
|
+
/**
|
|
4640
|
+
* Directory media output files are written to. Created on first use.
|
|
4641
|
+
* Defaults to `<dataDir>/media` when a dataDir is given, otherwise to
|
|
4642
|
+
* `<os-tmp>/agenticmail-media`.
|
|
4643
|
+
*/
|
|
4644
|
+
outputDir?: string;
|
|
4645
|
+
/** AgenticMail data directory — used to derive a default outputDir. */
|
|
4646
|
+
dataDir?: string;
|
|
4647
|
+
}
|
|
4648
|
+
declare class MediaManager {
|
|
4649
|
+
private readonly outputDir;
|
|
4650
|
+
constructor(options?: MediaManagerOptions);
|
|
4651
|
+
/** Ensure the output directory exists; returns it. */
|
|
4652
|
+
private ensureOutputDir;
|
|
4653
|
+
/** Build an output path inside the managed output dir. */
|
|
4654
|
+
private outPath;
|
|
4655
|
+
/** Build a sub-directory inside the managed output dir. */
|
|
4656
|
+
private outDir;
|
|
4657
|
+
/** Stat a produced file into a {@link MediaFileResult} envelope. */
|
|
4658
|
+
private fileResult;
|
|
4659
|
+
/** Run ffmpeg with an argument array. */
|
|
4660
|
+
private ffmpeg;
|
|
4661
|
+
/** Run ImageMagick with an argument array (handles magick/convert). */
|
|
4662
|
+
private magick;
|
|
4663
|
+
/** Run an `identify`-style probe via the ImageMagick binary. */
|
|
4664
|
+
private magickIdentify;
|
|
4665
|
+
/** Probe a media file with ffprobe, returning parsed JSON. */
|
|
4666
|
+
private ffprobe;
|
|
4667
|
+
/** Return the media binary capability report (graceful-degradation surface). */
|
|
4668
|
+
capabilities(opts?: {
|
|
4669
|
+
force?: boolean;
|
|
4670
|
+
}): MediaCapabilityReport;
|
|
4671
|
+
/** List the built-in Edge TTS voice presets. */
|
|
4672
|
+
listVoices(): {
|
|
4673
|
+
presets: Array<{
|
|
4674
|
+
name: string;
|
|
4675
|
+
full: string;
|
|
4676
|
+
}>;
|
|
4677
|
+
default: string;
|
|
4678
|
+
};
|
|
4679
|
+
/**
|
|
4680
|
+
* Synthesise speech with Edge TTS. node-edge-tts is an optional peer
|
|
4681
|
+
* dependency — when it is absent this throws a clear, actionable
|
|
4682
|
+
* error instead of crashing. The MP3 is transcoded to OGG/Opus when
|
|
4683
|
+
* ffmpeg is available (so it can be sent as a voice note); otherwise
|
|
4684
|
+
* the raw MP3 is returned.
|
|
4685
|
+
*/
|
|
4686
|
+
ttsGenerate(opts: TtsGenerateOptions): Promise<MediaFileResult>;
|
|
4687
|
+
/** Edit an image with ImageMagick. */
|
|
4688
|
+
imageEdit(opts: ImageEditOptions): Promise<MediaFileResult>;
|
|
4689
|
+
/** Edit audio with ffmpeg. */
|
|
4690
|
+
audioEdit(opts: AudioEditOptions): Promise<MediaFileResult>;
|
|
4691
|
+
/** Probe a media file's metadata with ffprobe. */
|
|
4692
|
+
mediaInfo(input: string): Promise<MediaInfoResult>;
|
|
4693
|
+
/** Edit a video with ffmpeg (+ ImageMagick for caption rendering). */
|
|
4694
|
+
videoEdit(opts: VideoEditOptions): Promise<MediaFileResult>;
|
|
4695
|
+
private videoColorGrade;
|
|
4696
|
+
private videoTransition;
|
|
4697
|
+
private videoTextOverlay;
|
|
4698
|
+
private videoPictureInPicture;
|
|
4699
|
+
private videoSplitScreen;
|
|
4700
|
+
private videoKenBurns;
|
|
4701
|
+
private videoSlowMotion;
|
|
4702
|
+
private videoWatermark;
|
|
4703
|
+
private videoConcatenate;
|
|
4704
|
+
private videoAudioMix;
|
|
4705
|
+
/**
|
|
4706
|
+
* Burn dynamic word-chunked captions onto a video. Needs ffmpeg,
|
|
4707
|
+
* ImageMagick, and whisper.cpp (with a model file). Mirrors the
|
|
4708
|
+
* source MCP's CapCut-style caption renderer.
|
|
4709
|
+
*/
|
|
4710
|
+
private videoAutoCaption;
|
|
4711
|
+
/**
|
|
4712
|
+
* Analyse a video — extract frames at intervals and (when a whisper
|
|
4713
|
+
* model is given) transcribe the audio — and return a merged
|
|
4714
|
+
* timeline of what is shown and said.
|
|
4715
|
+
*/
|
|
4716
|
+
videoUnderstand(opts: VideoUnderstandOptions): Promise<VideoUnderstandResult>;
|
|
4717
|
+
/**
|
|
4718
|
+
* Synthesise speech in a reference voice with F5-TTS. Needs a Python
|
|
4719
|
+
* interpreter that has the `f5-tts` and `soundfile` packages. The
|
|
4720
|
+
* reference audio + transcript MUST be supplied by the caller — no
|
|
4721
|
+
* built-in voice profile. The Python script is run via execFile with an
|
|
4722
|
+
* argument array; the script and its inputs are written to a temp
|
|
4723
|
+
* file and passed by path, so no caller value is interpolated into a
|
|
4724
|
+
* command line.
|
|
4725
|
+
*/
|
|
4726
|
+
voiceClone(opts: VoiceCloneOptions): Promise<MediaFileResult>;
|
|
4727
|
+
/** Parse a whisper-produced SRT (located by stem) into timed segments. */
|
|
4728
|
+
private parseSrt;
|
|
4729
|
+
/** Unlink a file, swallowing any error (cleanup best-effort). */
|
|
4730
|
+
private tryUnlink;
|
|
4731
|
+
/** Remove the SRT(s) produced for a given stem. */
|
|
4732
|
+
private tryUnlinkSrt;
|
|
4733
|
+
/** Recursively remove a directory, swallowing errors. */
|
|
4734
|
+
private tryRmDir;
|
|
4735
|
+
}
|
|
4736
|
+
|
|
4737
|
+
/**
|
|
4738
|
+
* Detect whether a single binary is available. The result is cached; pass
|
|
4739
|
+
* `{ force: true }` to re-probe (e.g. after the operator installs it).
|
|
4740
|
+
*/
|
|
4741
|
+
declare function detectBinary(binary: MediaBinary, opts?: {
|
|
4742
|
+
force?: boolean;
|
|
4743
|
+
}): MediaCapability;
|
|
4744
|
+
/**
|
|
4745
|
+
* Resolve the command name to invoke for a binary. Throws a clear,
|
|
4746
|
+
* actionable error if the binary is not available — callers use this
|
|
4747
|
+
* at the top of every media operation so a missing dependency fails
|
|
4748
|
+
* fast with an install hint instead of an opaque ENOENT deep inside
|
|
4749
|
+
* an execFile call.
|
|
4750
|
+
*/
|
|
4751
|
+
declare function requireBinary(binary: MediaBinary): string;
|
|
4752
|
+
/**
|
|
4753
|
+
* Validate that a whisper.cpp model file exists on disk. whisper.cpp
|
|
4754
|
+
* needs an explicit model path; this surfaces a clear error rather
|
|
4755
|
+
* than letting the CLI fail cryptically.
|
|
4756
|
+
*/
|
|
4757
|
+
declare function requireWhisperModel(modelPath: string | undefined): string;
|
|
4758
|
+
/**
|
|
4759
|
+
* Build the full media capability report — one entry per binary, plus
|
|
4760
|
+
* a `ready` flag (true once ffmpeg + ffprobe, the baseline, are both
|
|
4761
|
+
* present). Used by the health surface and the `media_capabilities`
|
|
4762
|
+
* tool so an agent can see what is available before attempting an op.
|
|
4763
|
+
*/
|
|
4764
|
+
declare function getMediaCapabilities(opts?: {
|
|
4765
|
+
force?: boolean;
|
|
4766
|
+
}): MediaCapabilityReport;
|
|
4767
|
+
/** Clear the detection cache — used by tests and after a re-install. */
|
|
4768
|
+
declare function clearMediaCapabilityCache(): void;
|
|
4769
|
+
|
|
2775
4770
|
/**
|
|
2776
4771
|
* Stable thread-id derivation.
|
|
2777
4772
|
*
|
|
@@ -3231,4 +5226,4 @@ declare class MemorySearchIndex {
|
|
|
3231
5226
|
has(id: string): boolean;
|
|
3232
5227
|
}
|
|
3233
5228
|
|
|
3234
|
-
export { AGENT_ROLES, AccountManager, type AddressInfo, type Agent, AgentDeletionService, type AgentMemoryEntry, type AgentMemoryFields, AgentMemoryManager, type AgentMemoryOptions, type AgentMemoryRead, AgentMemoryStore, type AgentRole, AgenticMailClient, type AgenticMailClientOptions, type AgenticMailConfig, type ArchiveAndDeleteOptions, type ArchivedEmail, type Attachment, type AttachmentAdvisory, BRIDGE_OPERATOR_LIVE_WINDOW_MS, type BridgeMailContext, type BridgeWakeError, type BridgeWakePromptArgs, type BridgeWakeResult, type BridgeWakeRoute, type CachedMessage, CloudflareClient, type CreateAgentOptions, type CreateMemoryInput, DEFAULT_AGENT_NAME, DEFAULT_AGENT_ROLE, DEFAULT_SESSION_MAX_AGE_MS, DNSConfigurator, type Database, type DeletionReport, type DeletionSummary, DependencyChecker, DependencyInstaller, type DependencyStatus, type DnsRecord, type DnsSetupResult, type DomainInfo, DomainManager, type DomainModeConfig, type DomainPurchaseResult, DomainPurchaser, type DomainSearchResult, type DomainSetupResult, ELKS_REALTIME_AUDIO_FORMATS, type ElksRealtimeAudioFormat, type ElksRealtimeAudioMessage, type ElksRealtimeByeMessage, type ElksRealtimeHelloMessage, type ElksRealtimeInboundMessage, type ElksRealtimeOutboundMessage, type EmailEnvelope, type EmailRouteAction, type EmailRouteClass, type EmailRouteClassification, type EmailRouteInput, EmailSearchIndex, type FolderInfo, type GatewayConfig, GatewayManager, type GatewayManagerOptions, type GatewayMode, type GatewayStatus, type HostName, type HostSession, type HostSessionResumeMode, type InboundEmail, type InboundSmsEvent, type InboxEvent, type InboxExpungeEvent, type InboxFlagsEvent, type InboxNewEvent, InboxWatcher, type InboxWatcherOptions, type InstallProgress, type LinkAdvisory, type LocalSmtpConfig, MEMORY_CATEGORIES, MailReceiver, type MailReceiverOptions, MailSender, type MailSenderOptions, type MailboxInfo, type MemoryCategory, type MemoryImportance, type MemoryQueryOptions, MemorySearchIndex, type 
MemorySource, type MemoryStats, type OpenClawPhoneMissionPolicy, type OutboundCategory, type OutboundScanInput, type OutboundScanResult, type OutboundWarning, PHONE_MAX_CONCURRENT_MISSIONS, PHONE_MIN_WEBHOOK_SECRET_LENGTH, PHONE_MISSION_STATES, PHONE_RATE_LIMIT_PER_HOUR, PHONE_RATE_LIMIT_PER_MINUTE, PHONE_REGION_SCOPES, PHONE_SERVER_MAX_ATTEMPTS, PHONE_SERVER_MAX_CALL_DURATION_SECONDS, PHONE_SERVER_MAX_COST_PER_MISSION, PHONE_TASK_MAX_LENGTH, type ParsedAttachment, type ParsedEmail, type ParsedSms, PathTraversalError, type PhoneAlternativePolicy, type PhoneCallMission, type PhoneConfirmPolicy, PhoneManager, type PhoneMissionStartValidationResult, type PhoneMissionState, type PhoneMissionTranscriptEntry, type PhoneMissionValidationIssue, type PhoneMissionValidationResult, type PhoneNumberRisk, PhoneRateLimitError, type PhoneRegionScope, type PhoneTransportConfig, type PhoneTransportProfile, type PhoneTransportProvider, type PhoneTransportValidationResult, PhoneWebhookAuthError, type PhoneWebhookResult, type PlanBridgeWakeArgs, type PurchasedDomain, REDACTED, RELAY_PRESETS, RelayBridge, type RelayBridgeOptions, type RelayConfig, RelayGateway, type RelayProvider, type RelaySearchResult, type ResumeErrorClassificationOptions, SPAM_THRESHOLD, type SafeJoinOptions, type SanitizeDetection, type SanitizeResult, type SearchCriteria, type SearchableEmail, type SecurityAdvisory, type SendMailOptions, type SendResult, type SendResultWithRaw, type SendSmsInput, type SendSmsResult, ServiceManager, type ServiceStatus, type SetupConfig, SetupManager, type SetupResult, type Severity, type SmsConfig, SmsManager, type SmsMessage, SmsPoller, type SmsProvider, type SpamCategory, type SpamResult, type SpamRuleMatch, StalwartAdmin, type StalwartAdminOptions, type StalwartPrincipal, type StartPhoneCallOptions, type StartPhoneCallResult, type StartPhoneMissionInput, TELEPHONY_TRANSPORT_CAPABILITIES, type TelephonyTransportCapability, ThreadCache, type ThreadCacheEntry, type 
ThreadCacheOptions, type ThreadIdInput, type TunnelConfig, TunnelManager, UnsafeApiUrlError, type UpdateMemoryInput, type ValidatedPhoneMissionStart, WARNING_THRESHOLD, type WatcherOptions, assertWithinBase, bridgeWakeErrorMessage, bridgeWakeLastSeenAgeMs, buildApiUrl, buildElksAudioMessage, buildElksByeMessage, buildElksHandshakeMessages, buildElksInterruptMessage, buildElksListeningMessage, buildElksSendingMessage, buildInboundSecurityAdvisory, buildPhoneTransportConfig, classifyEmailRoute, classifyPhoneNumberRisk, classifyResumeError, closeDatabase, composeBridgeWakePrompt, createTestDatabase, debug, debugWarn, ensureDataDir, extractVerificationCode, flushTelemetry, forgetHostSession, getDatabase, getOperatorEmail, getSmsProvider, hostSessionStoragePath, inferPhoneRegion, isInternalEmail, isLoopbackMailHost, isPhoneRegionAllowed, isSessionFresh, isValidPhoneNumber, loadHostSession, mapProviderSmsStatus, normalizeAddress, normalizePhoneNumber, normalizeSubject, operatorPrefsStoragePath, parseElksRealtimeMessage, parseEmail, parseGoogleVoiceSms, planBridgeWake, recordToolCall, redactObject, redactPhoneTransportConfig, redactSecret, redactSmsConfig, resolveConfig, resolveTlsRejectUnauthorized, safeJoin, sanitizeEmail, saveConfig, saveHostSession, scanOutboundEmail, scoreEmail, setOperatorEmail, setTelemetryVersion, shouldSkipBridgeWakeForLiveOperator, startRelayBridge, stem, threadIdFor, tokenize, tryJoin, validateApiUrl, validatePhoneMissionPolicy, validatePhoneMissionStart, validatePhoneTransportProfile };
|
|
5229
|
+
export { AGENT_ROLES, ASK_OPERATOR_TOOL, AccountManager, type AddressInfo, type Agent, AgentDeletionService, type AgentMemoryEntry, type AgentMemoryFields, AgentMemoryManager, type AgentMemoryOptions, type AgentMemoryRead, AgentMemoryStore, type AgentRole, AgenticMailClient, type AgenticMailClientOptions, type AgenticMailConfig, type ArchiveAndDeleteOptions, type ArchivedEmail, type Attachment, type AttachmentAdvisory, type AudioAction, type AudioEditOptions, BRIDGE_OPERATOR_LIVE_WINDOW_MS, type BridgeMailContext, type BridgeWakeError, type BridgeWakePromptArgs, type BridgeWakeResult, type BridgeWakeRoute, type CachedMessage, CloudflareClient, type CreateAgentOptions, type CreateMemoryInput, DEFAULT_AGENT_NAME, DEFAULT_AGENT_ROLE, DEFAULT_REALTIME_AUDIO_FORMAT, DEFAULT_REALTIME_MODEL, DEFAULT_REALTIME_VOICE, DEFAULT_SESSION_MAX_AGE_MS, DEFAULT_WEB_SEARCH_ENDPOINT, DNSConfigurator, type Database, type DeletionReport, type DeletionSummary, DependencyChecker, DependencyInstaller, type DependencyStatus, type DnsRecord, type DnsSetupResult, type DomainInfo, DomainManager, type DomainModeConfig, type DomainPurchaseResult, DomainPurchaser, type DomainSearchResult, type DomainSetupResult, ELKS_REALTIME_AUDIO_FORMATS, ELKS_REALTIME_WS_PATH, type ElksRealtimeAudioFormat, type ElksRealtimeAudioMessage, type ElksRealtimeByeMessage, type ElksRealtimeHelloMessage, type ElksRealtimeInboundMessage, type ElksRealtimeOutboundMessage, ElksRealtimeTransport, type EmailEnvelope, type EmailRouteAction, type EmailRouteClass, type EmailRouteClassification, type EmailRouteInput, EmailSearchIndex, type FolderInfo, GET_DATETIME_TOOL, type GatewayConfig, GatewayManager, type GatewayManagerOptions, type GatewayMode, type GatewayStatus, type GetDatetimeOptions, type GetUpdatesOptions, type HostName, type HostSession, type HostSessionResumeMode, type ImageAction, type ImageEditOptions, type InboundEmail, type InboundSmsEvent, type InboxEvent, type InboxExpungeEvent, type InboxFlagsEvent, type 
InboxNewEvent, InboxWatcher, type InboxWatcherOptions, type InstallProgress, type LinkAdvisory, type LocalSmtpConfig, MEMORY_CATEGORIES, MailReceiver, type MailReceiverOptions, MailSender, type MailSenderOptions, type MailboxInfo, type MediaBinary, type MediaCapability, type MediaCapabilityReport, type MediaFileResult, type MediaInfoResult, MediaManager, type MediaManagerOptions, type MediaStreamInfo, type MemoryCategory, type MemoryImportance, type MemoryQueryOptions, type MemoryRecaller, MemorySearchIndex, type MemorySource, type MemoryStats, OPENAI_REALTIME_URL, OPERATOR_QUERY_POLL_INTERVAL_MS, OPERATOR_QUERY_SUBJECT_TAG, OPERATOR_QUERY_TIMEOUT_MS, OPERATOR_QUERY_TIMEOUT_SENTINEL, type OpenClawPhoneMissionPolicy, type OperatorQueryNotificationInput, type OperatorQueryPollOptions, type OperatorQueryUrgency, type OperatorReplyKind, type OutboundCategory, type OutboundScanInput, type OutboundScanResult, type OutboundWarning, PHONE_CALL_CONTROL_PROVIDERS, PHONE_MAX_CONCURRENT_MISSIONS, PHONE_MIN_WEBHOOK_SECRET_LENGTH, PHONE_MISSION_STATES, PHONE_RATE_LIMIT_PER_HOUR, PHONE_RATE_LIMIT_PER_MINUTE, PHONE_REGION_SCOPES, PHONE_SERVER_MAX_ATTEMPTS, PHONE_SERVER_MAX_CALL_DURATION_SECONDS, PHONE_SERVER_MAX_COST_PER_MISSION, PHONE_TASK_MAX_LENGTH, type ParsedAttachment, type ParsedEmail, type ParsedOperatorReply, type ParsedSms, type ParsedTelegramMessage, PathTraversalError, type PhoneAlternativePolicy, type PhoneCallMission, type PhoneConfirmPolicy, PhoneManager, type PhoneMissionStartValidationResult, type PhoneMissionState, type PhoneMissionTranscriptEntry, type PhoneMissionValidationIssue, type PhoneMissionValidationResult, type PhoneNumberRisk, type PhoneOperatorQuery, PhoneRateLimitError, type PhoneRegionScope, type PhoneTransportConfig, type PhoneTransportProfile, type PhoneTransportProvider, type PhoneTransportValidationResult, PhoneWebhookAuthError, type PhoneWebhookResult, type PlanBridgeWakeArgs, type PurchasedDomain, REALTIME_AUDIO_SAMPLE_RATE, 
REALTIME_MAX_AUDIO_FRAME_BASE64, REALTIME_TOOL_CALL_TIMEOUT_MS, REALTIME_TOOL_DEFINITIONS, RECALL_MEMORY_TOOL, REDACTED, RELAY_PRESETS, type RealtimeBridgePort, type RealtimeBridgeTranscriptEntry, type RealtimeInboundEvent, type RealtimeInstructionOptions, type RealtimeSessionConfigOptions, type RealtimeToolCall, type RealtimeToolDefinition, type RealtimeToolHandler, type RealtimeToolResult, type RealtimeTransportAdapter, type RealtimeTransportProvider, RealtimeVoiceBridge, type RealtimeVoiceBridgeOptions, RelayBridge, type RelayBridgeOptions, type RelayConfig, RelayGateway, type RelayProvider, type RelaySearchResult, type ResumeErrorClassificationOptions, SEARCH_EMAIL_TOOL, SPAM_THRESHOLD, type SafeJoinOptions, type SanitizeDetection, type SanitizeResult, type SearchCriteria, type SearchableEmail, type SecurityAdvisory, type SendMailOptions, type SendResult, type SendResultWithRaw, type SendSmsInput, type SendSmsResult, type SendTelegramMessageOptions, type SendTelegramMessageResult, ServiceManager, type ServiceStatus, type SetWebhookOptions, type SetupConfig, SetupManager, type SetupResult, type Severity, type SmsConfig, SmsManager, type SmsMessage, SmsPoller, type SmsProvider, type SpamCategory, type SpamResult, type SpamRuleMatch, StalwartAdmin, type StalwartAdminOptions, type StalwartPrincipal, type StartPhoneCallOptions, type StartPhoneCallResult, type StartPhoneMissionInput, TELEGRAM_API_BASE, TELEGRAM_CHUNK_SIZE, TELEGRAM_MESSAGE_LIMIT, TELEGRAM_MIN_WEBHOOK_SECRET_LENGTH, TELEGRAM_OPERATOR_QUERY_TAG, TELEGRAM_STOP_WORDS, TELEGRAM_WEBHOOK_SECRET_RE, TELEPHONY_TRANSPORT_CAPABILITIES, TWILIO_MEDIA_SAMPLE_RATE, TWILIO_REALTIME_WS_PATH, TelegramApiError, type TelegramApiOptions, type TelegramBotInfo, type TelegramChatType, type TelegramConfig, TelegramManager, type TelegramMessage, type TelegramMode, type TelephonyTransportCapability, ThreadCache, type ThreadCacheEntry, type ThreadCacheOptions, type ThreadIdInput, type ToolExecutor, type TtsGenerateOptions, type 
TunnelConfig, TunnelManager, type TwilioConnectedMessage, type TwilioMarkMessage, type TwilioMediaMessage, type TwilioRealtimeInboundMessage, type TwilioRealtimeOutboundMessage, TwilioRealtimeTransport, type TwilioStartMessage, type TwilioStopMessage, type TwilioStreamTwiMLOptions, UnsafeApiUrlError, type UpdateMemoryInput, type ValidatedPhoneMissionStart, type VideoAction, type VideoEditOptions, type VideoTimelineEntry, type VideoUnderstandOptions, type VideoUnderstandResult, type VoiceCloneOptions, WARNING_THRESHOLD, WEB_SEARCH_TOOL, WEB_SEARCH_UNTRUSTED_PREFIX, type WatcherOptions, type WebSearchOptions, assertWithinBase, bridgeWakeErrorMessage, bridgeWakeLastSeenAgeMs, buildApiUrl, buildElksAudioMessage, buildElksByeMessage, buildElksHandshakeMessages, buildElksInterruptMessage, buildElksListeningMessage, buildElksSendingMessage, buildInboundSecurityAdvisory, buildOpenAIRealtimeUrl, buildPhoneTransportConfig, buildRealtimeInstructions, buildRealtimeSessionConfig, buildRealtimeToolGuidance, buildTwilioClearMessage, buildTwilioMarkMessage, buildTwilioMediaMessage, buildTwilioSayTwiML, buildTwilioSignature, buildTwilioStreamTwiML, callTelegramApi, classifyEmailRoute, classifyPhoneNumberRisk, classifyResumeError, clearMediaCapabilityCache, closeDatabase, composeBridgeWakePrompt, createRealtimeTransport, createTestDatabase, createToolExecutor, debug, debugWarn, deleteTelegramWebhook, detectBinary, ensureDataDir, escapeXml, extractEmailAddress, extractVerificationCode, flushTelemetry, forgetHostSession, formatOperatorQueryTelegramMessage, getDatabase, getDatetime, getMediaCapabilities, getOperatorEmail, getSmsProvider, getTelegramChat, getTelegramMe, getTelegramUpdates, getTelegramWebhookInfo, hostSessionStoragePath, inferPhoneRegion, isInternalEmail, isLoopbackMailHost, isOperatorReplySender, isPhoneRegionAllowed, isSessionFresh, isTelegramChatAllowed, isTelegramStopCommand, isValidPhoneNumber, loadHostSession, mapProviderSmsStatus, nextTelegramOffset, 
normalizeAddress, normalizePhoneNumber, normalizeSubject, operatorPrefsStoragePath, operatorQuerySubject, parseElksRealtimeMessage, parseEmail, parseGoogleVoiceSms, parseOperatorQueryReply, parseTelegramOperatorReply, parseTelegramUpdate, parseTwilioRealtimeMessage, planBridgeWake, pollForOperatorAnswer, recallMemory, recordToolCall, redactBotToken, redactObject, redactPhoneTransportConfig, redactSecret, redactSmsConfig, redactTelegramConfig, requireBinary, requireWhisperModel, resolveConfig, resolveTlsRejectUnauthorized, safeJoin, sanitizeEmail, saveConfig, saveHostSession, scanOutboundEmail, scoreEmail, sendTelegramMessage, setOperatorEmail, setTelegramWebhook, setTelemetryVersion, shouldSkipBridgeWakeForLiveOperator, splitTelegramMessage, startRelayBridge, stem, stripTelegramMarkdown, threadIdFor, tokenize, tryJoin, validateApiUrl, validatePhoneMissionPolicy, validatePhoneMissionStart, validatePhoneTransportProfile, validateTwilioSignature, webSearch };
|