retell-sdk 4.57.0 → 4.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -271,6 +271,14 @@ export interface PhoneCallResponse {
271
271
  * what was the result. Available after call ends.
272
272
  */
273
273
  transcript_with_tool_calls?: Array<PhoneCallResponse.Utterance | PhoneCallResponse.ToolCallInvocationUtterance | PhoneCallResponse.ToolCallResultUtterance | PhoneCallResponse.DtmfUtterance>;
274
+ /**
275
+ * The destination number or identifier where the call was transferred to. Only
276
+ * populated when the disconnection reason was `call_transfer`. Can be a phone
277
+ * number or a SIP URI. SIP URIs are prefixed with "sip:" and may include a
278
+ * ";transport=..." portion (if transport is known) where the transport type can be
279
+ * "tls", "tcp", "udp", or "auto".
280
+ */
281
+ transfer_destination?: string | null;
274
282
  }
275
283
  export declare namespace PhoneCallResponse {
276
284
  /**
@@ -1002,6 +1010,14 @@ export interface WebCallResponse {
1002
1010
  * what was the result. Available after call ends.
1003
1011
  */
1004
1012
  transcript_with_tool_calls?: Array<WebCallResponse.Utterance | WebCallResponse.ToolCallInvocationUtterance | WebCallResponse.ToolCallResultUtterance | WebCallResponse.DtmfUtterance>;
1013
+ /**
1014
+ * The destination number or identifier where the call was transferred to. Only
1015
+ * populated when the disconnection reason was `call_transfer`. Can be a phone
1016
+ * number or a SIP URI. SIP URIs are prefixed with "sip:" and may include a
1017
+ * ";transport=..." portion (if transport is known) where the transport type can be
1018
+ * "tls", "tcp", "udp", or "auto".
1019
+ */
1020
+ transfer_destination?: string | null;
1005
1021
  }
1006
1022
  export declare namespace WebCallResponse {
1007
1023
  /**
@@ -1696,6 +1712,11 @@ export interface CallCreatePhoneCallParams {
1696
1712
  * Retell, only US numbers are supported as destination.
1697
1713
  */
1698
1714
  to_number: string;
1715
+ /**
1716
+ * Override configuration for agent, retell LLM, or conversation flow settings for
1717
+ * a specific call.
1718
+ */
1719
+ agent_override?: CallCreatePhoneCallParams.AgentOverride;
1699
1720
  /**
1700
1721
  * Add optional custom SIP headers to the call.
1701
1722
  */
@@ -1735,12 +1756,618 @@ export interface CallCreatePhoneCallParams {
1735
1756
  [key: string]: unknown;
1736
1757
  };
1737
1758
  }
1759
+ export declare namespace CallCreatePhoneCallParams {
1760
+ /**
1761
+ * Override configuration for agent, retell LLM, or conversation flow settings for
1762
+ * a specific call.
1763
+ */
1764
+ interface AgentOverride {
1765
+ /**
1766
+ * Override agent configuration settings. Any properties specified here will
1767
+ * override the base agent configuration for this call.
1768
+ */
1769
+ agent?: AgentOverride.Agent;
1770
+ /**
1771
+ * Override conversation flow configuration settings. Only applicable when using
1772
+ * conversation flow as the response engine. Supported attributes - model_choice,
1773
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
1774
+ * begin_after_user_silence_ms.
1775
+ */
1776
+ conversation_flow?: AgentOverride.ConversationFlow;
1777
+ /**
1778
+ * Override Retell LLM configuration settings. Only applicable when using Retell
1779
+ * LLM as the response engine. Supported attributes - model, s2s_model,
1780
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
1781
+ * begin_after_user_silence_ms, begin_message.
1782
+ */
1783
+ retell_llm?: AgentOverride.RetellLlm;
1784
+ }
1785
+ namespace AgentOverride {
1786
+ /**
1787
+ * Override agent configuration settings. Any properties specified here will
1788
+ * override the base agent configuration for this call.
1789
+ */
1790
+ interface Agent {
1791
+ /**
1792
+ * The name of the agent. Only used for your own reference.
1793
+ */
1794
+ agent_name?: string | null;
1795
+ /**
1796
+ * If set to true, DTMF input will be accepted and processed. If false, any DTMF
1797
+ * input will be ignored. Default to true.
1798
+ */
1799
+ allow_user_dtmf?: boolean;
1800
+ /**
1801
+ * If set, will add ambient environment sound to the call to make the experience more
1802
+ * realistic. Currently supports the following options:
1803
+ *
1804
+ * - `coffee-shop`: Coffee shop ambience with people chatting in background.
1805
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/coffee-shop.wav)
1806
+ *
1807
+ * - `convention-hall`: Convention hall ambience, with some echo and people
1808
+ * chatting in background.
1809
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/convention-hall.wav)
1810
+ *
1811
+ * - `summer-outdoor`: Summer outdoor ambience with cicada chirping.
1812
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/summer-outdoor.wav)
1813
+ *
1814
+ * - `mountain-outdoor`: Mountain outdoor ambience with birds singing.
1815
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/mountain-outdoor.wav)
1816
+ *
1817
+ * - `static-noise`: Constant static noise.
1818
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/static-noise.wav)
1819
+ *
1820
+ * - `call-center`: Call center work noise.
1821
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/call-center.wav)
1822
+ *
1823
+ * Set to `null` to remove ambient sound from this agent.
1824
+ */
1825
+ ambient_sound?: 'coffee-shop' | 'convention-hall' | 'summer-outdoor' | 'mountain-outdoor' | 'static-noise' | 'call-center' | null;
1826
+ /**
1827
+ * If set, will control the volume of the ambient sound. Value ranging from [0,2].
1828
+ * Lower value means quieter ambient sound, while higher value means louder ambient
1829
+ * sound. If unset, default value 1 will apply.
1830
+ */
1831
+ ambient_sound_volume?: number;
1832
+ /**
1833
+ * Only applicable when enable_backchannel is true. Controls how often the agent
1834
+ * would backchannel when a backchannel is possible. Value ranging from [0,1].
1835
+ * Lower value means less frequent backchannel, while higher value means more
1836
+ * frequent backchannel. If unset, default value 0.8 will apply.
1837
+ */
1838
+ backchannel_frequency?: number;
1839
+ /**
1840
+ * Only applicable when enable_backchannel is true. A list of words that the agent
1841
+ * would use as backchannel. If not set, default backchannel words will apply.
1842
+ * Check out
1843
+ * [backchannel default words](/agent/interaction-configuration#backchannel) for
1844
+ * more details. Note that certain voices do not work too well with certain words,
1845
+ * so it's recommended to experiment before adding any words.
1846
+ */
1847
+ backchannel_words?: Array<string> | null;
1848
+ /**
1849
+ * If set, will delay the first message by the specified amount of milliseconds, so
1850
+ * that it gives user more time to prepare to take the call. Valid range is [0,
1851
+ * 5000]. If not set or set to 0, agent will speak immediately. Only applicable
1852
+ * when agent speaks first.
1853
+ */
1854
+ begin_message_delay_ms?: number;
1855
+ /**
1856
+ * Provide a customized list of keywords to bias the transcriber model, so that
1857
+ * these words are more likely to get transcribed. Commonly used for names, brands,
1858
+ * street, etc.
1859
+ */
1860
+ boosted_keywords?: Array<string> | null;
1861
+ /**
1862
+ * Granular setting to manage how Retell stores sensitive data (transcripts,
1863
+ * recordings, logs, etc.). This replaces the deprecated
1864
+ * `opt_out_sensitive_data_storage` field.
1865
+ *
1866
+ * - `everything`: Store all data including transcripts, recordings, and logs.
1867
+ * - `everything_except_pii`: Store data without PII when PII is detected.
1868
+ * - `basic_attributes_only`: Store only basic attributes; no
1869
+ * transcripts/recordings/logs. If not set, default value of "everything" will
1870
+ * apply.
1871
+ */
1872
+ data_storage_setting?: 'everything' | 'everything_except_pii' | 'basic_attributes_only';
1873
+ /**
1874
+ * If set, determines what denoising mode to use. Default to noise-cancellation.
1875
+ */
1876
+ denoising_mode?: 'noise-cancellation' | 'noise-and-background-speech-cancellation';
1877
+ /**
1878
+ * Controls whether the agent would backchannel (agent interjects the speaker with
1879
+ * phrases like "yeah", "uh-huh" to signify interest and engagement). Backchannel
1880
+ * when enabled tends to show up more in longer user utterances. If not set, agent
1881
+ * will not backchannel.
1882
+ */
1883
+ enable_backchannel?: boolean;
1884
+ /**
1885
+ * If users stay silent for a period after agent speech, end the call. The minimum
1886
+ * value allowed is 10,000 ms (10 s). By default, this is set to 600000 (10 min).
1887
+ */
1888
+ end_call_after_silence_ms?: number;
1889
+ /**
1890
+ * When TTS provider for the selected voice is experiencing outages, we would use
1891
+ * fallback voices listed here for the agent. Voice id and the fallback voice ids
1892
+ * must be from different TTS providers. The system would go through the list in
1893
+ * order, if the first one in the list is also having outage, it would use the next
1894
+ * one. Set to null to remove voice fallback for the agent.
1895
+ */
1896
+ fallback_voice_ids?: Array<string> | null;
1897
+ /**
1898
+ * Controls how sensitive the agent is to user interruptions. Value ranging from
1899
+ * [0,1]. Lower value means it will take longer / more words for user to interrupt
1900
+ * agent, while higher value means it's easier for user to interrupt agent. If
1901
+ * unset, default value 1 will apply. When this is set to 0, agent would never be
1902
+ * interrupted.
1903
+ */
1904
+ interruption_sensitivity?: number;
1905
+ /**
1906
+ * Specifies what language (and dialect) the speech recognition will operate in.
1907
+ * For instance, selecting `en-GB` optimizes speech recognition for British
1908
+ * English. If unset, will use default value `en-US`. Select `multi` for
1909
+ * multilingual support, currently this supports Spanish and English.
1910
+ */
1911
+ language?: 'en-US' | 'en-IN' | 'en-GB' | 'en-AU' | 'en-NZ' | 'de-DE' | 'es-ES' | 'es-419' | 'hi-IN' | 'fr-FR' | 'fr-CA' | 'ja-JP' | 'pt-PT' | 'pt-BR' | 'zh-CN' | 'ru-RU' | 'it-IT' | 'ko-KR' | 'nl-NL' | 'nl-BE' | 'pl-PL' | 'tr-TR' | 'th-TH' | 'vi-VN' | 'ro-RO' | 'bg-BG' | 'ca-ES' | 'da-DK' | 'fi-FI' | 'el-GR' | 'hu-HU' | 'id-ID' | 'no-NO' | 'sk-SK' | 'sv-SE' | 'multi';
1912
+ /**
1913
+ * Maximum allowed length for the call, will force end the call if reached. The
1914
+ * minimum value allowed is 60,000 ms (1 min), and maximum value allowed is
1915
+ * 7,200,000 (2 hours). By default, this is set to 3,600,000 (1 hour).
1916
+ */
1917
+ max_call_duration_ms?: number;
1918
+ /**
1919
+ * If set to true, will normalize some parts of the text (number, currency, date,
1920
+ * etc.) to their spoken form for more consistent speech synthesis
1921
+ * (sometimes the voice synthesis system itself might read these wrong with the
1922
+ * raw text). For example, it will convert "Call my number 2137112342 on Jul 5th,
1923
+ * 2024 for the $24.12 payment" to "Call my number two one three seven one one two
1924
+ * three four two on july fifth, twenty twenty four for the twenty four dollars
1925
+ * twelve cents payment" before starting audio generation.
1926
+ */
1927
+ normalize_for_speech?: boolean;
1928
+ /**
1929
+ * Whether this agent opts in for signed URLs for public logs and recordings. When
1930
+ * enabled, the generated URLs will include security signatures that restrict
1931
+ * access and automatically expire after 24 hours.
1932
+ */
1933
+ opt_in_signed_url?: boolean;
1934
+ /**
1935
+ * Configuration for PII scrubbing from transcripts and recordings.
1936
+ */
1937
+ pii_config?: Agent.PiiConfig;
1938
+ /**
1939
+ * Post call analysis data to extract from the call. This data will augment the
1940
+ * pre-defined variables extracted in the call analysis. This will be available
1941
+ * after the call ends.
1942
+ */
1943
+ post_call_analysis_data?: Array<Agent.StringAnalysisData | Agent.EnumAnalysisData | Agent.BooleanAnalysisData | Agent.NumberAnalysisData> | null;
1944
+ /**
1945
+ * The model to use for post call analysis. Default to gpt-4o-mini.
1946
+ */
1947
+ post_call_analysis_model?: 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'claude-4.5-sonnet' | 'claude-4.0-sonnet' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite';
1948
+ /**
1949
+ * A list of words / phrases and their pronunciation to be used to guide the audio
1950
+ * synthesis for consistent pronunciation. Currently only supported for English &
1951
+ * 11labs voices. Set to null to remove pronunciation dictionary from this agent.
1952
+ */
1953
+ pronunciation_dictionary?: Array<Agent.PronunciationDictionary> | null;
1954
+ /**
1955
+ * If set, controls how many times agent would remind user when user is
1956
+ * unresponsive. Must be a non-negative integer. If unset, default value of 1 will
1957
+ * apply (remind once). Set to 0 to disable agent from reminding.
1958
+ */
1959
+ reminder_max_count?: number;
1960
+ /**
1961
+ * If set (in milliseconds), will trigger a reminder to the agent to speak if the
1962
+ * user has been silent for the specified duration after some agent speech. Must be
1963
+ * a positive number. If unset, default value of 10000 ms (10 s) will apply.
1964
+ */
1965
+ reminder_trigger_ms?: number;
1966
+ /**
1967
+ * The Response Engine to attach to the agent. It is used to generate responses for
1968
+ * the agent. You need to create a Response Engine first before attaching it to an
1969
+ * agent.
1970
+ */
1971
+ response_engine?: Agent.ResponseEngineRetellLm | Agent.ResponseEngineCustomLm | Agent.ResponseEngineConversationFlow;
1972
+ /**
1973
+ * Controls how responsive the agent is. Value ranging from [0,1]. Lower value
1974
+ * means less responsive agent (wait more, respond slower), while higher value
1975
+ * means faster exchanges (respond when it can). If unset, default value 1 will
1976
+ * apply.
1977
+ */
1978
+ responsiveness?: number;
1979
+ /**
1980
+ * If set, the phone ringing will last for the specified amount of milliseconds.
1981
+ * This applies for both outbound call ringtime, and call transfer ringtime.
1982
+ * Default to 30000 (30 s). Valid range is [5000, 90000].
1983
+ */
1984
+ ring_duration_ms?: number;
1985
+ /**
1986
+ * If set, determines whether speech to text should focus on latency or accuracy.
1987
+ * Default to fast mode.
1988
+ */
1989
+ stt_mode?: 'fast' | 'accurate';
1990
+ user_dtmf_options?: Agent.UserDtmfOptions | null;
1991
+ /**
1992
+ * If set, determines the vocabulary set to use for transcription. This setting
1993
+ * only applies for English agents; for non-English agents, this setting is a no-op.
1994
+ * Default to general.
1995
+ */
1996
+ vocab_specialization?: 'general' | 'medical';
1997
+ /**
1998
+ * Unique voice id used for the agent. Find list of available voices and their
1999
+ * preview in Dashboard.
2000
+ */
2001
+ voice_id?: string;
2002
+ /**
2003
+ * Optionally set the voice model used for the selected voice. Currently only
2004
+ * ElevenLabs voices have voice model selections. Set to null to remove voice model
2005
+ * selection, and default ones will apply. Check out the dashboard for details on
2006
+ * each voice model.
2007
+ */
2008
+ voice_model?: 'eleven_turbo_v2' | 'eleven_flash_v2' | 'eleven_turbo_v2_5' | 'eleven_flash_v2_5' | 'eleven_multilingual_v2' | 'tts-1' | 'gpt-4o-mini-tts' | null;
2009
+ /**
2010
+ * Controls speed of voice. Value ranging from [0.5,2]. Lower value means slower
2011
+ * speech, while higher value means faster speech rate. If unset, default value 1
2012
+ * will apply.
2013
+ */
2014
+ voice_speed?: number;
2015
+ /**
2016
+ * Controls how stable the voice is. Value ranging from [0,2]. Lower value means
2017
+ * more stable, and higher value means more variant speech generation. Currently
2018
+ * this setting only applies to `11labs` voices. If unset, default value 1 will
2019
+ * apply.
2020
+ */
2021
+ voice_temperature?: number;
2022
+ /**
2023
+ * If this option is set, the call will try to detect voicemail in the first 3
2024
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
2025
+ * applied when the voicemail is detected. Set this to null to disable voicemail
2026
+ * detection.
2027
+ */
2028
+ voicemail_option?: Agent.VoicemailOption | null;
2029
+ /**
2030
+ * If set, will control the volume of the agent. Value ranging from [0,2]. Lower
2031
+ * value means quieter agent speech, while higher value means louder agent speech.
2032
+ * If unset, default value 1 will apply.
2033
+ */
2034
+ volume?: number;
2035
+ /**
2036
+ * The timeout for the webhook in milliseconds. If not set, default value of 10000
2037
+ * will apply.
2038
+ */
2039
+ webhook_timeout_ms?: number;
2040
+ /**
2041
+ * The webhook for agent to listen to call events. See what events it would get at
2042
+ * [webhook doc](/features/webhook). If set, will bind webhook events for this
2043
+ * agent to the specified url, and will ignore the account level webhook for this
2044
+ * agent. Set to `null` to remove webhook url from this agent.
2045
+ */
2046
+ webhook_url?: string | null;
2047
+ }
2048
+ namespace Agent {
2049
+ /**
2050
+ * Configuration for PII scrubbing from transcripts and recordings.
2051
+ */
2052
+ interface PiiConfig {
2053
+ /**
2054
+ * List of PII categories to scrub from transcripts and recordings.
2055
+ */
2056
+ categories: Array<'person_name' | 'address' | 'email' | 'phone_number' | 'ssn' | 'passport' | 'driver_license' | 'credit_card' | 'bank_account' | 'password' | 'pin' | 'medical_id' | 'date_of_birth'>;
2057
+ /**
2058
+ * The processing mode for PII scrubbing. Currently only post-call is supported.
2059
+ */
2060
+ mode: 'post_call';
2061
+ }
2062
+ interface StringAnalysisData {
2063
+ /**
2064
+ * Description of the variable.
2065
+ */
2066
+ description: string;
2067
+ /**
2068
+ * Name of the variable.
2069
+ */
2070
+ name: string;
2071
+ /**
2072
+ * Type of the variable to extract.
2073
+ */
2074
+ type: 'string';
2075
+ /**
2076
+ * Examples of the variable value to teach model the style and syntax.
2077
+ */
2078
+ examples?: Array<string>;
2079
+ }
2080
+ interface EnumAnalysisData {
2081
+ /**
2082
+ * The possible values of the variable, must be non empty array.
2083
+ */
2084
+ choices: Array<string>;
2085
+ /**
2086
+ * Description of the variable.
2087
+ */
2088
+ description: string;
2089
+ /**
2090
+ * Name of the variable.
2091
+ */
2092
+ name: string;
2093
+ /**
2094
+ * Type of the variable to extract.
2095
+ */
2096
+ type: 'enum';
2097
+ }
2098
+ interface BooleanAnalysisData {
2099
+ /**
2100
+ * Description of the variable.
2101
+ */
2102
+ description: string;
2103
+ /**
2104
+ * Name of the variable.
2105
+ */
2106
+ name: string;
2107
+ /**
2108
+ * Type of the variable to extract.
2109
+ */
2110
+ type: 'boolean';
2111
+ }
2112
+ interface NumberAnalysisData {
2113
+ /**
2114
+ * Description of the variable.
2115
+ */
2116
+ description: string;
2117
+ /**
2118
+ * Name of the variable.
2119
+ */
2120
+ name: string;
2121
+ /**
2122
+ * Type of the variable to extract.
2123
+ */
2124
+ type: 'number';
2125
+ }
2126
+ interface PronunciationDictionary {
2127
+ /**
2128
+ * The phonetic alphabet to be used for pronunciation.
2129
+ */
2130
+ alphabet: 'ipa' | 'cmu';
2131
+ /**
2132
+ * Pronunciation of the word in the format of an IPA / CMU pronunciation.
2133
+ */
2134
+ phoneme: string;
2135
+ /**
2136
+ * The string of word / phrase to be annotated with pronunciation.
2137
+ */
2138
+ word: string;
2139
+ }
2140
+ interface ResponseEngineRetellLm {
2141
+ /**
2142
+ * id of the Retell LLM Response Engine.
2143
+ */
2144
+ llm_id: string;
2145
+ /**
2146
+ * type of the Response Engine.
2147
+ */
2148
+ type: 'retell-llm';
2149
+ /**
2150
+ * Version of the Retell LLM Response Engine.
2151
+ */
2152
+ version?: number | null;
2153
+ }
2154
+ interface ResponseEngineCustomLm {
2155
+ /**
2156
+ * LLM websocket url of the custom LLM.
2157
+ */
2158
+ llm_websocket_url: string;
2159
+ /**
2160
+ * type of the Response Engine.
2161
+ */
2162
+ type: 'custom-llm';
2163
+ }
2164
+ interface ResponseEngineConversationFlow {
2165
+ /**
2166
+ * ID of the Conversation Flow Response Engine.
2167
+ */
2168
+ conversation_flow_id: string;
2169
+ /**
2170
+ * type of the Response Engine.
2171
+ */
2172
+ type: 'conversation-flow';
2173
+ /**
2174
+ * Version of the Conversation Flow Response Engine.
2175
+ */
2176
+ version?: number | null;
2177
+ }
2178
+ interface UserDtmfOptions {
2179
+ /**
2180
+ * The maximum number of digits allowed in the user's DTMF (Dual-Tone
2181
+ * Multi-Frequency) input per turn. Once this limit is reached, the input is
2182
+ * considered complete and a response will be generated immediately.
2183
+ */
2184
+ digit_limit?: number | null;
2185
+ /**
2186
+ * A single key that signals the end of DTMF input. Acceptable values include any
2187
+ * digit (0–9), the pound/hash symbol (#), or the asterisk (\*).
2188
+ */
2189
+ termination_key?: string | null;
2190
+ /**
2191
+ * The time (in milliseconds) to wait for user DTMF input before timing out. The
2192
+ * timer resets with each digit received.
2193
+ */
2194
+ timeout_ms?: number;
2195
+ }
2196
+ /**
2197
+ * If this option is set, the call will try to detect voicemail in the first 3
2198
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
2199
+ * applied when the voicemail is detected. Set this to null to disable voicemail
2200
+ * detection.
2201
+ */
2202
+ interface VoicemailOption {
2203
+ action: VoicemailOption.VoicemailActionPrompt | VoicemailOption.VoicemailActionStaticText | VoicemailOption.VoicemailActionHangup;
2204
+ }
2205
+ namespace VoicemailOption {
2206
+ interface VoicemailActionPrompt {
2207
+ /**
2208
+ * The prompt used to generate the text to be spoken when the call is detected to
2209
+ * be in voicemail.
2210
+ */
2211
+ text: string;
2212
+ type: 'prompt';
2213
+ }
2214
+ interface VoicemailActionStaticText {
2215
+ /**
2216
+ * The text to be spoken when the call is detected to be in voicemail.
2217
+ */
2218
+ text: string;
2219
+ type: 'static_text';
2220
+ }
2221
+ interface VoicemailActionHangup {
2222
+ type: 'hangup';
2223
+ }
2224
+ }
2225
+ }
2226
+ /**
2227
+ * Override conversation flow configuration settings. Only applicable when using
2228
+ * conversation flow as the response engine. Supported attributes - model_choice,
2229
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2230
+ * begin_after_user_silence_ms.
2231
+ */
2232
+ interface ConversationFlow {
2233
+ /**
2234
+ * If set, the AI will begin the conversation after waiting for the user for the
2235
+ * duration (in milliseconds) specified by this attribute. This only applies if the
2236
+ * agent is configured to wait for the user to speak first. If not set, the agent
2237
+ * will wait indefinitely for the user to speak.
2238
+ */
2239
+ begin_after_user_silence_ms?: number | null;
2240
+ /**
2241
+ * Knowledge base configuration for RAG retrieval.
2242
+ */
2243
+ kb_config?: ConversationFlow.KBConfig;
2244
+ /**
2245
+ * Knowledge base IDs for RAG (Retrieval-Augmented Generation).
2246
+ */
2247
+ knowledge_base_ids?: Array<string> | null;
2248
+ /**
2249
+ * The model choice for the conversation flow.
2250
+ */
2251
+ model_choice?: ConversationFlow.ModelChoice;
2252
+ /**
2253
+ * Controls the randomness of the model's responses. Lower values make responses
2254
+ * more deterministic.
2255
+ */
2256
+ model_temperature?: number | null;
2257
+ /**
2258
+ * Who starts the conversation - user or agent.
2259
+ */
2260
+ start_speaker?: 'user' | 'agent';
2261
+ }
2262
+ namespace ConversationFlow {
2263
+ /**
2264
+ * Knowledge base configuration for RAG retrieval.
2265
+ */
2266
+ interface KBConfig {
2267
+ /**
2268
+ * Similarity threshold for filtering search results
2269
+ */
2270
+ filter_score?: number;
2271
+ /**
2272
+ * Max number of knowledge base chunks to retrieve
2273
+ */
2274
+ top_k?: number;
2275
+ }
2276
+ /**
2277
+ * The model choice for the conversation flow.
2278
+ */
2279
+ interface ModelChoice {
2280
+ /**
2281
+ * The LLM model to use
2282
+ */
2283
+ model: 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite';
2284
+ /**
2285
+ * Type of model choice
2286
+ */
2287
+ type: 'cascading';
2288
+ /**
2289
+ * Whether to use high priority pool with more dedicated resource, default false
2290
+ */
2291
+ high_priority?: boolean;
2292
+ }
2293
+ }
2294
+ /**
2295
+ * Override Retell LLM configuration settings. Only applicable when using Retell
2296
+ * LLM as the response engine. Supported attributes - model, s2s_model,
2297
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2298
+ * begin_after_user_silence_ms, begin_message.
2299
+ */
2300
+ interface RetellLlm {
2301
+ /**
2302
+ * If set, the AI will begin the conversation after waiting for the user for the
2303
+ * duration (in milliseconds) specified by this attribute. This only applies if the
2304
+ * agent is configured to wait for the user to speak first. If not set, the agent
2305
+ * will wait indefinitely for the user to speak.
2306
+ */
2307
+ begin_after_user_silence_ms?: number | null;
2308
+ /**
2309
+ * First utterance said by the agent in the call. If not set, LLM will dynamically
2310
+ * generate a message. If set to "", agent will wait for user to speak first.
2311
+ */
2312
+ begin_message?: string | null;
2313
+ /**
2314
+ * Knowledge base configuration for RAG retrieval.
2315
+ */
2316
+ kb_config?: RetellLlm.KBConfig | null;
2317
+ /**
2318
+ * A list of knowledge base ids to use for this resource.
2319
+ */
2320
+ knowledge_base_ids?: Array<string> | null;
2321
+ /**
2322
+ * Select the underlying text LLM. If not set, would default to gpt-4.1.
2323
+ */
2324
+ model?: 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite' | null;
2325
+ /**
2326
+ * If set, will control the randomness of the response. Value ranging from [0,1].
2327
+ * Lower value means more deterministic, while higher value means more random. If
2328
+ * unset, default value 0 will apply. Note that for tool calling, a lower value is
2329
+ * recommended.
2330
+ */
2331
+ model_temperature?: number;
2332
+ /**
2333
+ * Select the underlying speech to speech model. Can only set this or model, not
2334
+ * both.
2335
+ */
2336
+ s2s_model?: 'gpt-4o-realtime' | 'gpt-4o-mini-realtime' | 'gpt-realtime' | null;
2337
+ /**
2338
+ * The speaker who starts the conversation. When provided, must be either 'user' or
2339
+ * 'agent'.
2340
+ */
2341
+ start_speaker?: 'user' | 'agent';
2342
+ }
2343
+ namespace RetellLlm {
2344
+ /**
2345
+ * Knowledge base configuration for RAG retrieval.
2346
+ */
2347
+ interface KBConfig {
2348
+ /**
2349
+ * Similarity threshold for filtering search results
2350
+ */
2351
+ filter_score?: number;
2352
+ /**
2353
+ * Max number of knowledge base chunks to retrieve
2354
+ */
2355
+ top_k?: number;
2356
+ }
2357
+ }
2358
+ }
2359
+ }
1738
2360
  export interface CallCreateWebCallParams {
1739
2361
  /**
1740
2362
  * Unique id of agent used for the call. Your agent would contain the LLM Websocket
1741
2363
  * url used for this call.
1742
2364
  */
1743
2365
  agent_id: string;
2366
+ /**
2367
+ * Override configuration for agent, retell LLM, or conversation flow settings for
2368
+ * a specific call.
2369
+ */
2370
+ agent_override?: CallCreateWebCallParams.AgentOverride;
1744
2371
  /**
1745
2372
  * The version of the agent to use for the call.
1746
2373
  */
@@ -1760,11 +2387,617 @@ export interface CallCreateWebCallParams {
1760
2387
  [key: string]: unknown;
1761
2388
  };
1762
2389
  }
2390
+ export declare namespace CallCreateWebCallParams {
2391
+ /**
2392
+ * Override configuration for agent, retell LLM, or conversation flow settings for
2393
+ * a specific call.
2394
+ */
2395
+ interface AgentOverride {
2396
+ /**
2397
+ * Override agent configuration settings. Any properties specified here will
2398
+ * override the base agent configuration for this call.
2399
+ */
2400
+ agent?: AgentOverride.Agent;
2401
+ /**
2402
+ * Override conversation flow configuration settings. Only applicable when using
2403
+ * conversation flow as the response engine. Supported attributes - model_choice,
2404
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2405
+ * begin_after_user_silence_ms.
2406
+ */
2407
+ conversation_flow?: AgentOverride.ConversationFlow;
2408
+ /**
2409
+ * Override Retell LLM configuration settings. Only applicable when using Retell
2410
+ * LLM as the response engine. Supported attributes - model, s2s_model,
2411
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2412
+ * begin_after_user_silence_ms, begin_message.
2413
+ */
2414
+ retell_llm?: AgentOverride.RetellLlm;
2415
+ }
2416
+ namespace AgentOverride {
2417
+ /**
2418
+ * Override agent configuration settings. Any properties specified here will
2419
+ * override the base agent configuration for this call.
2420
+ */
2421
+ interface Agent {
2422
+ /**
2423
+ * The name of the agent. Only used for your own reference.
2424
+ */
2425
+ agent_name?: string | null;
2426
+ /**
2427
+ * If set to true, DTMF input will be accepted and processed. If false, any DTMF
2428
+ * input will be ignored. Default to true.
2429
+ */
2430
+ allow_user_dtmf?: boolean;
2431
+ /**
2432
+ * If set, will add ambient environment sound to the call to make experience more
2433
+ * realistic. Currently supports the following options:
2434
+ *
2435
+ * - `coffee-shop`: Coffee shop ambience with people chatting in background.
2436
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/coffee-shop.wav)
2437
+ *
2438
+ * - `convention-hall`: Convention hall ambience, with some echo and people
2439
+ * chatting in background.
2440
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/convention-hall.wav)
2441
+ *
2442
+ * - `summer-outdoor`: Summer outdoor ambience with cicada chirping.
2443
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/summer-outdoor.wav)
2444
+ *
2445
+ * - `mountain-outdoor`: Mountain outdoor ambience with birds singing.
2446
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/mountain-outdoor.wav)
2447
+ *
2448
+ * - `static-noise`: Constant static noise.
2449
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/static-noise.wav)
2450
+ *
2451
+ * - `call-center`: Call center work noise.
2452
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/call-center.wav)
2453
+ *
2454
+ * Set to `null` to remove ambient sound from this agent.
2455
+ */
2456
+ ambient_sound?: 'coffee-shop' | 'convention-hall' | 'summer-outdoor' | 'mountain-outdoor' | 'static-noise' | 'call-center' | null;
2457
+ /**
2458
+ * If set, will control the volume of the ambient sound. Value ranging from [0,2].
2459
+ * Lower value means quieter ambient sound, while higher value means louder ambient
2460
+ * sound. If unset, default value 1 will apply.
2461
+ */
2462
+ ambient_sound_volume?: number;
2463
+ /**
2464
+ * Only applicable when enable_backchannel is true. Controls how often the agent
2465
+ * would backchannel when a backchannel is possible. Value ranging from [0,1].
2466
+ * Lower value means less frequent backchannel, while higher value means more
2467
+ * frequent backchannel. If unset, default value 0.8 will apply.
2468
+ */
2469
+ backchannel_frequency?: number;
2470
+ /**
2471
+ * Only applicable when enable_backchannel is true. A list of words that the agent
2472
+ * would use as backchannel. If not set, default backchannel words will apply.
2473
+ * Check out
2474
+ * [backchannel default words](/agent/interaction-configuration#backchannel) for
2475
+ * more details. Note that certain voices do not work too well with certain words,
2476
+ * so it's recommended to experiment before adding any words.
2477
+ */
2478
+ backchannel_words?: Array<string> | null;
2479
+ /**
2480
+ * If set, will delay the first message by the specified amount of milliseconds, so
2481
+ * that it gives user more time to prepare to take the call. Valid range is [0,
2482
+ * 5000]. If not set or set to 0, agent will speak immediately. Only applicable
2483
+ * when agent speaks first.
2484
+ */
2485
+ begin_message_delay_ms?: number;
2486
+ /**
2487
+ * Provide a customized list of keywords to bias the transcriber model, so that
2488
+ * these words are more likely to get transcribed. Commonly used for names, brands,
2489
+ * street, etc.
2490
+ */
2491
+ boosted_keywords?: Array<string> | null;
2492
+ /**
2493
+ * Granular setting to manage how Retell stores sensitive data (transcripts,
2494
+ * recordings, logs, etc.). This replaces the deprecated
2495
+ * `opt_out_sensitive_data_storage` field.
2496
+ *
2497
+ * - `everything`: Store all data including transcripts, recordings, and logs.
2498
+ * - `everything_except_pii`: Store data without PII when PII is detected.
2499
+ * - `basic_attributes_only`: Store only basic attributes; no
2500
+ * transcripts/recordings/logs. If not set, default value of "everything" will
2501
+ * apply.
2502
+ */
2503
+ data_storage_setting?: 'everything' | 'everything_except_pii' | 'basic_attributes_only';
2504
+ /**
2505
+ * If set, determines what denoising mode to use. Default to noise-cancellation.
2506
+ */
2507
+ denoising_mode?: 'noise-cancellation' | 'noise-and-background-speech-cancellation';
2508
+ /**
2509
+ * Controls whether the agent would backchannel (agent interjects the speaker with
2510
+ * phrases like "yeah", "uh-huh" to signify interest and engagement). Backchannel
2511
+ * when enabled tends to show up more in longer user utterances. If not set, agent
2512
+ * will not backchannel.
2513
+ */
2514
+ enable_backchannel?: boolean;
2515
+ /**
2516
+ * If users stay silent for a period after agent speech, end the call. The minimum
2517
+ * value allowed is 10,000 ms (10 s). By default, this is set to 600000 (10 min).
2518
+ */
2519
+ end_call_after_silence_ms?: number;
2520
+ /**
2521
+ * When TTS provider for the selected voice is experiencing outages, we would use
2522
+ * fallback voices listed here for the agent. Voice id and the fallback voice ids
2523
+ * must be from different TTS providers. The system would go through the list in
2524
+ * order, if the first one in the list is also having outage, it would use the next
2525
+ * one. Set to null to remove voice fallback for the agent.
2526
+ */
2527
+ fallback_voice_ids?: Array<string> | null;
2528
+ /**
2529
+ * Controls how sensitive the agent is to user interruptions. Value ranging from
2530
+ * [0,1]. Lower value means it will take longer / more words for user to interrupt
2531
+ * agent, while higher value means it's easier for user to interrupt agent. If
2532
+ * unset, default value 1 will apply. When this is set to 0, agent would never be
2533
+ * interrupted.
2534
+ */
2535
+ interruption_sensitivity?: number;
2536
+ /**
2537
+ * Specifies what language (and dialect) the speech recognition will operate in.
2538
+ * For instance, selecting `en-GB` optimizes speech recognition for British
2539
+ * English. If unset, will use default value `en-US`. Select `multi` for
2540
+ * multilingual support, currently this supports Spanish and English.
2541
+ */
2542
+ language?: 'en-US' | 'en-IN' | 'en-GB' | 'en-AU' | 'en-NZ' | 'de-DE' | 'es-ES' | 'es-419' | 'hi-IN' | 'fr-FR' | 'fr-CA' | 'ja-JP' | 'pt-PT' | 'pt-BR' | 'zh-CN' | 'ru-RU' | 'it-IT' | 'ko-KR' | 'nl-NL' | 'nl-BE' | 'pl-PL' | 'tr-TR' | 'th-TH' | 'vi-VN' | 'ro-RO' | 'bg-BG' | 'ca-ES' | 'da-DK' | 'fi-FI' | 'el-GR' | 'hu-HU' | 'id-ID' | 'no-NO' | 'sk-SK' | 'sv-SE' | 'multi';
2543
+ /**
2544
+ * Maximum allowed length for the call, will force end the call if reached. The
2545
+ * minimum value allowed is 60,000 ms (1 min), and maximum value allowed is
2546
+ * 7,200,000 (2 hours). By default, this is set to 3,600,000 (1 hour).
2547
+ */
2548
+ max_call_duration_ms?: number;
2549
+ /**
2550
+ * If set to true, will normalize some parts of the text (numbers, currency, dates,
2551
+ * etc.) to their spoken form for more consistent speech synthesis
2552
+ * (sometimes the voice synthesis system itself might read these wrong with the
2553
+ * raw text). For example, it will convert "Call my number 2137112342 on Jul 5th,
2554
+ * 2024 for the $24.12 payment" to "Call my number two one three seven one one two
2555
+ * three four two on july fifth, twenty twenty four for the twenty four dollars
2556
+ * twelve cents payment" before starting audio generation.
2557
+ */
2558
+ normalize_for_speech?: boolean;
2559
+ /**
2560
+ * Whether this agent opts in for signed URLs for public logs and recordings. When
2561
+ * enabled, the generated URLs will include security signatures that restrict
2562
+ * access and automatically expire after 24 hours.
2563
+ */
2564
+ opt_in_signed_url?: boolean;
2565
+ /**
2566
+ * Configuration for PII scrubbing from transcripts and recordings.
2567
+ */
2568
+ pii_config?: Agent.PiiConfig;
2569
+ /**
2570
+ * Post call analysis data to extract from the call. This data will augment the
2571
+ * pre-defined variables extracted in the call analysis. This will be available
2572
+ * after the call ends.
2573
+ */
2574
+ post_call_analysis_data?: Array<Agent.StringAnalysisData | Agent.EnumAnalysisData | Agent.BooleanAnalysisData | Agent.NumberAnalysisData> | null;
2575
+ /**
2576
+ * The model to use for post call analysis. Default to gpt-4o-mini.
2577
+ */
2578
+ post_call_analysis_model?: 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'claude-4.5-sonnet' | 'claude-4.0-sonnet' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite';
2579
+ /**
2580
+ * A list of words / phrases and their pronunciation to be used to guide the audio
2581
+ * synthesis for consistent pronunciation. Currently only supported for English &
2582
+ * 11labs voices. Set to null to remove pronunciation dictionary from this agent.
2583
+ */
2584
+ pronunciation_dictionary?: Array<Agent.PronunciationDictionary> | null;
2585
+ /**
2586
+ * If set, controls how many times agent would remind user when user is
2587
+ * unresponsive. Must be a non negative integer. If unset, default value of 1 will
2588
+ * apply (remind once). Set to 0 to disable agent from reminding.
2589
+ */
2590
+ reminder_max_count?: number;
2591
+ /**
2592
+ * If set (in milliseconds), will trigger a reminder to the agent to speak if the
2593
+ * user has been silent for the specified duration after some agent speech. Must be
2594
+ * a positive number. If unset, default value of 10000 ms (10 s) will apply.
2595
+ */
2596
+ reminder_trigger_ms?: number;
2597
+ /**
2598
+ * The Response Engine to attach to the agent. It is used to generate responses for
2599
+ * the agent. You need to create a Response Engine first before attaching it to an
2600
+ * agent.
2601
+ */
2602
+ response_engine?: Agent.ResponseEngineRetellLm | Agent.ResponseEngineCustomLm | Agent.ResponseEngineConversationFlow;
2603
+ /**
2604
+ * Controls how responsive the agent is. Value ranging from [0,1]. Lower value
2605
+ * means less responsive agent (wait more, respond slower), while higher value
2606
+ * means faster exchanges (respond when it can). If unset, default value 1 will
2607
+ * apply.
2608
+ */
2609
+ responsiveness?: number;
2610
+ /**
2611
+ * If set, the phone ringing will last for the specified amount of milliseconds.
2612
+ * This applies for both outbound call ringtime, and call transfer ringtime.
2613
+ * Default to 30000 (30 s). Valid range is [5000, 90000].
2614
+ */
2615
+ ring_duration_ms?: number;
2616
+ /**
2617
+ * If set, determines whether speech to text should focus on latency or accuracy.
2618
+ * Default to fast mode.
2619
+ */
2620
+ stt_mode?: 'fast' | 'accurate';
2621
+ user_dtmf_options?: Agent.UserDtmfOptions | null;
2622
+ /**
2623
+ * If set, determines the vocabulary set to use for transcription. This setting
2624
+ * only applies for English agents, for non English agent, this setting is a no-op.
2625
+ * Default to general.
2626
+ */
2627
+ vocab_specialization?: 'general' | 'medical';
2628
+ /**
2629
+ * Unique voice id used for the agent. Find list of available voices and their
2630
+ * preview in Dashboard.
2631
+ */
2632
+ voice_id?: string;
2633
+ /**
2634
+ * Optionally set the voice model used for the selected voice. Currently only
2635
+ * ElevenLabs voices have voice model selections. Set to null to remove voice model
2636
+ * selection, and default ones will apply. Check out the dashboard for details on
2637
+ * each voice model.
2638
+ */
2639
+ voice_model?: 'eleven_turbo_v2' | 'eleven_flash_v2' | 'eleven_turbo_v2_5' | 'eleven_flash_v2_5' | 'eleven_multilingual_v2' | 'tts-1' | 'gpt-4o-mini-tts' | null;
2640
+ /**
2641
+ * Controls speed of voice. Value ranging from [0.5,2]. Lower value means slower
2642
+ * speech, while higher value means faster speech rate. If unset, default value 1
2643
+ * will apply.
2644
+ */
2645
+ voice_speed?: number;
2646
+ /**
2647
+ * Controls how stable the voice is. Value ranging from [0,2]. Lower value means
2648
+ * more stable, and higher value means more variant speech generation. Currently
2649
+ * this setting only applies to `11labs` voices. If unset, default value 1 will
2650
+ * apply.
2651
+ */
2652
+ voice_temperature?: number;
2653
+ /**
2654
+ * If this option is set, the call will try to detect voicemail in the first 3
2655
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
2656
+ * applied when the voicemail is detected. Set this to null to disable voicemail
2657
+ * detection.
2658
+ */
2659
+ voicemail_option?: Agent.VoicemailOption | null;
2660
+ /**
2661
+ * If set, will control the volume of the agent. Value ranging from [0,2]. Lower
2662
+ * value means quieter agent speech, while higher value means louder agent speech.
2663
+ * If unset, default value 1 will apply.
2664
+ */
2665
+ volume?: number;
2666
+ /**
2667
+ * The timeout for the webhook in milliseconds. If not set, default value of 10000
2668
+ * will apply.
2669
+ */
2670
+ webhook_timeout_ms?: number;
2671
+ /**
2672
+ * The webhook for agent to listen to call events. See what events it would get at
2673
+ * [webhook doc](/features/webhook). If set, will bind webhook events for this
2674
+ * agent to the specified url, and will ignore the account level webhook for this
2675
+ * agent. Set to `null` to remove webhook url from this agent.
2676
+ */
2677
+ webhook_url?: string | null;
2678
+ }
2679
+ namespace Agent {
2680
+ /**
2681
+ * Configuration for PII scrubbing from transcripts and recordings.
2682
+ */
2683
+ interface PiiConfig {
2684
+ /**
2685
+ * List of PII categories to scrub from transcripts and recordings.
2686
+ */
2687
+ categories: Array<'person_name' | 'address' | 'email' | 'phone_number' | 'ssn' | 'passport' | 'driver_license' | 'credit_card' | 'bank_account' | 'password' | 'pin' | 'medical_id' | 'date_of_birth'>;
2688
+ /**
2689
+ * The processing mode for PII scrubbing. Currently only post-call is supported.
2690
+ */
2691
+ mode: 'post_call';
2692
+ }
2693
+ interface StringAnalysisData {
2694
+ /**
2695
+ * Description of the variable.
2696
+ */
2697
+ description: string;
2698
+ /**
2699
+ * Name of the variable.
2700
+ */
2701
+ name: string;
2702
+ /**
2703
+ * Type of the variable to extract.
2704
+ */
2705
+ type: 'string';
2706
+ /**
2707
+ * Examples of the variable value to teach model the style and syntax.
2708
+ */
2709
+ examples?: Array<string>;
2710
+ }
2711
+ interface EnumAnalysisData {
2712
+ /**
2713
+ * The possible values of the variable, must be non empty array.
2714
+ */
2715
+ choices: Array<string>;
2716
+ /**
2717
+ * Description of the variable.
2718
+ */
2719
+ description: string;
2720
+ /**
2721
+ * Name of the variable.
2722
+ */
2723
+ name: string;
2724
+ /**
2725
+ * Type of the variable to extract.
2726
+ */
2727
+ type: 'enum';
2728
+ }
2729
+ interface BooleanAnalysisData {
2730
+ /**
2731
+ * Description of the variable.
2732
+ */
2733
+ description: string;
2734
+ /**
2735
+ * Name of the variable.
2736
+ */
2737
+ name: string;
2738
+ /**
2739
+ * Type of the variable to extract.
2740
+ */
2741
+ type: 'boolean';
2742
+ }
2743
+ interface NumberAnalysisData {
2744
+ /**
2745
+ * Description of the variable.
2746
+ */
2747
+ description: string;
2748
+ /**
2749
+ * Name of the variable.
2750
+ */
2751
+ name: string;
2752
+ /**
2753
+ * Type of the variable to extract.
2754
+ */
2755
+ type: 'number';
2756
+ }
2757
+ interface PronunciationDictionary {
2758
+ /**
2759
+ * The phonetic alphabet to be used for pronunciation.
2760
+ */
2761
+ alphabet: 'ipa' | 'cmu';
2762
+ /**
2763
+ * Pronunciation of the word in the format of an IPA / CMU pronunciation.
2764
+ */
2765
+ phoneme: string;
2766
+ /**
2767
+ * The string of word / phrase to be annotated with pronunciation.
2768
+ */
2769
+ word: string;
2770
+ }
2771
+ interface ResponseEngineRetellLm {
2772
+ /**
2773
+ * id of the Retell LLM Response Engine.
2774
+ */
2775
+ llm_id: string;
2776
+ /**
2777
+ * type of the Response Engine.
2778
+ */
2779
+ type: 'retell-llm';
2780
+ /**
2781
+ * Version of the Retell LLM Response Engine.
2782
+ */
2783
+ version?: number | null;
2784
+ }
2785
+ interface ResponseEngineCustomLm {
2786
+ /**
2787
+ * LLM websocket url of the custom LLM.
2788
+ */
2789
+ llm_websocket_url: string;
2790
+ /**
2791
+ * type of the Response Engine.
2792
+ */
2793
+ type: 'custom-llm';
2794
+ }
2795
+ interface ResponseEngineConversationFlow {
2796
+ /**
2797
+ * ID of the Conversation Flow Response Engine.
2798
+ */
2799
+ conversation_flow_id: string;
2800
+ /**
2801
+ * type of the Response Engine.
2802
+ */
2803
+ type: 'conversation-flow';
2804
+ /**
2805
+ * Version of the Conversation Flow Response Engine.
2806
+ */
2807
+ version?: number | null;
2808
+ }
2809
+ interface UserDtmfOptions {
2810
+ /**
2811
+ * The maximum number of digits allowed in the user's DTMF (Dual-Tone
2812
+ * Multi-Frequency) input per turn. Once this limit is reached, the input is
2813
+ * considered complete and a response will be generated immediately.
2814
+ */
2815
+ digit_limit?: number | null;
2816
+ /**
2817
+ * A single key that signals the end of DTMF input. Acceptable values include any
2818
+ * digit (0–9), the pound/hash symbol (#), or the asterisk (\*).
2819
+ */
2820
+ termination_key?: string | null;
2821
+ /**
2822
+ * The time (in milliseconds) to wait for user DTMF input before timing out. The
2823
+ * timer resets with each digit received.
2824
+ */
2825
+ timeout_ms?: number;
2826
+ }
2827
+ /**
2828
+ * If this option is set, the call will try to detect voicemail in the first 3
2829
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
2830
+ * applied when the voicemail is detected. Set this to null to disable voicemail
2831
+ * detection.
2832
+ */
2833
+ interface VoicemailOption {
2834
+ action: VoicemailOption.VoicemailActionPrompt | VoicemailOption.VoicemailActionStaticText | VoicemailOption.VoicemailActionHangup;
2835
+ }
2836
+ namespace VoicemailOption {
2837
+ interface VoicemailActionPrompt {
2838
+ /**
2839
+ * The prompt used to generate the text to be spoken when the call is detected to
2840
+ * be in voicemail.
2841
+ */
2842
+ text: string;
2843
+ type: 'prompt';
2844
+ }
2845
+ interface VoicemailActionStaticText {
2846
+ /**
2847
+ * The text to be spoken when the call is detected to be in voicemail.
2848
+ */
2849
+ text: string;
2850
+ type: 'static_text';
2851
+ }
2852
+ interface VoicemailActionHangup {
2853
+ type: 'hangup';
2854
+ }
2855
+ }
2856
+ }
2857
+ /**
2858
+ * Override conversation flow configuration settings. Only applicable when using
2859
+ * conversation flow as the response engine. Supported attributes - model_choice,
2860
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2861
+ * begin_after_user_silence_ms.
2862
+ */
2863
+ interface ConversationFlow {
2864
+ /**
2865
+ * If set, the AI will begin the conversation after waiting for the user for the
2866
+ * duration (in milliseconds) specified by this attribute. This only applies if the
2867
+ * agent is configured to wait for the user to speak first. If not set, the agent
2868
+ * will wait indefinitely for the user to speak.
2869
+ */
2870
+ begin_after_user_silence_ms?: number | null;
2871
+ /**
2872
+ * Knowledge base configuration for RAG retrieval.
2873
+ */
2874
+ kb_config?: ConversationFlow.KBConfig;
2875
+ /**
2876
+ * Knowledge base IDs for RAG (Retrieval-Augmented Generation).
2877
+ */
2878
+ knowledge_base_ids?: Array<string> | null;
2879
+ /**
2880
+ * The model choice for the conversation flow.
2881
+ */
2882
+ model_choice?: ConversationFlow.ModelChoice;
2883
+ /**
2884
+ * Controls the randomness of the model's responses. Lower values make responses
2885
+ * more deterministic.
2886
+ */
2887
+ model_temperature?: number | null;
2888
+ /**
2889
+ * Who starts the conversation - user or agent.
2890
+ */
2891
+ start_speaker?: 'user' | 'agent';
2892
+ }
2893
+ namespace ConversationFlow {
2894
+ /**
2895
+ * Knowledge base configuration for RAG retrieval.
2896
+ */
2897
+ interface KBConfig {
2898
+ /**
2899
+ * Similarity threshold for filtering search results
2900
+ */
2901
+ filter_score?: number;
2902
+ /**
2903
+ * Max number of knowledge base chunks to retrieve
2904
+ */
2905
+ top_k?: number;
2906
+ }
2907
+ /**
2908
+ * The model choice for the conversation flow.
2909
+ */
2910
+ interface ModelChoice {
2911
+ /**
2912
+ * The LLM model to use
2913
+ */
2914
+ model: 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite';
2915
+ /**
2916
+ * Type of model choice
2917
+ */
2918
+ type: 'cascading';
2919
+ /**
2920
+ * Whether to use high priority pool with more dedicated resource, default false
2921
+ */
2922
+ high_priority?: boolean;
2923
+ }
2924
+ }
2925
+ /**
2926
+ * Override Retell LLM configuration settings. Only applicable when using Retell
2927
+ * LLM as the response engine. Supported attributes - model, s2s_model,
2928
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2929
+ * begin_after_user_silence_ms, begin_message.
2930
+ */
2931
+ interface RetellLlm {
2932
+ /**
2933
+ * If set, the AI will begin the conversation after waiting for the user for the
2934
+ * duration (in milliseconds) specified by this attribute. This only applies if the
2935
+ * agent is configured to wait for the user to speak first. If not set, the agent
2936
+ * will wait indefinitely for the user to speak.
2937
+ */
2938
+ begin_after_user_silence_ms?: number | null;
2939
+ /**
2940
+ * First utterance said by the agent in the call. If not set, LLM will dynamically
2941
+ * generate a message. If set to "", agent will wait for user to speak first.
2942
+ */
2943
+ begin_message?: string | null;
2944
+ /**
2945
+ * Knowledge base configuration for RAG retrieval.
2946
+ */
2947
+ kb_config?: RetellLlm.KBConfig | null;
2948
+ /**
2949
+ * A list of knowledge base ids to use for this resource.
2950
+ */
2951
+ knowledge_base_ids?: Array<string> | null;
2952
+ /**
2953
+ * Select the underlying text LLM. If not set, would default to gpt-4.1.
2954
+ */
2955
+ model?: 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite' | null;
2956
+ /**
2957
+ * If set, will control the randomness of the response. Value ranging from [0,1].
2958
+ * Lower value means more deterministic, while higher value means more random. If
2959
+ * unset, default value 0 will apply. Note that for tool calling, a lower value is
2960
+ * recommended.
2961
+ */
2962
+ model_temperature?: number;
2963
+ /**
2964
+ * Select the underlying speech to speech model. Can only set this or model, not
2965
+ * both.
2966
+ */
2967
+ s2s_model?: 'gpt-4o-realtime' | 'gpt-4o-mini-realtime' | 'gpt-realtime' | null;
2968
+ /**
2969
+ * The speaker who starts the conversation. Must be either 'user' or
2970
+ * 'agent'.
2971
+ */
2972
+ start_speaker?: 'user' | 'agent';
2973
+ }
2974
+ namespace RetellLlm {
2975
+ /**
2976
+ * Knowledge base configuration for RAG retrieval.
2977
+ */
2978
+ interface KBConfig {
2979
+ /**
2980
+ * Similarity threshold for filtering search results
2981
+ */
2982
+ filter_score?: number;
2983
+ /**
2984
+ * Max number of knowledge base chunks to retrieve
2985
+ */
2986
+ top_k?: number;
2987
+ }
2988
+ }
2989
+ }
2990
+ }
1763
2991
  export interface CallRegisterPhoneCallParams {
1764
2992
  /**
1765
2993
  * The agent to use for the call.
1766
2994
  */
1767
2995
  agent_id: string;
2996
+ /**
2997
+ * Override configuration for agent, retell LLM, or conversation flow settings for
2998
+ * a specific call.
2999
+ */
3000
+ agent_override?: CallRegisterPhoneCallParams.AgentOverride;
1768
3001
  /**
1769
3002
  * The version of the agent to use for the call.
1770
3003
  */
@@ -1796,6 +3029,607 @@ export interface CallRegisterPhoneCallParams {
1796
3029
  */
1797
3030
  to_number?: string;
1798
3031
  }
3032
+ export declare namespace CallRegisterPhoneCallParams {
3033
+ /**
3034
+ * Override configuration for agent, retell LLM, or conversation flow settings for
3035
+ * a specific call.
3036
+ */
3037
+ interface AgentOverride {
3038
+ /**
3039
+ * Override agent configuration settings. Any properties specified here will
3040
+ * override the base agent configuration for this call.
3041
+ */
3042
+ agent?: AgentOverride.Agent;
3043
+ /**
3044
+ * Override conversation flow configuration settings. Only applicable when using
3045
+ * conversation flow as the response engine. Supported attributes - model_choice,
3046
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3047
+ * begin_after_user_silence_ms.
3048
+ */
3049
+ conversation_flow?: AgentOverride.ConversationFlow;
3050
+ /**
3051
+ * Override Retell LLM configuration settings. Only applicable when using Retell
3052
+ * LLM as the response engine. Supported attributes - model, s2s_model,
3053
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3054
+ * begin_after_user_silence_ms, begin_message.
3055
+ */
3056
+ retell_llm?: AgentOverride.RetellLlm;
3057
+ }
3058
+ namespace AgentOverride {
3059
+ /**
3060
+ * Override agent configuration settings. Any properties specified here will
3061
+ * override the base agent configuration for this call.
3062
+ */
3063
+ interface Agent {
3064
+ /**
3065
+ * The name of the agent. Only used for your own reference.
3066
+ */
3067
+ agent_name?: string | null;
3068
+ /**
3069
+ * If set to true, DTMF input will be accepted and processed. If false, any DTMF
3070
+ * input will be ignored. Default to true.
3071
+ */
3072
+ allow_user_dtmf?: boolean;
3073
+ /**
3074
+ * If set, will add ambient environment sound to the call to make experience more
3075
+ * realistic. Currently supports the following options:
3076
+ *
3077
+ * - `coffee-shop`: Coffee shop ambience with people chatting in background.
3078
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/coffee-shop.wav)
3079
+ *
3080
+ * - `convention-hall`: Convention hall ambience, with some echo and people
3081
+ * chatting in background.
3082
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/convention-hall.wav)
3083
+ *
3084
+ * - `summer-outdoor`: Summer outdoor ambience with cicada chirping.
3085
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/summer-outdoor.wav)
3086
+ *
3087
+ * - `mountain-outdoor`: Mountain outdoor ambience with birds singing.
3088
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/mountain-outdoor.wav)
3089
+ *
3090
+ * - `static-noise`: Constant static noise.
3091
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/static-noise.wav)
3092
+ *
3093
+ * - `call-center`: Call center work noise.
3094
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/call-center.wav)
3095
+ *
3096
+ * Set to `null` to remove ambient sound from this agent.
3097
+ */
3098
+ ambient_sound?: 'coffee-shop' | 'convention-hall' | 'summer-outdoor' | 'mountain-outdoor' | 'static-noise' | 'call-center' | null;
3099
+ /**
3100
+ * If set, will control the volume of the ambient sound. Value ranging from [0,2].
3101
+ * Lower value means quieter ambient sound, while higher value means louder ambient
3102
+ * sound. If unset, default value 1 will apply.
3103
+ */
3104
+ ambient_sound_volume?: number;
3105
+ /**
3106
+ * Only applicable when enable_backchannel is true. Controls how often the agent
3107
+ * would backchannel when a backchannel is possible. Value ranging from [0,1].
3108
+ * Lower value means less frequent backchannel, while higher value means more
3109
+ * frequent backchannel. If unset, default value 0.8 will apply.
3110
+ */
3111
+ backchannel_frequency?: number;
3112
+ /**
3113
+ * Only applicable when enable_backchannel is true. A list of words that the agent
3114
+ * would use as backchannel. If not set, default backchannel words will apply.
3115
+ * Check out
3116
+ * [backchannel default words](/agent/interaction-configuration#backchannel) for
3117
+ * more details. Note that certain voices do not work too well with certain words,
3118
+ * so it's recommended to experiment before adding any words.
3119
+ */
3120
+ backchannel_words?: Array<string> | null;
3121
+ /**
3122
+ * If set, will delay the first message by the specified amount of milliseconds, so
3123
+ * that it gives user more time to prepare to take the call. Valid range is [0,
3124
+ * 5000]. If not set or set to 0, agent will speak immediately. Only applicable
3125
+ * when agent speaks first.
3126
+ */
3127
+ begin_message_delay_ms?: number;
3128
+ /**
3129
+ * Provide a customized list of keywords to bias the transcriber model, so that
3130
+ * these words are more likely to get transcribed. Commonly used for names, brands,
3131
+ * street, etc.
3132
+ */
3133
+ boosted_keywords?: Array<string> | null;
3134
+ /**
3135
+ * Granular setting to manage how Retell stores sensitive data (transcripts,
3136
+ * recordings, logs, etc.). This replaces the deprecated
3137
+ * `opt_out_sensitive_data_storage` field.
3138
+ *
3139
+ * - `everything`: Store all data including transcripts, recordings, and logs.
3140
+ * - `everything_except_pii`: Store data without PII when PII is detected.
3141
+ * - `basic_attributes_only`: Store only basic attributes; no
3142
+ * transcripts/recordings/logs. If not set, default value of "everything" will
3143
+ * apply.
3144
+ */
3145
+ data_storage_setting?: 'everything' | 'everything_except_pii' | 'basic_attributes_only';
3146
+ /**
3147
+ * If set, determines what denoising mode to use. Default to noise-cancellation.
3148
+ */
3149
+ denoising_mode?: 'noise-cancellation' | 'noise-and-background-speech-cancellation';
3150
+ /**
3151
+ * Controls whether the agent would backchannel (agent interjects the speaker with
3152
+ * phrases like "yeah", "uh-huh" to signify interest and engagement). Backchannel
3153
+ * when enabled tends to show up more in longer user utterances. If not set, agent
3154
+ * will not backchannel.
3155
+ */
3156
+ enable_backchannel?: boolean;
3157
+ /**
3158
+ * If users stay silent for a period after agent speech, end the call. The minimum
3159
+ * value allowed is 10,000 ms (10 s). By default, this is set to 600000 (10 min).
3160
+ */
3161
+ end_call_after_silence_ms?: number;
3162
+ /**
3163
+ * When TTS provider for the selected voice is experiencing outages, we would use
3164
+ * fallback voices listed here for the agent. Voice id and the fallback voice ids
3165
+ * must be from different TTS providers. The system would go through the list in
3166
+ * order; if the first one in the list is also having an outage, it would use the next
3167
+ * one. Set to null to remove voice fallback for the agent.
3168
+ */
3169
+ fallback_voice_ids?: Array<string> | null;
3170
+ /**
3171
+ * Controls how sensitive the agent is to user interruptions. Value ranging from
3172
+ * [0,1]. Lower value means it will take longer / more words for user to interrupt
3173
+ * agent, while higher value means it's easier for user to interrupt agent. If
3174
+ * unset, default value 1 will apply. When this is set to 0, agent would never be
3175
+ * interrupted.
3176
+ */
3177
+ interruption_sensitivity?: number;
3178
+ /**
3179
+ * Specifies what language (and dialect) the speech recognition will operate in.
3180
+ * For instance, selecting `en-GB` optimizes speech recognition for British
3181
+ * English. If unset, will use default value `en-US`. Select `multi` for
3182
+ * multilingual support; currently this supports Spanish and English.
3183
+ */
3184
+ language?: 'en-US' | 'en-IN' | 'en-GB' | 'en-AU' | 'en-NZ' | 'de-DE' | 'es-ES' | 'es-419' | 'hi-IN' | 'fr-FR' | 'fr-CA' | 'ja-JP' | 'pt-PT' | 'pt-BR' | 'zh-CN' | 'ru-RU' | 'it-IT' | 'ko-KR' | 'nl-NL' | 'nl-BE' | 'pl-PL' | 'tr-TR' | 'th-TH' | 'vi-VN' | 'ro-RO' | 'bg-BG' | 'ca-ES' | 'da-DK' | 'fi-FI' | 'el-GR' | 'hu-HU' | 'id-ID' | 'no-NO' | 'sk-SK' | 'sv-SE' | 'multi';
3185
+ /**
3186
+ * Maximum allowed length for the call, will force end the call if reached. The
3187
+ * minimum value allowed is 60,000 ms (1 min), and maximum value allowed is
3188
+ * 7,200,000 (2 hours). By default, this is set to 3,600,000 (1 hour).
3189
+ */
3190
+ max_call_duration_ms?: number;
3191
+ /**
3192
+ * If set to true, will normalize some parts of the text (numbers, currency, dates,
3193
+ * etc.) to their spoken form for more consistent speech synthesis
3194
+ * (sometimes the voice synthesis system itself might read these wrong with the
3195
+ * raw text). For example, it will convert "Call my number 2137112342 on Jul 5th,
3196
+ * 2024 for the $24.12 payment" to "Call my number two one three seven one one two
3197
+ * three four two on july fifth, twenty twenty four for the twenty four dollars
3198
+ * twelve cents payment" before starting audio generation.
3199
+ */
3200
+ normalize_for_speech?: boolean;
3201
+ /**
3202
+ * Whether this agent opts in for signed URLs for public logs and recordings. When
3203
+ * enabled, the generated URLs will include security signatures that restrict
3204
+ * access and automatically expire after 24 hours.
3205
+ */
3206
+ opt_in_signed_url?: boolean;
3207
+ /**
3208
+ * Configuration for PII scrubbing from transcripts and recordings.
3209
+ */
3210
+ pii_config?: Agent.PiiConfig;
3211
+ /**
3212
+ * Post call analysis data to extract from the call. This data will augment the
3213
+ * pre-defined variables extracted in the call analysis. This will be available
3214
+ * after the call ends.
3215
+ */
3216
+ post_call_analysis_data?: Array<Agent.StringAnalysisData | Agent.EnumAnalysisData | Agent.BooleanAnalysisData | Agent.NumberAnalysisData> | null;
3217
+ /**
3218
+ * The model to use for post call analysis. Default to gpt-4o-mini.
3219
+ */
3220
+ post_call_analysis_model?: 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'claude-4.5-sonnet' | 'claude-4.0-sonnet' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite';
3221
+ /**
3222
+ * A list of words / phrases and their pronunciation to be used to guide the audio
3223
+ * synthesis for consistent pronunciation. Currently only supported for English &
3224
+ * 11labs voices. Set to null to remove pronunciation dictionary from this agent.
3225
+ */
3226
+ pronunciation_dictionary?: Array<Agent.PronunciationDictionary> | null;
3227
+ /**
3228
+ * If set, controls how many times agent would remind user when user is
3229
+ * unresponsive. Must be a non-negative integer. If unset, default value of 1 will
3230
+ * apply (remind once). Set to 0 to disable agent from reminding.
3231
+ */
3232
+ reminder_max_count?: number;
3233
+ /**
3234
+ * If set (in milliseconds), will trigger a reminder to the agent to speak if the
3235
+ * user has been silent for the specified duration after some agent speech. Must be
3236
+ * a positive number. If unset, default value of 10000 ms (10 s) will apply.
3237
+ */
3238
+ reminder_trigger_ms?: number;
3239
+ /**
3240
+ * The Response Engine to attach to the agent. It is used to generate responses for
3241
+ * the agent. You need to create a Response Engine first before attaching it to an
3242
+ * agent.
3243
+ */
3244
+ response_engine?: Agent.ResponseEngineRetellLm | Agent.ResponseEngineCustomLm | Agent.ResponseEngineConversationFlow;
3245
+ /**
3246
+ * Controls how responsive the agent is. Value ranging from [0,1]. Lower value
3247
+ * means less responsive agent (wait more, respond slower), while higher value
3248
+ * means faster exchanges (respond when it can). If unset, default value 1 will
3249
+ * apply.
3250
+ */
3251
+ responsiveness?: number;
3252
+ /**
3253
+ * If set, the phone ringing will last for the specified amount of milliseconds.
3254
+ * This applies for both outbound call ringtime, and call transfer ringtime.
3255
+ * Default to 30000 (30 s). Valid range is [5000, 90000].
3256
+ */
3257
+ ring_duration_ms?: number;
3258
+ /**
3259
+ * If set, determines whether speech to text should focus on latency or accuracy.
3260
+ * Default to fast mode.
3261
+ */
3262
+ stt_mode?: 'fast' | 'accurate';
3263
+ user_dtmf_options?: Agent.UserDtmfOptions | null;
3264
+ /**
3265
+ * If set, determines the vocabulary set to use for transcription. This setting
3266
+ * only applies to English agents; for non-English agents, this setting is a no-op.
3267
+ * Default to general.
3268
+ */
3269
+ vocab_specialization?: 'general' | 'medical';
3270
+ /**
3271
+ * Unique voice id used for the agent. Find list of available voices and their
3272
+ * preview in Dashboard.
3273
+ */
3274
+ voice_id?: string;
3275
+ /**
3276
+ * Optionally set the voice model used for the selected voice. Currently only
3277
+ * ElevenLabs voices have voice model selections. Set to null to remove voice model
3278
+ * selection, and default ones will apply. Check out the dashboard for details on
3279
+ * each voice model.
3280
+ */
3281
+ voice_model?: 'eleven_turbo_v2' | 'eleven_flash_v2' | 'eleven_turbo_v2_5' | 'eleven_flash_v2_5' | 'eleven_multilingual_v2' | 'tts-1' | 'gpt-4o-mini-tts' | null;
3282
+ /**
3283
+ * Controls speed of voice. Value ranging from [0.5,2]. Lower value means slower
3284
+ * speech, while higher value means faster speech rate. If unset, default value 1
3285
+ * will apply.
3286
+ */
3287
+ voice_speed?: number;
3288
+ /**
3289
+ * Controls how stable the voice is. Value ranging from [0,2]. Lower value means
3290
+ * more stable, and higher value means more variant speech generation. Currently
3291
+ * this setting only applies to `11labs` voices. If unset, default value 1 will
3292
+ * apply.
3293
+ */
3294
+ voice_temperature?: number;
3295
+ /**
3296
+ * If this option is set, the call will try to detect voicemail in the first 3
3297
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
3298
+ * applied when the voicemail is detected. Set this to null to disable voicemail
3299
+ * detection.
3300
+ */
3301
+ voicemail_option?: Agent.VoicemailOption | null;
3302
+ /**
3303
+ * If set, will control the volume of the agent. Value ranging from [0,2]. Lower
3304
+ * value means quieter agent speech, while higher value means louder agent speech.
3305
+ * If unset, default value 1 will apply.
3306
+ */
3307
+ volume?: number;
3308
+ /**
3309
+ * The timeout for the webhook in milliseconds. If not set, default value of 10000
3310
+ * will apply.
3311
+ */
3312
+ webhook_timeout_ms?: number;
3313
+ /**
3314
+ * The webhook for agent to listen to call events. See what events it would get at
3315
+ * [webhook doc](/features/webhook). If set, will bind webhook events for this
3316
+ * agent to the specified url, and will ignore the account level webhook for this
3317
+ * agent. Set to `null` to remove webhook url from this agent.
3318
+ */
3319
+ webhook_url?: string | null;
3320
+ }
3321
+ namespace Agent {
3322
+ /**
3323
+ * Configuration for PII scrubbing from transcripts and recordings.
3324
+ */
3325
+ interface PiiConfig {
3326
+ /**
3327
+ * List of PII categories to scrub from transcripts and recordings.
3328
+ */
3329
+ categories: Array<'person_name' | 'address' | 'email' | 'phone_number' | 'ssn' | 'passport' | 'driver_license' | 'credit_card' | 'bank_account' | 'password' | 'pin' | 'medical_id' | 'date_of_birth'>;
3330
+ /**
3331
+ * The processing mode for PII scrubbing. Currently only post-call is supported.
3332
+ */
3333
+ mode: 'post_call';
3334
+ }
3335
+ interface StringAnalysisData {
3336
+ /**
3337
+ * Description of the variable.
3338
+ */
3339
+ description: string;
3340
+ /**
3341
+ * Name of the variable.
3342
+ */
3343
+ name: string;
3344
+ /**
3345
+ * Type of the variable to extract.
3346
+ */
3347
+ type: 'string';
3348
+ /**
3349
+ * Examples of the variable value to teach model the style and syntax.
3350
+ */
3351
+ examples?: Array<string>;
3352
+ }
3353
+ interface EnumAnalysisData {
3354
+ /**
3355
+ * The possible values of the variable, must be non empty array.
3356
+ */
3357
+ choices: Array<string>;
3358
+ /**
3359
+ * Description of the variable.
3360
+ */
3361
+ description: string;
3362
+ /**
3363
+ * Name of the variable.
3364
+ */
3365
+ name: string;
3366
+ /**
3367
+ * Type of the variable to extract.
3368
+ */
3369
+ type: 'enum';
3370
+ }
3371
+ interface BooleanAnalysisData {
3372
+ /**
3373
+ * Description of the variable.
3374
+ */
3375
+ description: string;
3376
+ /**
3377
+ * Name of the variable.
3378
+ */
3379
+ name: string;
3380
+ /**
3381
+ * Type of the variable to extract.
3382
+ */
3383
+ type: 'boolean';
3384
+ }
3385
+ interface NumberAnalysisData {
3386
+ /**
3387
+ * Description of the variable.
3388
+ */
3389
+ description: string;
3390
+ /**
3391
+ * Name of the variable.
3392
+ */
3393
+ name: string;
3394
+ /**
3395
+ * Type of the variable to extract.
3396
+ */
3397
+ type: 'number';
3398
+ }
3399
+ interface PronunciationDictionary {
3400
+ /**
3401
+ * The phonetic alphabet to be used for pronunciation.
3402
+ */
3403
+ alphabet: 'ipa' | 'cmu';
3404
+ /**
3405
+ * Pronunciation of the word in the format of an IPA / CMU pronunciation.
3406
+ */
3407
+ phoneme: string;
3408
+ /**
3409
+ * The string of word / phrase to be annotated with pronunciation.
3410
+ */
3411
+ word: string;
3412
+ }
3413
+ interface ResponseEngineRetellLm {
3414
+ /**
3415
+ * id of the Retell LLM Response Engine.
3416
+ */
3417
+ llm_id: string;
3418
+ /**
3419
+ * type of the Response Engine.
3420
+ */
3421
+ type: 'retell-llm';
3422
+ /**
3423
+ * Version of the Retell LLM Response Engine.
3424
+ */
3425
+ version?: number | null;
3426
+ }
3427
+ interface ResponseEngineCustomLm {
3428
+ /**
3429
+ * LLM websocket url of the custom LLM.
3430
+ */
3431
+ llm_websocket_url: string;
3432
+ /**
3433
+ * type of the Response Engine.
3434
+ */
3435
+ type: 'custom-llm';
3436
+ }
3437
+ interface ResponseEngineConversationFlow {
3438
+ /**
3439
+ * ID of the Conversation Flow Response Engine.
3440
+ */
3441
+ conversation_flow_id: string;
3442
+ /**
3443
+ * type of the Response Engine.
3444
+ */
3445
+ type: 'conversation-flow';
3446
+ /**
3447
+ * Version of the Conversation Flow Response Engine.
3448
+ */
3449
+ version?: number | null;
3450
+ }
3451
+ interface UserDtmfOptions {
3452
+ /**
3453
+ * The maximum number of digits allowed in the user's DTMF (Dual-Tone
3454
+ * Multi-Frequency) input per turn. Once this limit is reached, the input is
3455
+ * considered complete and a response will be generated immediately.
3456
+ */
3457
+ digit_limit?: number | null;
3458
+ /**
3459
+ * A single key that signals the end of DTMF input. Acceptable values include any
3460
+ * digit (0–9), the pound/hash symbol (#), or the asterisk (\*).
3461
+ */
3462
+ termination_key?: string | null;
3463
+ /**
3464
+ * The time (in milliseconds) to wait for user DTMF input before timing out. The
3465
+ * timer resets with each digit received.
3466
+ */
3467
+ timeout_ms?: number;
3468
+ }
3469
+ /**
3470
+ * If this option is set, the call will try to detect voicemail in the first 3
3471
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
3472
+ * applied when the voicemail is detected. Set this to null to disable voicemail
3473
+ * detection.
3474
+ */
3475
+ interface VoicemailOption {
3476
+ action: VoicemailOption.VoicemailActionPrompt | VoicemailOption.VoicemailActionStaticText | VoicemailOption.VoicemailActionHangup;
3477
+ }
3478
+ namespace VoicemailOption {
3479
+ interface VoicemailActionPrompt {
3480
+ /**
3481
+ * The prompt used to generate the text to be spoken when the call is detected to
3482
+ * be in voicemail.
3483
+ */
3484
+ text: string;
3485
+ type: 'prompt';
3486
+ }
3487
+ interface VoicemailActionStaticText {
3488
+ /**
3489
+ * The text to be spoken when the call is detected to be in voicemail.
3490
+ */
3491
+ text: string;
3492
+ type: 'static_text';
3493
+ }
3494
+ interface VoicemailActionHangup {
3495
+ type: 'hangup';
3496
+ }
3497
+ }
3498
+ }
3499
+ /**
3500
+ * Override conversation flow configuration settings. Only applicable when using
3501
+ * conversation flow as the response engine. Supported attributes - model_choice,
3502
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3503
+ * begin_after_user_silence_ms.
3504
+ */
3505
+ interface ConversationFlow {
3506
+ /**
3507
+ * If set, the AI will begin the conversation after waiting for the user for the
3508
+ * duration (in milliseconds) specified by this attribute. This only applies if the
3509
+ * agent is configured to wait for the user to speak first. If not set, the agent
3510
+ * will wait indefinitely for the user to speak.
3511
+ */
3512
+ begin_after_user_silence_ms?: number | null;
3513
+ /**
3514
+ * Knowledge base configuration for RAG retrieval.
3515
+ */
3516
+ kb_config?: ConversationFlow.KBConfig;
3517
+ /**
3518
+ * Knowledge base IDs for RAG (Retrieval-Augmented Generation).
3519
+ */
3520
+ knowledge_base_ids?: Array<string> | null;
3521
+ /**
3522
+ * The model choice for the conversation flow.
3523
+ */
3524
+ model_choice?: ConversationFlow.ModelChoice;
3525
+ /**
3526
+ * Controls the randomness of the model's responses. Lower values make responses
3527
+ * more deterministic.
3528
+ */
3529
+ model_temperature?: number | null;
3530
+ /**
3531
+ * Who starts the conversation - user or agent.
3532
+ */
3533
+ start_speaker?: 'user' | 'agent';
3534
+ }
3535
+ namespace ConversationFlow {
3536
+ /**
3537
+ * Knowledge base configuration for RAG retrieval.
3538
+ */
3539
+ interface KBConfig {
3540
+ /**
3541
+ * Similarity threshold for filtering search results
3542
+ */
3543
+ filter_score?: number;
3544
+ /**
3545
+ * Max number of knowledge base chunks to retrieve
3546
+ */
3547
+ top_k?: number;
3548
+ }
3549
+ /**
3550
+ * The model choice for the conversation flow.
3551
+ */
3552
+ interface ModelChoice {
3553
+ /**
3554
+ * The LLM model to use
3555
+ */
3556
+ model: 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite';
3557
+ /**
3558
+ * Type of model choice
3559
+ */
3560
+ type: 'cascading';
3561
+ /**
3562
+ * Whether to use high priority pool with more dedicated resource, default false
3563
+ */
3564
+ high_priority?: boolean;
3565
+ }
3566
+ }
3567
+ /**
3568
+ * Override Retell LLM configuration settings. Only applicable when using Retell
3569
+ * LLM as the response engine. Supported attributes - model, s2s_model,
3570
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3571
+ * begin_after_user_silence_ms, begin_message.
3572
+ */
3573
+ interface RetellLlm {
3574
+ /**
3575
+ * If set, the AI will begin the conversation after waiting for the user for the
3576
+ * duration (in milliseconds) specified by this attribute. This only applies if the
3577
+ * agent is configured to wait for the user to speak first. If not set, the agent
3578
+ * will wait indefinitely for the user to speak.
3579
+ */
3580
+ begin_after_user_silence_ms?: number | null;
3581
+ /**
3582
+ * First utterance said by the agent in the call. If not set, LLM will dynamically
3583
+ * generate a message. If set to "", agent will wait for user to speak first.
3584
+ */
3585
+ begin_message?: string | null;
3586
+ /**
3587
+ * Knowledge base configuration for RAG retrieval.
3588
+ */
3589
+ kb_config?: RetellLlm.KBConfig | null;
3590
+ /**
3591
+ * A list of knowledge base ids to use for this resource.
3592
+ */
3593
+ knowledge_base_ids?: Array<string> | null;
3594
+ /**
3595
+ * Select the underlying text LLM. If not set, would default to gpt-4.1.
3596
+ */
3597
+ model?: 'gpt-5' | 'gpt-5-mini' | 'gpt-5-nano' | 'gpt-4o' | 'gpt-4o-mini' | 'gpt-4.1' | 'gpt-4.1-mini' | 'gpt-4.1-nano' | 'claude-3.7-sonnet' | 'claude-3.5-haiku' | 'gemini-2.0-flash' | 'gemini-2.0-flash-lite' | 'gemini-2.5-flash' | 'gemini-2.5-flash-lite' | null;
3598
+ /**
3599
+ * If set, will control the randomness of the response. Value ranging from [0,1].
3600
+ * Lower value means more deterministic, while higher value means more random. If
3601
+ * unset, default value 0 will apply. Note that for tool calling, a lower value is
3602
+ * recommended.
3603
+ */
3604
+ model_temperature?: number;
3605
+ /**
3606
+ * Select the underlying speech to speech model. Can only set this or model, not
3607
+ * both.
3608
+ */
3609
+ s2s_model?: 'gpt-4o-realtime' | 'gpt-4o-mini-realtime' | 'gpt-realtime' | null;
3610
+ /**
3611
+ * The speaker who starts the conversation. Optional in this type; when set,
3612
+ * must be either 'user' or 'agent'.
3613
+ */
3614
+ start_speaker?: 'user' | 'agent';
3615
+ }
3616
+ namespace RetellLlm {
3617
+ /**
3618
+ * Knowledge base configuration for RAG retrieval.
3619
+ */
3620
+ interface KBConfig {
3621
+ /**
3622
+ * Similarity threshold for filtering search results
3623
+ */
3624
+ filter_score?: number;
3625
+ /**
3626
+ * Max number of knowledge base chunks to retrieve
3627
+ */
3628
+ top_k?: number;
3629
+ }
3630
+ }
3631
+ }
3632
+ }
1799
3633
  export declare namespace Call {
1800
3634
  export { type CallResponse as CallResponse, type PhoneCallResponse as PhoneCallResponse, type WebCallResponse as WebCallResponse, type CallListResponse as CallListResponse, type CallUpdateParams as CallUpdateParams, type CallListParams as CallListParams, type CallCreatePhoneCallParams as CallCreatePhoneCallParams, type CallCreateWebCallParams as CallCreateWebCallParams, type CallRegisterPhoneCallParams as CallRegisterPhoneCallParams, };
1801
3635
  }