retell-sdk 4.56.0 → 4.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -216,7 +216,7 @@ export interface PhoneCallResponse {
216
216
  data_storage_setting?: 'everything' | 'everything_except_pii' | 'basic_attributes_only' | null;
217
217
 
218
218
  /**
219
- * The reason for the disconnection of the call. Read details desciption about
219
+ * The reason for the disconnection of the call. Read detailed description about
220
220
  * reasons listed here at
221
221
  * [Disconnection Reason Doc](/reliability/debug-call-disconnect#understanding-disconnection-reasons).
222
222
  */
@@ -377,6 +377,15 @@ export interface PhoneCallResponse {
377
377
  | PhoneCallResponse.ToolCallResultUtterance
378
378
  | PhoneCallResponse.DtmfUtterance
379
379
  >;
380
+
381
+ /**
382
+ * The destination number or identifier where the call was transferred to. Only
383
+ * populated when the disconnection reason was `call_transfer`. Can be a phone
384
+ * number or a SIP URI. SIP URIs are prefixed with "sip:" and may include a
385
+ * ";transport=..." portion (if transport is known) where the transport type can be
386
+ * "tls", "tcp", "udp", or "auto".
387
+ */
388
+ transfer_destination?: string | null;
380
389
  }
381
390
 
382
391
  export namespace PhoneCallResponse {
@@ -1122,7 +1131,7 @@ export interface WebCallResponse {
1122
1131
  data_storage_setting?: 'everything' | 'everything_except_pii' | 'basic_attributes_only' | null;
1123
1132
 
1124
1133
  /**
1125
- * The reason for the disconnection of the call. Read details desciption about
1134
+ * The reason for the disconnection of the call. Read detailed description about
1126
1135
  * reasons listed here at
1127
1136
  * [Disconnection Reason Doc](/reliability/debug-call-disconnect#understanding-disconnection-reasons).
1128
1137
  */
@@ -1277,6 +1286,15 @@ export interface WebCallResponse {
1277
1286
  | WebCallResponse.ToolCallResultUtterance
1278
1287
  | WebCallResponse.DtmfUtterance
1279
1288
  >;
1289
+
1290
+ /**
1291
+ * The destination number or identifier where the call was transferred to. Only
1292
+ * populated when the disconnection reason was `call_transfer`. Can be a phone
1293
+ * number or a SIP URI. SIP URIs are prefixed with "sip:" and may include a
1294
+ * ";transport=..." portion (if transport is known) where the transport type can be
1295
+ * "tls", "tcp", "udp", or "auto".
1296
+ */
1297
+ transfer_destination?: string | null;
1280
1298
  }
1281
1299
 
1282
1300
  export namespace WebCallResponse {
@@ -2135,6 +2153,12 @@ export interface CallCreatePhoneCallParams {
2135
2153
  */
2136
2154
  to_number: string;
2137
2155
 
2156
+ /**
2157
+ * Override configuration for agent, retell LLM, or conversation flow settings for
2158
+ * a specific call.
2159
+ */
2160
+ agent_override?: CallCreatePhoneCallParams.AgentOverride;
2161
+
2138
2162
  /**
2139
2163
  * Add optional custom SIP headers to the call.
2140
2164
  */
@@ -2176,6 +2200,826 @@ export interface CallCreatePhoneCallParams {
2176
2200
  retell_llm_dynamic_variables?: { [key: string]: unknown };
2177
2201
  }
2178
2202
 
2203
+ export namespace CallCreatePhoneCallParams {
2204
+ /**
2205
+ * Override configuration for agent, retell LLM, or conversation flow settings for
2206
+ * a specific call.
2207
+ */
2208
+ export interface AgentOverride {
2209
+ /**
2210
+ * Override agent configuration settings. Any properties specified here will
2211
+ * override the base agent configuration for this call.
2212
+ */
2213
+ agent?: AgentOverride.Agent;
2214
+
2215
+ /**
2216
+ * Override conversation flow configuration settings. Only applicable when using
2217
+ * conversation flow as the response engine. Supported attributes - model_choice,
2218
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2219
+ * begin_after_user_silence_ms.
2220
+ */
2221
+ conversation_flow?: AgentOverride.ConversationFlow;
2222
+
2223
+ /**
2224
+ * Override Retell LLM configuration settings. Only applicable when using Retell
2225
+ * LLM as the response engine. Supported attributes - model, s2s_model,
2226
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2227
+ * begin_after_user_silence_ms, begin_message.
2228
+ */
2229
+ retell_llm?: AgentOverride.RetellLlm;
2230
+ }
2231
+
2232
+ export namespace AgentOverride {
2233
+ /**
2234
+ * Override agent configuration settings. Any properties specified here will
2235
+ * override the base agent configuration for this call.
2236
+ */
2237
+ export interface Agent {
2238
+ /**
2239
+ * The name of the agent. Only used for your own reference.
2240
+ */
2241
+ agent_name?: string | null;
2242
+
2243
+ /**
2244
+ * If set to true, DTMF input will be accepted and processed. If false, any DTMF
2245
+ * input will be ignored. Default to true.
2246
+ */
2247
+ allow_user_dtmf?: boolean;
2248
+
2249
+ /**
2250
+ * If set, will add ambient environment sound to the call to make experience more
2251
+ * realistic. Currently supports the following options:
2252
+ *
2253
+ * - `coffee-shop`: Coffee shop ambience with people chatting in background.
2254
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/coffee-shop.wav)
2255
+ *
2256
+ * - `convention-hall`: Convention hall ambience, with some echo and people
2257
+ * chatting in background.
2258
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/convention-hall.wav)
2259
+ *
2260
+ * - `summer-outdoor`: Summer outdoor ambience with cicada chirping.
2261
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/summer-outdoor.wav)
2262
+ *
2263
+ * - `mountain-outdoor`: Mountain outdoor ambience with birds singing.
2264
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/mountain-outdoor.wav)
2265
+ *
2266
+ * - `static-noise`: Constant static noise.
2267
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/static-noise.wav)
2268
+ *
2269
+ * - `call-center`: Call center work noise.
2270
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/call-center.wav)
2271
+ *
2272
+ * Set to `null` to remove ambient sound from this agent.
2273
+ */
2274
+ ambient_sound?:
2275
+ | 'coffee-shop'
2276
+ | 'convention-hall'
2277
+ | 'summer-outdoor'
2278
+ | 'mountain-outdoor'
2279
+ | 'static-noise'
2280
+ | 'call-center'
2281
+ | null;
2282
+
2283
+ /**
2284
+ * If set, will control the volume of the ambient sound. Value ranging from [0,2].
2285
+ * Lower value means quieter ambient sound, while higher value means louder ambient
2286
+ * sound. If unset, default value 1 will apply.
2287
+ */
2288
+ ambient_sound_volume?: number;
2289
+
2290
+ /**
2291
+ * Only applicable when enable_backchannel is true. Controls how often the agent
2292
+ * would backchannel when a backchannel is possible. Value ranging from [0,1].
2293
+ * Lower value means less frequent backchannel, while higher value means more
2294
+ * frequent backchannel. If unset, default value 0.8 will apply.
2295
+ */
2296
+ backchannel_frequency?: number;
2297
+
2298
+ /**
2299
+ * Only applicable when enable_backchannel is true. A list of words that the agent
2300
+ * would use as backchannel. If not set, default backchannel words will apply.
2301
+ * Check out
2302
+ * [backchannel default words](/agent/interaction-configuration#backchannel) for
2303
+ * more details. Note that certain voices do not work too well with certain words,
2304
+ * so it's recommended to experiment before adding any words.
2305
+ */
2306
+ backchannel_words?: Array<string> | null;
2307
+
2308
+ /**
2309
+ * If set, will delay the first message by the specified amount of milliseconds, so
2310
+ * that it gives user more time to prepare to take the call. Valid range is [0,
2311
+ * 5000]. If not set or set to 0, agent will speak immediately. Only applicable
2312
+ * when agent speaks first.
2313
+ */
2314
+ begin_message_delay_ms?: number;
2315
+
2316
+ /**
2317
+ * Provide a customized list of keywords to bias the transcriber model, so that
2318
+ * these words are more likely to get transcribed. Commonly used for names, brands,
2319
+ * street, etc.
2320
+ */
2321
+ boosted_keywords?: Array<string> | null;
2322
+
2323
+ /**
2324
+ * Granular setting to manage how Retell stores sensitive data (transcripts,
2325
+ * recordings, logs, etc.). This replaces the deprecated
2326
+ * `opt_out_sensitive_data_storage` field.
2327
+ *
2328
+ * - `everything`: Store all data including transcripts, recordings, and logs.
2329
+ * - `everything_except_pii`: Store data without PII when PII is detected.
2330
+ * - `basic_attributes_only`: Store only basic attributes; no
2331
+ * transcripts/recordings/logs. If not set, default value of "everything" will
2332
+ * apply.
2333
+ */
2334
+ data_storage_setting?: 'everything' | 'everything_except_pii' | 'basic_attributes_only';
2335
+
2336
+ /**
2337
+ * If set, determines what denoising mode to use. Default to noise-cancellation.
2338
+ */
2339
+ denoising_mode?: 'noise-cancellation' | 'noise-and-background-speech-cancellation';
2340
+
2341
+ /**
2342
+ * Controls whether the agent would backchannel (agent interjects the speaker with
2343
+ * phrases like "yeah", "uh-huh" to signify interest and engagement). Backchannel
2344
+ * when enabled tends to show up more in longer user utterances. If not set, agent
2345
+ * will not backchannel.
2346
+ */
2347
+ enable_backchannel?: boolean;
2348
+
2349
+ /**
2350
+ * If users stay silent for a period after agent speech, end the call. The minimum
2351
+ * value allowed is 10,000 ms (10 s). By default, this is set to 600000 (10 min).
2352
+ */
2353
+ end_call_after_silence_ms?: number;
2354
+
2355
+ /**
2356
+ * When TTS provider for the selected voice is experiencing outages, we would use
2357
+ * fallback voices listed here for the agent. Voice id and the fallback voice ids
2358
+ * must be from different TTS providers. The system would go through the list in
2359
+ * order, if the first one in the list is also having outage, it would use the next
2360
+ * one. Set to null to remove voice fallback for the agent.
2361
+ */
2362
+ fallback_voice_ids?: Array<string> | null;
2363
+
2364
+ /**
2365
+ * Controls how sensitive the agent is to user interruptions. Value ranging from
2366
+ * [0,1]. Lower value means it will take longer / more words for user to interrupt
2367
+ * agent, while higher value means it's easier for user to interrupt agent. If
2368
+ * unset, default value 1 will apply. When this is set to 0, agent would never be
2369
+ * interrupted.
2370
+ */
2371
+ interruption_sensitivity?: number;
2372
+
2373
+ /**
2374
+ * Specifies what language (and dialect) the speech recognition will operate in.
2375
+ * For instance, selecting `en-GB` optimizes speech recognition for British
2376
+ * English. If unset, will use default value `en-US`. Select `multi` for
2377
+ * multilingual support, currently this supports Spanish and English.
2378
+ */
2379
+ language?:
2380
+ | 'en-US'
2381
+ | 'en-IN'
2382
+ | 'en-GB'
2383
+ | 'en-AU'
2384
+ | 'en-NZ'
2385
+ | 'de-DE'
2386
+ | 'es-ES'
2387
+ | 'es-419'
2388
+ | 'hi-IN'
2389
+ | 'fr-FR'
2390
+ | 'fr-CA'
2391
+ | 'ja-JP'
2392
+ | 'pt-PT'
2393
+ | 'pt-BR'
2394
+ | 'zh-CN'
2395
+ | 'ru-RU'
2396
+ | 'it-IT'
2397
+ | 'ko-KR'
2398
+ | 'nl-NL'
2399
+ | 'nl-BE'
2400
+ | 'pl-PL'
2401
+ | 'tr-TR'
2402
+ | 'th-TH'
2403
+ | 'vi-VN'
2404
+ | 'ro-RO'
2405
+ | 'bg-BG'
2406
+ | 'ca-ES'
2407
+ | 'da-DK'
2408
+ | 'fi-FI'
2409
+ | 'el-GR'
2410
+ | 'hu-HU'
2411
+ | 'id-ID'
2412
+ | 'no-NO'
2413
+ | 'sk-SK'
2414
+ | 'sv-SE'
2415
+ | 'multi';
2416
+
2417
+ /**
2418
+ * Maximum allowed length for the call, will force end the call if reached. The
2419
+ * minimum value allowed is 60,000 ms (1 min), and maximum value allowed is
2420
+ * 7,200,000 (2 hours). By default, this is set to 3,600,000 (1 hour).
2421
+ */
2422
+ max_call_duration_ms?: number;
2423
+
2424
+ /**
2425
+ * If set to true, will normalize some parts of the text (number, currency, date,
2426
+ * etc.) to their spoken form for more consistent speech synthesis
2427
+ * (sometimes the voice synthesis system itself might read these wrong with the
2428
+ * raw text). For example, it will convert "Call my number 2137112342 on Jul 5th,
2429
+ * 2024 for the $24.12 payment" to "Call my number two one three seven one one two
2430
+ * three four two on july fifth, twenty twenty four for the twenty four dollars
2431
+ * twelve cents payment" before starting audio generation.
2432
+ */
2433
+ normalize_for_speech?: boolean;
2434
+
2435
+ /**
2436
+ * Whether this agent opts in for signed URLs for public logs and recordings. When
2437
+ * enabled, the generated URLs will include security signatures that restrict
2438
+ * access and automatically expire after 24 hours.
2439
+ */
2440
+ opt_in_signed_url?: boolean;
2441
+
2442
+ /**
2443
+ * Configuration for PII scrubbing from transcripts and recordings.
2444
+ */
2445
+ pii_config?: Agent.PiiConfig;
2446
+
2447
+ /**
2448
+ * Post call analysis data to extract from the call. This data will augment the
2449
+ * pre-defined variables extracted in the call analysis. This will be available
2450
+ * after the call ends.
2451
+ */
2452
+ post_call_analysis_data?: Array<
2453
+ | Agent.StringAnalysisData
2454
+ | Agent.EnumAnalysisData
2455
+ | Agent.BooleanAnalysisData
2456
+ | Agent.NumberAnalysisData
2457
+ > | null;
2458
+
2459
+ /**
2460
+ * The model to use for post call analysis. Default to gpt-4o-mini.
2461
+ */
2462
+ post_call_analysis_model?:
2463
+ | 'gpt-4o'
2464
+ | 'gpt-4o-mini'
2465
+ | 'gpt-4.1'
2466
+ | 'gpt-4.1-mini'
2467
+ | 'gpt-4.1-nano'
2468
+ | 'gpt-5'
2469
+ | 'gpt-5-mini'
2470
+ | 'gpt-5-nano'
2471
+ | 'claude-4.5-sonnet'
2472
+ | 'claude-4.0-sonnet'
2473
+ | 'claude-3.7-sonnet'
2474
+ | 'claude-3.5-haiku'
2475
+ | 'gemini-2.0-flash'
2476
+ | 'gemini-2.0-flash-lite'
2477
+ | 'gemini-2.5-flash'
2478
+ | 'gemini-2.5-flash-lite';
2479
+
2480
+ /**
2481
+ * A list of words / phrases and their pronunciation to be used to guide the audio
2482
+ * synthesis for consistent pronunciation. Currently only supported for English &
2483
+ * 11labs voices. Set to null to remove pronunciation dictionary from this agent.
2484
+ */
2485
+ pronunciation_dictionary?: Array<Agent.PronunciationDictionary> | null;
2486
+
2487
+ /**
2488
+ * If set, controls how many times agent would remind user when user is
2489
+ * unresponsive. Must be a non negative integer. If unset, default value of 1 will
2490
+ * apply (remind once). Set to 0 to disable agent from reminding.
2491
+ */
2492
+ reminder_max_count?: number;
2493
+
2494
+ /**
2495
+ * If set (in milliseconds), will trigger a reminder to the agent to speak if the
2496
+ * user has been silent for the specified duration after some agent speech. Must be
2497
+ * a positive number. If unset, default value of 10000 ms (10 s) will apply.
2498
+ */
2499
+ reminder_trigger_ms?: number;
2500
+
2501
+ /**
2502
+ * The Response Engine to attach to the agent. It is used to generate responses for
2503
+ * the agent. You need to create a Response Engine first before attaching it to an
2504
+ * agent.
2505
+ */
2506
+ response_engine?:
2507
+ | Agent.ResponseEngineRetellLm
2508
+ | Agent.ResponseEngineCustomLm
2509
+ | Agent.ResponseEngineConversationFlow;
2510
+
2511
+ /**
2512
+ * Controls how responsive the agent is. Value ranging from [0,1]. Lower value
2513
+ * means less responsive agent (wait more, respond slower), while higher value
2514
+ * means faster exchanges (respond when it can). If unset, default value 1 will
2515
+ * apply.
2516
+ */
2517
+ responsiveness?: number;
2518
+
2519
+ /**
2520
+ * If set, the phone ringing will last for the specified amount of milliseconds.
2521
+ * This applies for both outbound call ringtime, and call transfer ringtime.
2522
+ * Default to 30000 (30 s). Valid range is [5000, 90000].
2523
+ */
2524
+ ring_duration_ms?: number;
2525
+
2526
+ /**
2527
+ * If set, determines whether speech to text should focus on latency or accuracy.
2528
+ * Default to fast mode.
2529
+ */
2530
+ stt_mode?: 'fast' | 'accurate';
2531
+
2532
+ user_dtmf_options?: Agent.UserDtmfOptions | null;
2533
+
2534
+ /**
2535
+ * If set, determines the vocabulary set to use for transcription. This setting
2536
+ * only applies for English agents, for non English agent, this setting is a no-op.
2537
+ * Default to general.
2538
+ */
2539
+ vocab_specialization?: 'general' | 'medical';
2540
+
2541
+ /**
2542
+ * Unique voice id used for the agent. Find list of available voices and their
2543
+ * preview in Dashboard.
2544
+ */
2545
+ voice_id?: string;
2546
+
2547
+ /**
2548
+ * Optionally set the voice model used for the selected voice. Currently only
2549
+ * ElevenLabs voices have voice model selections. Set to null to remove voice model
2550
+ * selection, and default ones will apply. Check out the dashboard for details on
2551
+ * each voice model.
2552
+ */
2553
+ voice_model?:
2554
+ | 'eleven_turbo_v2'
2555
+ | 'eleven_flash_v2'
2556
+ | 'eleven_turbo_v2_5'
2557
+ | 'eleven_flash_v2_5'
2558
+ | 'eleven_multilingual_v2'
2559
+ | 'tts-1'
2560
+ | 'gpt-4o-mini-tts'
2561
+ | null;
2562
+
2563
+ /**
2564
+ * Controls speed of voice. Value ranging from [0.5,2]. Lower value means slower
2565
+ * speech, while higher value means faster speech rate. If unset, default value 1
2566
+ * will apply.
2567
+ */
2568
+ voice_speed?: number;
2569
+
2570
+ /**
2571
+ * Controls how stable the voice is. Value ranging from [0,2]. Lower value means
2572
+ * more stable, and higher value means more variant speech generation. Currently
2573
+ * this setting only applies to `11labs` voices. If unset, default value 1 will
2574
+ * apply.
2575
+ */
2576
+ voice_temperature?: number;
2577
+
2578
+ /**
2579
+ * If this option is set, the call will try to detect voicemail in the first 3
2580
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
2581
+ * applied when the voicemail is detected. Set this to null to disable voicemail
2582
+ * detection.
2583
+ */
2584
+ voicemail_option?: Agent.VoicemailOption | null;
2585
+
2586
+ /**
2587
+ * If set, will control the volume of the agent. Value ranging from [0,2]. Lower
2588
+ * value means quieter agent speech, while higher value means louder agent speech.
2589
+ * If unset, default value 1 will apply.
2590
+ */
2591
+ volume?: number;
2592
+
2593
+ /**
2594
+ * The timeout for the webhook in milliseconds. If not set, default value of 10000
2595
+ * will apply.
2596
+ */
2597
+ webhook_timeout_ms?: number;
2598
+
2599
+ /**
2600
+ * The webhook for agent to listen to call events. See what events it would get at
2601
+ * [webhook doc](/features/webhook). If set, will bind webhook events for this
2602
+ * agent to the specified url, and will ignore the account level webhook for this
2603
+ * agent. Set to `null` to remove webhook url from this agent.
2604
+ */
2605
+ webhook_url?: string | null;
2606
+ }
2607
+
2608
+ export namespace Agent {
2609
+ /**
2610
+ * Configuration for PII scrubbing from transcripts and recordings.
2611
+ */
2612
+ export interface PiiConfig {
2613
+ /**
2614
+ * List of PII categories to scrub from transcripts and recordings.
2615
+ */
2616
+ categories: Array<
2617
+ | 'person_name'
2618
+ | 'address'
2619
+ | 'email'
2620
+ | 'phone_number'
2621
+ | 'ssn'
2622
+ | 'passport'
2623
+ | 'driver_license'
2624
+ | 'credit_card'
2625
+ | 'bank_account'
2626
+ | 'password'
2627
+ | 'pin'
2628
+ | 'medical_id'
2629
+ | 'date_of_birth'
2630
+ >;
2631
+
2632
+ /**
2633
+ * The processing mode for PII scrubbing. Currently only post-call is supported.
2634
+ */
2635
+ mode: 'post_call';
2636
+ }
2637
+
2638
+ export interface StringAnalysisData {
2639
+ /**
2640
+ * Description of the variable.
2641
+ */
2642
+ description: string;
2643
+
2644
+ /**
2645
+ * Name of the variable.
2646
+ */
2647
+ name: string;
2648
+
2649
+ /**
2650
+ * Type of the variable to extract.
2651
+ */
2652
+ type: 'string';
2653
+
2654
+ /**
2655
+ * Examples of the variable value to teach model the style and syntax.
2656
+ */
2657
+ examples?: Array<string>;
2658
+ }
2659
+
2660
+ export interface EnumAnalysisData {
2661
+ /**
2662
+ * The possible values of the variable, must be non empty array.
2663
+ */
2664
+ choices: Array<string>;
2665
+
2666
+ /**
2667
+ * Description of the variable.
2668
+ */
2669
+ description: string;
2670
+
2671
+ /**
2672
+ * Name of the variable.
2673
+ */
2674
+ name: string;
2675
+
2676
+ /**
2677
+ * Type of the variable to extract.
2678
+ */
2679
+ type: 'enum';
2680
+ }
2681
+
2682
+ export interface BooleanAnalysisData {
2683
+ /**
2684
+ * Description of the variable.
2685
+ */
2686
+ description: string;
2687
+
2688
+ /**
2689
+ * Name of the variable.
2690
+ */
2691
+ name: string;
2692
+
2693
+ /**
2694
+ * Type of the variable to extract.
2695
+ */
2696
+ type: 'boolean';
2697
+ }
2698
+
2699
+ export interface NumberAnalysisData {
2700
+ /**
2701
+ * Description of the variable.
2702
+ */
2703
+ description: string;
2704
+
2705
+ /**
2706
+ * Name of the variable.
2707
+ */
2708
+ name: string;
2709
+
2710
+ /**
2711
+ * Type of the variable to extract.
2712
+ */
2713
+ type: 'number';
2714
+ }
2715
+
2716
+ export interface PronunciationDictionary {
2717
+ /**
2718
+ * The phonetic alphabet to be used for pronunciation.
2719
+ */
2720
+ alphabet: 'ipa' | 'cmu';
2721
+
2722
+ /**
2723
+ * Pronunciation of the word in the format of an IPA / CMU pronunciation.
2724
+ */
2725
+ phoneme: string;
2726
+
2727
+ /**
2728
+ * The string of word / phrase to be annotated with pronunciation.
2729
+ */
2730
+ word: string;
2731
+ }
2732
+
2733
+ export interface ResponseEngineRetellLm {
2734
+ /**
2735
+ * id of the Retell LLM Response Engine.
2736
+ */
2737
+ llm_id: string;
2738
+
2739
+ /**
2740
+ * type of the Response Engine.
2741
+ */
2742
+ type: 'retell-llm';
2743
+
2744
+ /**
2745
+ * Version of the Retell LLM Response Engine.
2746
+ */
2747
+ version?: number | null;
2748
+ }
2749
+
2750
+ export interface ResponseEngineCustomLm {
2751
+ /**
2752
+ * LLM websocket url of the custom LLM.
2753
+ */
2754
+ llm_websocket_url: string;
2755
+
2756
+ /**
2757
+ * type of the Response Engine.
2758
+ */
2759
+ type: 'custom-llm';
2760
+ }
2761
+
2762
+ export interface ResponseEngineConversationFlow {
2763
+ /**
2764
+ * ID of the Conversation Flow Response Engine.
2765
+ */
2766
+ conversation_flow_id: string;
2767
+
2768
+ /**
2769
+ * type of the Response Engine.
2770
+ */
2771
+ type: 'conversation-flow';
2772
+
2773
+ /**
2774
+ * Version of the Conversation Flow Response Engine.
2775
+ */
2776
+ version?: number | null;
2777
+ }
2778
+
2779
+ export interface UserDtmfOptions {
2780
+ /**
2781
+ * The maximum number of digits allowed in the user's DTMF (Dual-Tone
2782
+ * Multi-Frequency) input per turn. Once this limit is reached, the input is
2783
+ * considered complete and a response will be generated immediately.
2784
+ */
2785
+ digit_limit?: number | null;
2786
+
2787
+ /**
2788
+ * A single key that signals the end of DTMF input. Acceptable values include any
2789
+ * digit (0–9), the pound/hash symbol (#), or the asterisk (\*).
2790
+ */
2791
+ termination_key?: string | null;
2792
+
2793
+ /**
2794
+ * The time (in milliseconds) to wait for user DTMF input before timing out. The
2795
+ * timer resets with each digit received.
2796
+ */
2797
+ timeout_ms?: number;
2798
+ }
2799
+
2800
+ /**
2801
+ * If this option is set, the call will try to detect voicemail in the first 3
2802
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
2803
+ * applied when the voicemail is detected. Set this to null to disable voicemail
2804
+ * detection.
2805
+ */
2806
+ export interface VoicemailOption {
2807
+ action:
2808
+ | VoicemailOption.VoicemailActionPrompt
2809
+ | VoicemailOption.VoicemailActionStaticText
2810
+ | VoicemailOption.VoicemailActionHangup;
2811
+ }
2812
+
2813
+ export namespace VoicemailOption {
2814
+ export interface VoicemailActionPrompt {
2815
+ /**
2816
+ * The prompt used to generate the text to be spoken when the call is detected to
2817
+ * be in voicemail.
2818
+ */
2819
+ text: string;
2820
+
2821
+ type: 'prompt';
2822
+ }
2823
+
2824
+ export interface VoicemailActionStaticText {
2825
+ /**
2826
+ * The text to be spoken when the call is detected to be in voicemail.
2827
+ */
2828
+ text: string;
2829
+
2830
+ type: 'static_text';
2831
+ }
2832
+
2833
+ export interface VoicemailActionHangup {
2834
+ type: 'hangup';
2835
+ }
2836
+ }
2837
+ }
2838
+
2839
+ /**
2840
+ * Override conversation flow configuration settings. Only applicable when using
2841
+ * conversation flow as the response engine. Supported attributes - model_choice,
2842
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2843
+ * begin_after_user_silence_ms.
2844
+ */
2845
+ export interface ConversationFlow {
2846
+ /**
2847
+ * If set, the AI will begin the conversation after waiting for the user for the
2848
+ * duration (in milliseconds) specified by this attribute. This only applies if the
2849
+ * agent is configured to wait for the user to speak first. If not set, the agent
2850
+ * will wait indefinitely for the user to speak.
2851
+ */
2852
+ begin_after_user_silence_ms?: number | null;
2853
+
2854
+ /**
2855
+ * Knowledge base configuration for RAG retrieval.
2856
+ */
2857
+ kb_config?: ConversationFlow.KBConfig;
2858
+
2859
+ /**
2860
+ * Knowledge base IDs for RAG (Retrieval-Augmented Generation).
2861
+ */
2862
+ knowledge_base_ids?: Array<string> | null;
2863
+
2864
+ /**
2865
+ * The model choice for the conversation flow.
2866
+ */
2867
+ model_choice?: ConversationFlow.ModelChoice;
2868
+
2869
+ /**
2870
+ * Controls the randomness of the model's responses. Lower values make responses
2871
+ * more deterministic.
2872
+ */
2873
+ model_temperature?: number | null;
2874
+
2875
+ /**
2876
+ * Who starts the conversation - user or agent.
2877
+ */
2878
+ start_speaker?: 'user' | 'agent';
2879
+ }
2880
+
2881
+ export namespace ConversationFlow {
2882
+ /**
2883
+ * Knowledge base configuration for RAG retrieval.
2884
+ */
2885
+ export interface KBConfig {
2886
+ /**
2887
+ * Similarity threshold for filtering search results
2888
+ */
2889
+ filter_score?: number;
2890
+
2891
+ /**
2892
+ * Max number of knowledge base chunks to retrieve
2893
+ */
2894
+ top_k?: number;
2895
+ }
2896
+
2897
+ /**
2898
+ * The model choice for the conversation flow.
2899
+ */
2900
+ export interface ModelChoice {
2901
+ /**
2902
+ * The LLM model to use
2903
+ */
2904
+ model:
2905
+ | 'gpt-5'
2906
+ | 'gpt-5-mini'
2907
+ | 'gpt-5-nano'
2908
+ | 'gpt-4o'
2909
+ | 'gpt-4o-mini'
2910
+ | 'gpt-4.1'
2911
+ | 'gpt-4.1-mini'
2912
+ | 'gpt-4.1-nano'
2913
+ | 'claude-3.7-sonnet'
2914
+ | 'claude-3.5-haiku'
2915
+ | 'gemini-2.0-flash'
2916
+ | 'gemini-2.0-flash-lite'
2917
+ | 'gemini-2.5-flash'
2918
+ | 'gemini-2.5-flash-lite';
2919
+
2920
+ /**
2921
+ * Type of model choice
2922
+ */
2923
+ type: 'cascading';
2924
+
2925
+ /**
2926
+ * Whether to use high priority pool with more dedicated resource, default false
2927
+ */
2928
+ high_priority?: boolean;
2929
+ }
2930
+ }
2931
+
2932
+ /**
2933
+ * Override Retell LLM configuration settings. Only applicable when using Retell
2934
+ * LLM as the response engine. Supported attributes - model, s2s_model,
2935
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
2936
+ * begin_after_user_silence_ms, begin_message.
2937
+ */
2938
+ export interface RetellLlm {
2939
+ /**
2940
+ * If set, the AI will begin the conversation after waiting for the user for the
2941
+ * duration (in milliseconds) specified by this attribute. This only applies if the
2942
+ * agent is configured to wait for the user to speak first. If not set, the agent
2943
+ * will wait indefinitely for the user to speak.
2944
+ */
2945
+ begin_after_user_silence_ms?: number | null;
2946
+
2947
+ /**
2948
+ * First utterance said by the agent in the call. If not set, LLM will dynamically
2949
+ * generate a message. If set to "", agent will wait for user to speak first.
2950
+ */
2951
+ begin_message?: string | null;
2952
+
2953
+ /**
2954
+ * Knowledge base configuration for RAG retrieval.
2955
+ */
2956
+ kb_config?: RetellLlm.KBConfig | null;
2957
+
2958
+ /**
2959
+ * A list of knowledge base ids to use for this resource.
2960
+ */
2961
+ knowledge_base_ids?: Array<string> | null;
2962
+
2963
+ /**
2964
+ * Select the underlying text LLM. If not set, would default to gpt-4.1.
2965
+ */
2966
+ model?:
2967
+ | 'gpt-5'
2968
+ | 'gpt-5-mini'
2969
+ | 'gpt-5-nano'
2970
+ | 'gpt-4o'
2971
+ | 'gpt-4o-mini'
2972
+ | 'gpt-4.1'
2973
+ | 'gpt-4.1-mini'
2974
+ | 'gpt-4.1-nano'
2975
+ | 'claude-3.7-sonnet'
2976
+ | 'claude-3.5-haiku'
2977
+ | 'gemini-2.0-flash'
2978
+ | 'gemini-2.0-flash-lite'
2979
+ | 'gemini-2.5-flash'
2980
+ | 'gemini-2.5-flash-lite'
2981
+ | null;
2982
+
2983
+ /**
2984
+ * If set, will control the randomness of the response. Value ranging from [0,1].
2985
+ * Lower value means more deterministic, while higher value means more random. If
2986
+ * unset, default value 0 will apply. Note that for tool calling, a lower value is
2987
+ * recommended.
2988
+ */
2989
+ model_temperature?: number;
2990
+
2991
+ /**
2992
+ * Select the underlying speech to speech model. Can only set this or model, not
2993
+ * both.
2994
+ */
2995
+ s2s_model?: 'gpt-4o-realtime' | 'gpt-4o-mini-realtime' | 'gpt-realtime' | null;
2996
+
2997
+ /**
2998
+ * The speaker who starts the conversation. Must be either 'user' or
2999
+ * 'agent'.
3000
+ */
3001
+ start_speaker?: 'user' | 'agent';
3002
+ }
3003
+
3004
+ export namespace RetellLlm {
3005
+ /**
3006
+ * Knowledge base configuration for RAG retrieval.
3007
+ */
3008
+ export interface KBConfig {
3009
+ /**
3010
+ * Similarity threshold for filtering search results
3011
+ */
3012
+ filter_score?: number;
3013
+
3014
+ /**
3015
+ * Max number of knowledge base chunks to retrieve
3016
+ */
3017
+ top_k?: number;
3018
+ }
3019
+ }
3020
+ }
3021
+ }
3022
+
2179
3023
  export interface CallCreateWebCallParams {
2180
3024
  /**
2181
3025
  * Unique id of agent used for the call. Your agent would contain the LLM Websocket
@@ -2183,6 +3027,12 @@ export interface CallCreateWebCallParams {
2183
3027
  */
2184
3028
  agent_id: string;
2185
3029
 
3030
+ /**
3031
+ * Override configuration for agent, retell LLM, or conversation flow settings for
3032
+ * a specific call.
3033
+ */
3034
+ agent_override?: CallCreateWebCallParams.AgentOverride;
3035
+
2186
3036
  /**
2187
3037
  * The version of the agent to use for the call.
2188
3038
  */
@@ -2203,45 +3053,1691 @@ export interface CallCreateWebCallParams {
2203
3053
  retell_llm_dynamic_variables?: { [key: string]: unknown };
2204
3054
  }
2205
3055
 
2206
- export interface CallRegisterPhoneCallParams {
3056
+ export namespace CallCreateWebCallParams {
2207
3057
  /**
2208
- * The agent to use for the call.
3058
+ * Override configuration for agent, retell LLM, or conversation flow settings for
3059
+ * a specific call.
2209
3060
  */
2210
- agent_id: string;
3061
+ export interface AgentOverride {
3062
+ /**
3063
+ * Override agent configuration settings. Any properties specified here will
3064
+ * override the base agent configuration for this call.
3065
+ */
3066
+ agent?: AgentOverride.Agent;
2211
3067
 
2212
- /**
2213
- * The version of the agent to use for the call.
2214
- */
2215
- agent_version?: number;
3068
+ /**
3069
+ * Override conversation flow configuration settings. Only applicable when using
3070
+ * conversation flow as the response engine. Supported attributes - model_choice,
3071
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3072
+ * begin_after_user_silence_ms.
3073
+ */
3074
+ conversation_flow?: AgentOverride.ConversationFlow;
2216
3075
 
2217
- /**
2218
- * Direction of the phone call. Stored for tracking purpose.
2219
- */
2220
- direction?: 'inbound' | 'outbound';
3076
+ /**
3077
+ * Override Retell LLM configuration settings. Only applicable when using Retell
3078
+ * LLM as the response engine. Supported attributes - model, s2s_model,
3079
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3080
+ * begin_after_user_silence_ms, begin_message.
3081
+ */
3082
+ retell_llm?: AgentOverride.RetellLlm;
3083
+ }
2221
3084
 
2222
- /**
2223
- * The number you own in E.164 format. Stored for tracking purpose.
2224
- */
2225
- from_number?: string;
3085
+ export namespace AgentOverride {
3086
+ /**
3087
+ * Override agent configuration settings. Any properties specified here will
3088
+ * override the base agent configuration for this call.
3089
+ */
3090
+ export interface Agent {
3091
+ /**
3092
+ * The name of the agent. Only used for your own reference.
3093
+ */
3094
+ agent_name?: string | null;
2226
3095
 
2227
- /**
2228
- * An arbitrary object for storage purpose only. You can put anything here like
2229
- * your internal customer id associated with the call. Not used for processing. You
2230
- * can later get this field from the call object.
2231
- */
2232
- metadata?: unknown;
3096
+ /**
3097
+ * If set to true, DTMF input will be accepted and processed. If false, any DTMF
3098
+ * input will be ignored. Default to true.
3099
+ */
3100
+ allow_user_dtmf?: boolean;
2233
3101
 
2234
- /**
2235
- * Add optional dynamic variables in key value pairs of string that injects into
2236
- * your Response Engine prompt and tool description. Only applicable for Response
2237
- * Engine.
2238
- */
2239
- retell_llm_dynamic_variables?: { [key: string]: unknown };
3102
+ /**
3103
+ * If set, will add ambient environment sound to the call to make experience more
3104
+ * realistic. Currently supports the following options:
3105
+ *
3106
+ * - `coffee-shop`: Coffee shop ambience with people chatting in background.
3107
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/coffee-shop.wav)
3108
+ *
3109
+ * - `convention-hall`: Convention hall ambience, with some echo and people
3110
+ * chatting in background.
3111
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/convention-hall.wav)
3112
+ *
3113
+ * - `summer-outdoor`: Summer outdoor ambience with cicada chirping.
3114
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/summer-outdoor.wav)
3115
+ *
3116
+ * - `mountain-outdoor`: Mountain outdoor ambience with birds singing.
3117
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/mountain-outdoor.wav)
3118
+ *
3119
+ * - `static-noise`: Constant static noise.
3120
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/static-noise.wav)
3121
+ *
3122
+ * - `call-center`: Call center work noise.
3123
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/call-center.wav)
3124
+ *
3125
+ * Set to `null` to remove ambient sound from this agent.
3126
+ */
3127
+ ambient_sound?:
3128
+ | 'coffee-shop'
3129
+ | 'convention-hall'
3130
+ | 'summer-outdoor'
3131
+ | 'mountain-outdoor'
3132
+ | 'static-noise'
3133
+ | 'call-center'
3134
+ | null;
2240
3135
 
2241
- /**
2242
- * The number you want to call, in E.164 format. Stored for tracking purpose.
2243
- */
2244
- to_number?: string;
3136
+ /**
3137
+ * If set, will control the volume of the ambient sound. Value ranging from [0,2].
3138
+ * Lower value means quieter ambient sound, while higher value means louder ambient
3139
+ * sound. If unset, default value 1 will apply.
3140
+ */
3141
+ ambient_sound_volume?: number;
3142
+
3143
+ /**
3144
+ * Only applicable when enable_backchannel is true. Controls how often the agent
3145
+ * would backchannel when a backchannel is possible. Value ranging from [0,1].
3146
+ * Lower value means less frequent backchannel, while higher value means more
3147
+ * frequent backchannel. If unset, default value 0.8 will apply.
3148
+ */
3149
+ backchannel_frequency?: number;
3150
+
3151
+ /**
3152
+ * Only applicable when enable_backchannel is true. A list of words that the agent
3153
+ * would use as backchannel. If not set, default backchannel words will apply.
3154
+ * Check out
3155
+ * [backchannel default words](/agent/interaction-configuration#backchannel) for
3156
+ * more details. Note that certain voices do not work too well with certain words,
3157
+ * so it's recommended to experiment before adding any words.
3158
+ */
3159
+ backchannel_words?: Array<string> | null;
3160
+
3161
+ /**
3162
+ * If set, will delay the first message by the specified amount of milliseconds, so
3163
+ * that it gives user more time to prepare to take the call. Valid range is [0,
3164
+ * 5000]. If not set or set to 0, agent will speak immediately. Only applicable
3165
+ * when agent speaks first.
3166
+ */
3167
+ begin_message_delay_ms?: number;
3168
+
3169
+ /**
3170
+ * Provide a customized list of keywords to bias the transcriber model, so that
3171
+ * these words are more likely to get transcribed. Commonly used for names, brands,
3172
+ * street, etc.
3173
+ */
3174
+ boosted_keywords?: Array<string> | null;
3175
+
3176
+ /**
3177
+ * Granular setting to manage how Retell stores sensitive data (transcripts,
3178
+ * recordings, logs, etc.). This replaces the deprecated
3179
+ * `opt_out_sensitive_data_storage` field.
3180
+ *
3181
+ * - `everything`: Store all data including transcripts, recordings, and logs.
3182
+ * - `everything_except_pii`: Store data without PII when PII is detected.
3183
+ * - `basic_attributes_only`: Store only basic attributes; no
3184
+ * transcripts/recordings/logs. If not set, default value of "everything" will
3185
+ * apply.
3186
+ */
3187
+ data_storage_setting?: 'everything' | 'everything_except_pii' | 'basic_attributes_only';
3188
+
3189
+ /**
3190
+ * If set, determines what denoising mode to use. Default to noise-cancellation.
3191
+ */
3192
+ denoising_mode?: 'noise-cancellation' | 'noise-and-background-speech-cancellation';
3193
+
3194
+ /**
3195
+ * Controls whether the agent would backchannel (agent interjects the speaker with
3196
+ * phrases like "yeah", "uh-huh" to signify interest and engagement). Backchannel
3197
+ * when enabled tends to show up more in longer user utterances. If not set, agent
3198
+ * will not backchannel.
3199
+ */
3200
+ enable_backchannel?: boolean;
3201
+
3202
+ /**
3203
+ * If users stay silent for a period after agent speech, end the call. The minimum
3204
+ * value allowed is 10,000 ms (10 s). By default, this is set to 600000 (10 min).
3205
+ */
3206
+ end_call_after_silence_ms?: number;
3207
+
3208
+ /**
3209
+ * When TTS provider for the selected voice is experiencing outages, we would use
3210
+ * fallback voices listed here for the agent. Voice id and the fallback voice ids
3211
+ * must be from different TTS providers. The system would go through the list in
3212
+ * order, if the first one in the list is also having outage, it would use the next
3213
+ * one. Set to null to remove voice fallback for the agent.
3214
+ */
3215
+ fallback_voice_ids?: Array<string> | null;
3216
+
3217
+ /**
3218
+ * Controls how sensitive the agent is to user interruptions. Value ranging from
3219
+ * [0,1]. Lower value means it will take longer / more words for user to interrupt
3220
+ * agent, while higher value means it's easier for user to interrupt agent. If
3221
+ * unset, default value 1 will apply. When this is set to 0, agent would never be
3222
+ * interrupted.
3223
+ */
3224
+ interruption_sensitivity?: number;
3225
+
3226
+ /**
3227
+ * Specifies what language (and dialect) the speech recognition will operate in.
3228
+ * For instance, selecting `en-GB` optimizes speech recognition for British
3229
+ * English. If unset, will use default value `en-US`. Select `multi` for
3230
+ * multilingual support, currently this supports Spanish and English.
3231
+ */
3232
+ language?:
3233
+ | 'en-US'
3234
+ | 'en-IN'
3235
+ | 'en-GB'
3236
+ | 'en-AU'
3237
+ | 'en-NZ'
3238
+ | 'de-DE'
3239
+ | 'es-ES'
3240
+ | 'es-419'
3241
+ | 'hi-IN'
3242
+ | 'fr-FR'
3243
+ | 'fr-CA'
3244
+ | 'ja-JP'
3245
+ | 'pt-PT'
3246
+ | 'pt-BR'
3247
+ | 'zh-CN'
3248
+ | 'ru-RU'
3249
+ | 'it-IT'
3250
+ | 'ko-KR'
3251
+ | 'nl-NL'
3252
+ | 'nl-BE'
3253
+ | 'pl-PL'
3254
+ | 'tr-TR'
3255
+ | 'th-TH'
3256
+ | 'vi-VN'
3257
+ | 'ro-RO'
3258
+ | 'bg-BG'
3259
+ | 'ca-ES'
3260
+ | 'da-DK'
3261
+ | 'fi-FI'
3262
+ | 'el-GR'
3263
+ | 'hu-HU'
3264
+ | 'id-ID'
3265
+ | 'no-NO'
3266
+ | 'sk-SK'
3267
+ | 'sv-SE'
3268
+ | 'multi';
3269
+
3270
+ /**
3271
+ * Maximum allowed length for the call, will force end the call if reached. The
3272
+ * minimum value allowed is 60,000 ms (1 min), and maximum value allowed is
3273
+ * 7,200,000 (2 hours). By default, this is set to 3,600,000 (1 hour).
3274
+ */
3275
+ max_call_duration_ms?: number;
3276
+
3277
+ /**
3278
+ * If set to true, will normalize some parts of the text (number, currency, date,
3279
+ * etc) to their spoken form for more consistent speech synthesis
3280
+ * (sometimes the voice synthesis system itself might read these wrong with the
3281
+ * raw text). For example, it will convert "Call my number 2137112342 on Jul 5th,
3282
+ * 2024 for the $24.12 payment" to "Call my number two one three seven one one two
3283
+ * three four two on july fifth, twenty twenty four for the twenty four dollars
3284
+ * twelve cents payment" before starting audio generation.
3285
+ */
3286
+ normalize_for_speech?: boolean;
3287
+
3288
+ /**
3289
+ * Whether this agent opts in for signed URLs for public logs and recordings. When
3290
+ * enabled, the generated URLs will include security signatures that restrict
3291
+ * access and automatically expire after 24 hours.
3292
+ */
3293
+ opt_in_signed_url?: boolean;
3294
+
3295
+ /**
3296
+ * Configuration for PII scrubbing from transcripts and recordings.
3297
+ */
3298
+ pii_config?: Agent.PiiConfig;
3299
+
3300
+ /**
3301
+ * Post call analysis data to extract from the call. This data will augment the
3302
+ * pre-defined variables extracted in the call analysis. This will be available
3303
+ * after the call ends.
3304
+ */
3305
+ post_call_analysis_data?: Array<
3306
+ | Agent.StringAnalysisData
3307
+ | Agent.EnumAnalysisData
3308
+ | Agent.BooleanAnalysisData
3309
+ | Agent.NumberAnalysisData
3310
+ > | null;
3311
+
3312
+ /**
3313
+ * The model to use for post call analysis. Default to gpt-4o-mini.
3314
+ */
3315
+ post_call_analysis_model?:
3316
+ | 'gpt-4o'
3317
+ | 'gpt-4o-mini'
3318
+ | 'gpt-4.1'
3319
+ | 'gpt-4.1-mini'
3320
+ | 'gpt-4.1-nano'
3321
+ | 'gpt-5'
3322
+ | 'gpt-5-mini'
3323
+ | 'gpt-5-nano'
3324
+ | 'claude-4.5-sonnet'
3325
+ | 'claude-4.0-sonnet'
3326
+ | 'claude-3.7-sonnet'
3327
+ | 'claude-3.5-haiku'
3328
+ | 'gemini-2.0-flash'
3329
+ | 'gemini-2.0-flash-lite'
3330
+ | 'gemini-2.5-flash'
3331
+ | 'gemini-2.5-flash-lite';
3332
+
3333
+ /**
3334
+ * A list of words / phrases and their pronunciation to be used to guide the audio
3335
+ * synthesis for consistent pronunciation. Currently only supported for English &
3336
+ * 11labs voices. Set to null to remove pronunciation dictionary from this agent.
3337
+ */
3338
+ pronunciation_dictionary?: Array<Agent.PronunciationDictionary> | null;
3339
+
3340
+ /**
3341
+ * If set, controls how many times agent would remind user when user is
3342
+ * unresponsive. Must be a non negative integer. If unset, default value of 1 will
3343
+ * apply (remind once). Set to 0 to disable agent from reminding.
3344
+ */
3345
+ reminder_max_count?: number;
3346
+
3347
+ /**
3348
+ * If set (in milliseconds), will trigger a reminder to the agent to speak if the
3349
+ * user has been silent for the specified duration after some agent speech. Must be
3350
+ * a positive number. If unset, default value of 10000 ms (10 s) will apply.
3351
+ */
3352
+ reminder_trigger_ms?: number;
3353
+
3354
+ /**
3355
+ * The Response Engine to attach to the agent. It is used to generate responses for
3356
+ * the agent. You need to create a Response Engine first before attaching it to an
3357
+ * agent.
3358
+ */
3359
+ response_engine?:
3360
+ | Agent.ResponseEngineRetellLm
3361
+ | Agent.ResponseEngineCustomLm
3362
+ | Agent.ResponseEngineConversationFlow;
3363
+
3364
+ /**
3365
+ * Controls how responsive the agent is. Value ranging from [0,1]. Lower value
3366
+ * means less responsive agent (wait more, respond slower), while higher value
3367
+ * means faster exchanges (respond when it can). If unset, default value 1 will
3368
+ * apply.
3369
+ */
3370
+ responsiveness?: number;
3371
+
3372
+ /**
3373
+ * If set, the phone ringing will last for the specified amount of milliseconds.
3374
+ * This applies for both outbound call ringtime, and call transfer ringtime.
3375
+ * Default to 30000 (30 s). Valid range is [5000, 90000].
3376
+ */
3377
+ ring_duration_ms?: number;
3378
+
3379
+ /**
3380
+ * If set, determines whether speech to text should focus on latency or accuracy.
3381
+ * Default to fast mode.
3382
+ */
3383
+ stt_mode?: 'fast' | 'accurate';
3384
+
3385
+ user_dtmf_options?: Agent.UserDtmfOptions | null;
3386
+
3387
+ /**
3388
+ * If set, determines the vocabulary set to use for transcription. This setting
3389
+ * only applies for English agents, for non English agent, this setting is a no-op.
3390
+ * Default to general.
3391
+ */
3392
+ vocab_specialization?: 'general' | 'medical';
3393
+
3394
+ /**
3395
+ * Unique voice id used for the agent. Find list of available voices and their
3396
+ * preview in Dashboard.
3397
+ */
3398
+ voice_id?: string;
3399
+
3400
+ /**
3401
+ * Optionally set the voice model used for the selected voice. Currently only
3402
+ * ElevenLabs voices have voice model selections. Set to null to remove voice model
3403
+ * selection, and default ones will apply. Check out the dashboard for details on
3404
+ * each voice model.
3405
+ */
3406
+ voice_model?:
3407
+ | 'eleven_turbo_v2'
3408
+ | 'eleven_flash_v2'
3409
+ | 'eleven_turbo_v2_5'
3410
+ | 'eleven_flash_v2_5'
3411
+ | 'eleven_multilingual_v2'
3412
+ | 'tts-1'
3413
+ | 'gpt-4o-mini-tts'
3414
+ | null;
3415
+
3416
+ /**
3417
+ * Controls speed of voice. Value ranging from [0.5,2]. Lower value means slower
3418
+ * speech, while higher value means faster speech rate. If unset, default value 1
3419
+ * will apply.
3420
+ */
3421
+ voice_speed?: number;
3422
+
3423
+ /**
3424
+ * Controls how stable the voice is. Value ranging from [0,2]. Lower value means
3425
+ * more stable, and higher value means more variant speech generation. Currently
3426
+ * this setting only applies to `11labs` voices. If unset, default value 1 will
3427
+ * apply.
3428
+ */
3429
+ voice_temperature?: number;
3430
+
3431
+ /**
3432
+ * If this option is set, the call will try to detect voicemail in the first 3
3433
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
3434
+ * applied when the voicemail is detected. Set this to null to disable voicemail
3435
+ * detection.
3436
+ */
3437
+ voicemail_option?: Agent.VoicemailOption | null;
3438
+
3439
+ /**
3440
+ * If set, will control the volume of the agent. Value ranging from [0,2]. Lower
3441
+ * value means quieter agent speech, while higher value means louder agent speech.
3442
+ * If unset, default value 1 will apply.
3443
+ */
3444
+ volume?: number;
3445
+
3446
+ /**
3447
+ * The timeout for the webhook in milliseconds. If not set, default value of 10000
3448
+ * will apply.
3449
+ */
3450
+ webhook_timeout_ms?: number;
3451
+
3452
+ /**
3453
+ * The webhook for agent to listen to call events. See what events it would get at
3454
+ * [webhook doc](/features/webhook). If set, will bind webhook events for this
3455
+ * agent to the specified url, and will ignore the account level webhook for this
3456
+ * agent. Set to `null` to remove webhook url from this agent.
3457
+ */
3458
+ webhook_url?: string | null;
3459
+ }
3460
+
3461
+ export namespace Agent {
3462
+ /**
3463
+ * Configuration for PII scrubbing from transcripts and recordings.
3464
+ */
3465
+ export interface PiiConfig {
3466
+ /**
3467
+ * List of PII categories to scrub from transcripts and recordings.
3468
+ */
3469
+ categories: Array<
3470
+ | 'person_name'
3471
+ | 'address'
3472
+ | 'email'
3473
+ | 'phone_number'
3474
+ | 'ssn'
3475
+ | 'passport'
3476
+ | 'driver_license'
3477
+ | 'credit_card'
3478
+ | 'bank_account'
3479
+ | 'password'
3480
+ | 'pin'
3481
+ | 'medical_id'
3482
+ | 'date_of_birth'
3483
+ >;
3484
+
3485
+ /**
3486
+ * The processing mode for PII scrubbing. Currently only post-call is supported.
3487
+ */
3488
+ mode: 'post_call';
3489
+ }
3490
+
3491
+ export interface StringAnalysisData {
3492
+ /**
3493
+ * Description of the variable.
3494
+ */
3495
+ description: string;
3496
+
3497
+ /**
3498
+ * Name of the variable.
3499
+ */
3500
+ name: string;
3501
+
3502
+ /**
3503
+ * Type of the variable to extract.
3504
+ */
3505
+ type: 'string';
3506
+
3507
+ /**
3508
+ * Examples of the variable value to teach model the style and syntax.
3509
+ */
3510
+ examples?: Array<string>;
3511
+ }
3512
+
3513
+ export interface EnumAnalysisData {
3514
+ /**
3515
+ * The possible values of the variable, must be non empty array.
3516
+ */
3517
+ choices: Array<string>;
3518
+
3519
+ /**
3520
+ * Description of the variable.
3521
+ */
3522
+ description: string;
3523
+
3524
+ /**
3525
+ * Name of the variable.
3526
+ */
3527
+ name: string;
3528
+
3529
+ /**
3530
+ * Type of the variable to extract.
3531
+ */
3532
+ type: 'enum';
3533
+ }
3534
+
3535
+ export interface BooleanAnalysisData {
3536
+ /**
3537
+ * Description of the variable.
3538
+ */
3539
+ description: string;
3540
+
3541
+ /**
3542
+ * Name of the variable.
3543
+ */
3544
+ name: string;
3545
+
3546
+ /**
3547
+ * Type of the variable to extract.
3548
+ */
3549
+ type: 'boolean';
3550
+ }
3551
+
3552
+ export interface NumberAnalysisData {
3553
+ /**
3554
+ * Description of the variable.
3555
+ */
3556
+ description: string;
3557
+
3558
+ /**
3559
+ * Name of the variable.
3560
+ */
3561
+ name: string;
3562
+
3563
+ /**
3564
+ * Type of the variable to extract.
3565
+ */
3566
+ type: 'number';
3567
+ }
3568
+
3569
+ export interface PronunciationDictionary {
3570
+ /**
3571
+ * The phonetic alphabet to be used for pronunciation.
3572
+ */
3573
+ alphabet: 'ipa' | 'cmu';
3574
+
3575
+ /**
3576
+ * Pronunciation of the word in the format of an IPA / CMU pronunciation.
3577
+ */
3578
+ phoneme: string;
3579
+
3580
+ /**
3581
+ * The string of word / phrase to be annotated with pronunciation.
3582
+ */
3583
+ word: string;
3584
+ }
3585
+
3586
+ export interface ResponseEngineRetellLm {
3587
+ /**
3588
+ * id of the Retell LLM Response Engine.
3589
+ */
3590
+ llm_id: string;
3591
+
3592
+ /**
3593
+ * type of the Response Engine.
3594
+ */
3595
+ type: 'retell-llm';
3596
+
3597
+ /**
3598
+ * Version of the Retell LLM Response Engine.
3599
+ */
3600
+ version?: number | null;
3601
+ }
3602
+
3603
+ export interface ResponseEngineCustomLm {
3604
+ /**
3605
+ * LLM websocket url of the custom LLM.
3606
+ */
3607
+ llm_websocket_url: string;
3608
+
3609
+ /**
3610
+ * type of the Response Engine.
3611
+ */
3612
+ type: 'custom-llm';
3613
+ }
3614
+
3615
+ export interface ResponseEngineConversationFlow {
3616
+ /**
3617
+ * ID of the Conversation Flow Response Engine.
3618
+ */
3619
+ conversation_flow_id: string;
3620
+
3621
+ /**
3622
+ * type of the Response Engine.
3623
+ */
3624
+ type: 'conversation-flow';
3625
+
3626
+ /**
3627
+ * Version of the Conversation Flow Response Engine.
3628
+ */
3629
+ version?: number | null;
3630
+ }
3631
+
3632
+ export interface UserDtmfOptions {
3633
+ /**
3634
+ * The maximum number of digits allowed in the user's DTMF (Dual-Tone
3635
+ * Multi-Frequency) input per turn. Once this limit is reached, the input is
3636
+ * considered complete and a response will be generated immediately.
3637
+ */
3638
+ digit_limit?: number | null;
3639
+
3640
+ /**
3641
+ * A single key that signals the end of DTMF input. Acceptable values include any
3642
+ * digit (0–9), the pound/hash symbol (#), or the asterisk (\*).
3643
+ */
3644
+ termination_key?: string | null;
3645
+
3646
+ /**
3647
+ * The time (in milliseconds) to wait for user DTMF input before timing out. The
3648
+ * timer resets with each digit received.
3649
+ */
3650
+ timeout_ms?: number;
3651
+ }
3652
+
3653
+ /**
3654
+ * If this option is set, the call will try to detect voicemail in the first 3
3655
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
3656
+ * applied when the voicemail is detected. Set this to null to disable voicemail
3657
+ * detection.
3658
+ */
3659
+ export interface VoicemailOption {
3660
+ action:
3661
+ | VoicemailOption.VoicemailActionPrompt
3662
+ | VoicemailOption.VoicemailActionStaticText
3663
+ | VoicemailOption.VoicemailActionHangup;
3664
+ }
3665
+
3666
+ export namespace VoicemailOption {
3667
+ export interface VoicemailActionPrompt {
3668
+ /**
3669
+ * The prompt used to generate the text to be spoken when the call is detected to
3670
+ * be in voicemail.
3671
+ */
3672
+ text: string;
3673
+
3674
+ type: 'prompt';
3675
+ }
3676
+
3677
+ export interface VoicemailActionStaticText {
3678
+ /**
3679
+ * The text to be spoken when the call is detected to be in voicemail.
3680
+ */
3681
+ text: string;
3682
+
3683
+ type: 'static_text';
3684
+ }
3685
+
3686
+ export interface VoicemailActionHangup {
3687
+ type: 'hangup';
3688
+ }
3689
+ }
3690
+ }
3691
+
3692
+ /**
3693
+ * Override conversation flow configuration settings. Only applicable when using
3694
+ * conversation flow as the response engine. Supported attributes - model_choice,
3695
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3696
+ * begin_after_user_silence_ms.
3697
+ */
3698
+ export interface ConversationFlow {
3699
+ /**
3700
+ * If set, the AI will begin the conversation after waiting for the user for the
3701
+ * duration (in milliseconds) specified by this attribute. This only applies if the
3702
+ * agent is configured to wait for the user to speak first. If not set, the agent
3703
+ * will wait indefinitely for the user to speak.
3704
+ */
3705
+ begin_after_user_silence_ms?: number | null;
3706
+
3707
+ /**
3708
+ * Knowledge base configuration for RAG retrieval.
3709
+ */
3710
+ kb_config?: ConversationFlow.KBConfig;
3711
+
3712
+ /**
3713
+ * Knowledge base IDs for RAG (Retrieval-Augmented Generation).
3714
+ */
3715
+ knowledge_base_ids?: Array<string> | null;
3716
+
3717
+ /**
3718
+ * The model choice for the conversation flow.
3719
+ */
3720
+ model_choice?: ConversationFlow.ModelChoice;
3721
+
3722
+ /**
3723
+ * Controls the randomness of the model's responses. Lower values make responses
3724
+ * more deterministic.
3725
+ */
3726
+ model_temperature?: number | null;
3727
+
3728
+ /**
3729
+ * Who starts the conversation - user or agent.
3730
+ */
3731
+ start_speaker?: 'user' | 'agent';
3732
+ }
3733
+
3734
+ export namespace ConversationFlow {
3735
+ /**
3736
+ * Knowledge base configuration for RAG retrieval.
3737
+ */
3738
+ export interface KBConfig {
3739
+ /**
3740
+ * Similarity threshold for filtering search results
3741
+ */
3742
+ filter_score?: number;
3743
+
3744
+ /**
3745
+ * Max number of knowledge base chunks to retrieve
3746
+ */
3747
+ top_k?: number;
3748
+ }
3749
+
3750
+ /**
3751
+ * The model choice for the conversation flow.
3752
+ */
3753
+ export interface ModelChoice {
3754
+ /**
3755
+ * The LLM model to use
3756
+ */
3757
+ model:
3758
+ | 'gpt-5'
3759
+ | 'gpt-5-mini'
3760
+ | 'gpt-5-nano'
3761
+ | 'gpt-4o'
3762
+ | 'gpt-4o-mini'
3763
+ | 'gpt-4.1'
3764
+ | 'gpt-4.1-mini'
3765
+ | 'gpt-4.1-nano'
3766
+ | 'claude-3.7-sonnet'
3767
+ | 'claude-3.5-haiku'
3768
+ | 'gemini-2.0-flash'
3769
+ | 'gemini-2.0-flash-lite'
3770
+ | 'gemini-2.5-flash'
3771
+ | 'gemini-2.5-flash-lite';
3772
+
3773
+ /**
3774
+ * Type of model choice
3775
+ */
3776
+ type: 'cascading';
3777
+
3778
+ /**
3779
+ * Whether to use high priority pool with more dedicated resource, default false
3780
+ */
3781
+ high_priority?: boolean;
3782
+ }
3783
+ }
3784
+
3785
+ /**
3786
+ * Override Retell LLM configuration settings. Only applicable when using Retell
3787
+ * LLM as the response engine. Supported attributes - model, s2s_model,
3788
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3789
+ * begin_after_user_silence_ms, begin_message.
3790
+ */
3791
+ export interface RetellLlm {
3792
+ /**
3793
+ * If set, the AI will begin the conversation after waiting for the user for the
3794
+ * duration (in milliseconds) specified by this attribute. This only applies if the
3795
+ * agent is configured to wait for the user to speak first. If not set, the agent
3796
+ * will wait indefinitely for the user to speak.
3797
+ */
3798
+ begin_after_user_silence_ms?: number | null;
3799
+
3800
+ /**
3801
+ * First utterance said by the agent in the call. If not set, LLM will dynamically
3802
+ * generate a message. If set to "", agent will wait for user to speak first.
3803
+ */
3804
+ begin_message?: string | null;
3805
+
3806
+ /**
3807
+ * Knowledge base configuration for RAG retrieval.
3808
+ */
3809
+ kb_config?: RetellLlm.KBConfig | null;
3810
+
3811
+ /**
3812
+ * A list of knowledge base ids to use for this resource.
3813
+ */
3814
+ knowledge_base_ids?: Array<string> | null;
3815
+
3816
+ /**
3817
+ * Select the underlying text LLM. If not set, would default to gpt-4.1.
3818
+ */
3819
+ model?:
3820
+ | 'gpt-5'
3821
+ | 'gpt-5-mini'
3822
+ | 'gpt-5-nano'
3823
+ | 'gpt-4o'
3824
+ | 'gpt-4o-mini'
3825
+ | 'gpt-4.1'
3826
+ | 'gpt-4.1-mini'
3827
+ | 'gpt-4.1-nano'
3828
+ | 'claude-3.7-sonnet'
3829
+ | 'claude-3.5-haiku'
3830
+ | 'gemini-2.0-flash'
3831
+ | 'gemini-2.0-flash-lite'
3832
+ | 'gemini-2.5-flash'
3833
+ | 'gemini-2.5-flash-lite'
3834
+ | null;
3835
+
3836
+ /**
3837
+ * If set, will control the randomness of the response. Value ranging from [0,1].
3838
+ * Lower value means more deterministic, while higher value means more random. If
3839
+ * unset, default value 0 will apply. Note that for tool calling, a lower value is
3840
+ * recommended.
3841
+ */
3842
+ model_temperature?: number;
3843
+
3844
+ /**
3845
+ * Select the underlying speech to speech model. Can only set this or model, not
3846
+ * both.
3847
+ */
3848
+ s2s_model?: 'gpt-4o-realtime' | 'gpt-4o-mini-realtime' | 'gpt-realtime' | null;
3849
+
3850
+ /**
3851
+ * The speaker who starts the conversation. If set, must be either 'user' or
3852
+ * 'agent'.
3853
+ */
3854
+ start_speaker?: 'user' | 'agent';
3855
+ }
3856
+
3857
+ export namespace RetellLlm {
3858
+ /**
3859
+ * Knowledge base configuration for RAG retrieval.
3860
+ */
3861
+ export interface KBConfig {
3862
+ /**
3863
+ * Similarity threshold for filtering search results
3864
+ */
3865
+ filter_score?: number;
3866
+
3867
+ /**
3868
+ * Max number of knowledge base chunks to retrieve
3869
+ */
3870
+ top_k?: number;
3871
+ }
3872
+ }
3873
+ }
3874
+ }
3875
+
3876
+ export interface CallRegisterPhoneCallParams {
3877
+ /**
3878
+ * The agent to use for the call.
3879
+ */
3880
+ agent_id: string;
3881
+
3882
+ /**
3883
+ * Override configuration for agent, retell LLM, or conversation flow settings for
3884
+ * a specific call.
3885
+ */
3886
+ agent_override?: CallRegisterPhoneCallParams.AgentOverride;
3887
+
3888
+ /**
3889
+ * The version of the agent to use for the call.
3890
+ */
3891
+ agent_version?: number;
3892
+
3893
+ /**
3894
+ * Direction of the phone call. Stored for tracking purpose.
3895
+ */
3896
+ direction?: 'inbound' | 'outbound';
3897
+
3898
+ /**
3899
+ * The number you own in E.164 format. Stored for tracking purpose.
3900
+ */
3901
+ from_number?: string;
3902
+
3903
+ /**
3904
+ * An arbitrary object for storage purpose only. You can put anything here like
3905
+ * your internal customer id associated with the call. Not used for processing. You
3906
+ * can later get this field from the call object.
3907
+ */
3908
+ metadata?: unknown;
3909
+
3910
+ /**
3911
+ * Add optional dynamic variables in key value pairs of string that are injected into
3912
+ * your Response Engine prompt and tool description. Only applicable for Response
3913
+ * Engine.
3914
+ */
3915
+ retell_llm_dynamic_variables?: { [key: string]: unknown };
3916
+
3917
+ /**
3918
+ * The number you want to call, in E.164 format. Stored for tracking purpose.
3919
+ */
3920
+ to_number?: string;
3921
+ }
3922
+
3923
+ export namespace CallRegisterPhoneCallParams {
3924
+ /**
3925
+ * Override configuration for agent, retell LLM, or conversation flow settings for
3926
+ * a specific call.
3927
+ */
3928
+ export interface AgentOverride {
3929
+ /**
3930
+ * Override agent configuration settings. Any properties specified here will
3931
+ * override the base agent configuration for this call.
3932
+ */
3933
+ agent?: AgentOverride.Agent;
3934
+
3935
+ /**
3936
+ * Override conversation flow configuration settings. Only applicable when using
3937
+ * conversation flow as the response engine. Supported attributes - model_choice,
3938
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3939
+ * begin_after_user_silence_ms.
3940
+ */
3941
+ conversation_flow?: AgentOverride.ConversationFlow;
3942
+
3943
+ /**
3944
+ * Override Retell LLM configuration settings. Only applicable when using Retell
3945
+ * LLM as the response engine. Supported attributes - model, s2s_model,
3946
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
3947
+ * begin_after_user_silence_ms, begin_message.
3948
+ */
3949
+ retell_llm?: AgentOverride.RetellLlm;
3950
+ }
3951
+
3952
+ export namespace AgentOverride {
3953
+ /**
3954
+ * Override agent configuration settings. Any properties specified here will
3955
+ * override the base agent configuration for this call.
3956
+ */
3957
+ export interface Agent {
3958
+ /**
3959
+ * The name of the agent. Only used for your own reference.
3960
+ */
3961
+ agent_name?: string | null;
3962
+
3963
+ /**
3964
+ * If set to true, DTMF input will be accepted and processed. If false, any DTMF
3965
+ * input will be ignored. Default to true.
3966
+ */
3967
+ allow_user_dtmf?: boolean;
3968
+
3969
+ /**
3970
+ * If set, will add ambient environment sound to the call to make experience more
3971
+ * realistic. Currently supports the following options:
3972
+ *
3973
+ * - `coffee-shop`: Coffee shop ambience with people chatting in background.
3974
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/coffee-shop.wav)
3975
+ *
3976
+ * - `convention-hall`: Convention hall ambience, with some echo and people
3977
+ * chatting in background.
3978
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/convention-hall.wav)
3979
+ *
3980
+ * - `summer-outdoor`: Summer outdoor ambience with cicada chirping.
3981
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/summer-outdoor.wav)
3982
+ *
3983
+ * - `mountain-outdoor`: Mountain outdoor ambience with birds singing.
3984
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/mountain-outdoor.wav)
3985
+ *
3986
+ * - `static-noise`: Constant static noise.
3987
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/static-noise.wav)
3988
+ *
3989
+ * - `call-center`: Call center work noise.
3990
+ * [Listen to Ambience](https://retell-utils-public.s3.us-west-2.amazonaws.com/call-center.wav)
3991
+ *
3992
+ * Set to `null` to remove ambient sound from this agent.
3993
+ */
3994
+ ambient_sound?:
3995
+ | 'coffee-shop'
3996
+ | 'convention-hall'
3997
+ | 'summer-outdoor'
3998
+ | 'mountain-outdoor'
3999
+ | 'static-noise'
4000
+ | 'call-center'
4001
+ | null;
4002
+
4003
+ /**
4004
+ * If set, will control the volume of the ambient sound. Value ranging from [0,2].
4005
+ * Lower value means quieter ambient sound, while higher value means louder ambient
4006
+ * sound. If unset, default value 1 will apply.
4007
+ */
4008
+ ambient_sound_volume?: number;
4009
+
4010
+ /**
4011
+ * Only applicable when enable_backchannel is true. Controls how often the agent
4012
+ * would backchannel when a backchannel is possible. Value ranging from [0,1].
4013
+ * Lower value means less frequent backchannel, while higher value means more
4014
+ * frequent backchannel. If unset, default value 0.8 will apply.
4015
+ */
4016
+ backchannel_frequency?: number;
4017
+
4018
+ /**
4019
+ * Only applicable when enable_backchannel is true. A list of words that the agent
4020
+ * would use as backchannel. If not set, default backchannel words will apply.
4021
+ * Check out
4022
+ * [backchannel default words](/agent/interaction-configuration#backchannel) for
4023
+ * more details. Note that certain voices do not work too well with certain words,
4024
+ * so it's recommended to experiment before adding any words.
4025
+ */
4026
+ backchannel_words?: Array<string> | null;
4027
+
4028
+ /**
4029
+ * If set, will delay the first message by the specified amount of milliseconds, so
4030
+ * that it gives user more time to prepare to take the call. Valid range is [0,
4031
+ * 5000]. If not set or set to 0, agent will speak immediately. Only applicable
4032
+ * when agent speaks first.
4033
+ */
4034
+ begin_message_delay_ms?: number;
4035
+
4036
+ /**
4037
+ * Provide a customized list of keywords to bias the transcriber model, so that
4038
+ * these words are more likely to get transcribed. Commonly used for names, brands,
4039
+ * street, etc.
4040
+ */
4041
+ boosted_keywords?: Array<string> | null;
4042
+
4043
+ /**
4044
+ * Granular setting to manage how Retell stores sensitive data (transcripts,
4045
+ * recordings, logs, etc.). This replaces the deprecated
4046
+ * `opt_out_sensitive_data_storage` field.
4047
+ *
4048
+ * - `everything`: Store all data including transcripts, recordings, and logs.
4049
+ * - `everything_except_pii`: Store data without PII when PII is detected.
4050
+ * - `basic_attributes_only`: Store only basic attributes; no
4051
+ * transcripts/recordings/logs. If not set, default value of "everything" will
4052
+ * apply.
4053
+ */
4054
+ data_storage_setting?: 'everything' | 'everything_except_pii' | 'basic_attributes_only';
4055
+
4056
+ /**
4057
+ * If set, determines what denoising mode to use. Default to noise-cancellation.
4058
+ */
4059
+ denoising_mode?: 'noise-cancellation' | 'noise-and-background-speech-cancellation';
4060
+
4061
+ /**
4062
+ * Controls whether the agent would backchannel (agent interjects the speaker with
4063
+ * phrases like "yeah", "uh-huh" to signify interest and engagement). Backchannel
4064
+ * when enabled tends to show up more in longer user utterances. If not set, agent
4065
+ * will not backchannel.
4066
+ */
4067
+ enable_backchannel?: boolean;
4068
+
4069
+ /**
4070
+ * If users stay silent for a period after agent speech, end the call. The minimum
4071
+ * value allowed is 10,000 ms (10 s). By default, this is set to 600000 (10 min).
4072
+ */
4073
+ end_call_after_silence_ms?: number;
4074
+
4075
+ /**
4076
+ * When TTS provider for the selected voice is experiencing outages, we would use
4077
+ * fallback voices listed here for the agent. Voice id and the fallback voice ids
4078
+ * must be from different TTS providers. The system would go through the list in
4079
+ * order; if the first one in the list is also having an outage, it would use the next
4080
+ * one. Set to null to remove voice fallback for the agent.
4081
+ */
4082
+ fallback_voice_ids?: Array<string> | null;
4083
+
4084
+ /**
4085
+ * Controls how sensitive the agent is to user interruptions. Value ranging from
4086
+ * [0,1]. Lower value means it will take longer / more words for user to interrupt
4087
+ * agent, while higher value means it's easier for user to interrupt agent. If
4088
+ * unset, default value 1 will apply. When this is set to 0, agent would never be
4089
+ * interrupted.
4090
+ */
4091
+ interruption_sensitivity?: number;
4092
+
4093
+ /**
4094
+ * Specifies what language (and dialect) the speech recognition will operate in.
4095
+ * For instance, selecting `en-GB` optimizes speech recognition for British
4096
+ * English. If unset, will use default value `en-US`. Select `multi` for
4097
+ * multilingual support; currently this supports Spanish and English.
4098
+ */
4099
+ language?:
4100
+ | 'en-US'
4101
+ | 'en-IN'
4102
+ | 'en-GB'
4103
+ | 'en-AU'
4104
+ | 'en-NZ'
4105
+ | 'de-DE'
4106
+ | 'es-ES'
4107
+ | 'es-419'
4108
+ | 'hi-IN'
4109
+ | 'fr-FR'
4110
+ | 'fr-CA'
4111
+ | 'ja-JP'
4112
+ | 'pt-PT'
4113
+ | 'pt-BR'
4114
+ | 'zh-CN'
4115
+ | 'ru-RU'
4116
+ | 'it-IT'
4117
+ | 'ko-KR'
4118
+ | 'nl-NL'
4119
+ | 'nl-BE'
4120
+ | 'pl-PL'
4121
+ | 'tr-TR'
4122
+ | 'th-TH'
4123
+ | 'vi-VN'
4124
+ | 'ro-RO'
4125
+ | 'bg-BG'
4126
+ | 'ca-ES'
4127
+ | 'da-DK'
4128
+ | 'fi-FI'
4129
+ | 'el-GR'
4130
+ | 'hu-HU'
4131
+ | 'id-ID'
4132
+ | 'no-NO'
4133
+ | 'sk-SK'
4134
+ | 'sv-SE'
4135
+ | 'multi';
4136
+
4137
+ /**
4138
+ * Maximum allowed length for the call, will force end the call if reached. The
4139
+ * minimum value allowed is 60,000 ms (1 min), and maximum value allowed is
4140
+ * 7,200,000 (2 hours). By default, this is set to 3,600,000 (1 hour).
4141
+ */
4142
+ max_call_duration_ms?: number;
4143
+
4144
+ /**
4145
+ * If set to true, will normalize some parts of the text (number, currency, date,
4146
+ * etc.) to their spoken form for more consistent speech synthesis
4147
+ * (sometimes the voice synthesis system itself might read these wrong with the
4148
+ * raw text). For example, it will convert "Call my number 2137112342 on Jul 5th,
4149
+ * 2024 for the $24.12 payment" to "Call my number two one three seven one one two
4150
+ * three four two on july fifth, twenty twenty four for the twenty four dollars
4151
+ * twelve cents payment" before starting audio generation.
4152
+ */
4153
+ normalize_for_speech?: boolean;
4154
+
4155
+ /**
4156
+ * Whether this agent opts in for signed URLs for public logs and recordings. When
4157
+ * enabled, the generated URLs will include security signatures that restrict
4158
+ * access and automatically expire after 24 hours.
4159
+ */
4160
+ opt_in_signed_url?: boolean;
4161
+
4162
+ /**
4163
+ * Configuration for PII scrubbing from transcripts and recordings.
4164
+ */
4165
+ pii_config?: Agent.PiiConfig;
4166
+
4167
+ /**
4168
+ * Post call analysis data to extract from the call. This data will augment the
4169
+ * pre-defined variables extracted in the call analysis. This will be available
4170
+ * after the call ends.
4171
+ */
4172
+ post_call_analysis_data?: Array<
4173
+ | Agent.StringAnalysisData
4174
+ | Agent.EnumAnalysisData
4175
+ | Agent.BooleanAnalysisData
4176
+ | Agent.NumberAnalysisData
4177
+ > | null;
4178
+
4179
+ /**
4180
+ * The model to use for post call analysis. Default to gpt-4o-mini.
4181
+ */
4182
+ post_call_analysis_model?:
4183
+ | 'gpt-4o'
4184
+ | 'gpt-4o-mini'
4185
+ | 'gpt-4.1'
4186
+ | 'gpt-4.1-mini'
4187
+ | 'gpt-4.1-nano'
4188
+ | 'gpt-5'
4189
+ | 'gpt-5-mini'
4190
+ | 'gpt-5-nano'
4191
+ | 'claude-4.5-sonnet'
4192
+ | 'claude-4.0-sonnet'
4193
+ | 'claude-3.7-sonnet'
4194
+ | 'claude-3.5-haiku'
4195
+ | 'gemini-2.0-flash'
4196
+ | 'gemini-2.0-flash-lite'
4197
+ | 'gemini-2.5-flash'
4198
+ | 'gemini-2.5-flash-lite';
4199
+
4200
+ /**
4201
+ * A list of words / phrases and their pronunciation to be used to guide the audio
4202
+ * synthesis for consistent pronunciation. Currently only supported for English &
4203
+ * 11labs voices. Set to null to remove pronunciation dictionary from this agent.
4204
+ */
4205
+ pronunciation_dictionary?: Array<Agent.PronunciationDictionary> | null;
4206
+
4207
+ /**
4208
+ * If set, controls how many times agent would remind user when user is
4209
+ * unresponsive. Must be a non-negative integer. If unset, default value of 1 will
4210
+ * apply (remind once). Set to 0 to disable agent from reminding.
4211
+ */
4212
+ reminder_max_count?: number;
4213
+
4214
+ /**
4215
+ * If set (in milliseconds), will trigger a reminder to the agent to speak if the
4216
+ * user has been silent for the specified duration after some agent speech. Must be
4217
+ * a positive number. If unset, default value of 10000 ms (10 s) will apply.
4218
+ */
4219
+ reminder_trigger_ms?: number;
4220
+
4221
+ /**
4222
+ * The Response Engine to attach to the agent. It is used to generate responses for
4223
+ * the agent. You need to create a Response Engine first before attaching it to an
4224
+ * agent.
4225
+ */
4226
+ response_engine?:
4227
+ | Agent.ResponseEngineRetellLm
4228
+ | Agent.ResponseEngineCustomLm
4229
+ | Agent.ResponseEngineConversationFlow;
4230
+
4231
+ /**
4232
+ * Controls how responsive the agent is. Value ranging from [0,1]. Lower value
4233
+ * means less responsive agent (wait more, respond slower), while higher value
4234
+ * means faster exchanges (respond when it can). If unset, default value 1 will
4235
+ * apply.
4236
+ */
4237
+ responsiveness?: number;
4238
+
4239
+ /**
4240
+ * If set, the phone ringing will last for the specified amount of milliseconds.
4241
+ * This applies for both outbound call ringtime, and call transfer ringtime.
4242
+ * Default to 30000 (30 s). Valid range is [5000, 90000].
4243
+ */
4244
+ ring_duration_ms?: number;
4245
+
4246
+ /**
4247
+ * If set, determines whether speech to text should focus on latency or accuracy.
4248
+ * Default to fast mode.
4249
+ */
4250
+ stt_mode?: 'fast' | 'accurate';
4251
+
4252
+ user_dtmf_options?: Agent.UserDtmfOptions | null;
4253
+
4254
+ /**
4255
+ * If set, determines the vocabulary set to use for transcription. This setting
4256
+ * only applies to English agents; for non-English agents, this setting is a no-op.
4257
+ * Default to general.
4258
+ */
4259
+ vocab_specialization?: 'general' | 'medical';
4260
+
4261
+ /**
4262
+ * Unique voice id used for the agent. Find list of available voices and their
4263
+ * preview in Dashboard.
4264
+ */
4265
+ voice_id?: string;
4266
+
4267
+ /**
4268
+ * Optionally set the voice model used for the selected voice. Currently only
4269
+ * ElevenLabs voices have voice model selections. Set to null to remove voice model
4270
+ * selection, and default ones will apply. Check out the dashboard for details on
4271
+ * each voice model.
4272
+ */
4273
+ voice_model?:
4274
+ | 'eleven_turbo_v2'
4275
+ | 'eleven_flash_v2'
4276
+ | 'eleven_turbo_v2_5'
4277
+ | 'eleven_flash_v2_5'
4278
+ | 'eleven_multilingual_v2'
4279
+ | 'tts-1'
4280
+ | 'gpt-4o-mini-tts'
4281
+ | null;
4282
+
4283
+ /**
4284
+ * Controls speed of voice. Value ranging from [0.5,2]. Lower value means slower
4285
+ * speech, while higher value means faster speech rate. If unset, default value 1
4286
+ * will apply.
4287
+ */
4288
+ voice_speed?: number;
4289
+
4290
+ /**
4291
+ * Controls how stable the voice is. Value ranging from [0,2]. Lower value means
4292
+ * more stable, and higher value means more variant speech generation. Currently
4293
+ * this setting only applies to `11labs` voices. If unset, default value 1 will
4294
+ * apply.
4295
+ */
4296
+ voice_temperature?: number;
4297
+
4298
+ /**
4299
+ * If this option is set, the call will try to detect voicemail in the first 3
4300
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
4301
+ * applied when the voicemail is detected. Set this to null to disable voicemail
4302
+ * detection.
4303
+ */
4304
+ voicemail_option?: Agent.VoicemailOption | null;
4305
+
4306
+ /**
4307
+ * If set, will control the volume of the agent. Value ranging from [0,2]. Lower
4308
+ * value means quieter agent speech, while higher value means louder agent speech.
4309
+ * If unset, default value 1 will apply.
4310
+ */
4311
+ volume?: number;
4312
+
4313
+ /**
4314
+ * The timeout for the webhook in milliseconds. If not set, default value of 10000
4315
+ * will apply.
4316
+ */
4317
+ webhook_timeout_ms?: number;
4318
+
4319
+ /**
4320
+ * The webhook for agent to listen to call events. See what events it would get at
4321
+ * [webhook doc](/features/webhook). If set, will bind webhook events for this
4322
+ * agent to the specified url, and will ignore the account level webhook for this
4323
+ * agent. Set to `null` to remove webhook url from this agent.
4324
+ */
4325
+ webhook_url?: string | null;
4326
+ }
4327
+
4328
+ export namespace Agent {
4329
+ /**
4330
+ * Configuration for PII scrubbing from transcripts and recordings.
4331
+ */
4332
+ export interface PiiConfig {
4333
+ /**
4334
+ * List of PII categories to scrub from transcripts and recordings.
4335
+ */
4336
+ categories: Array<
4337
+ | 'person_name'
4338
+ | 'address'
4339
+ | 'email'
4340
+ | 'phone_number'
4341
+ | 'ssn'
4342
+ | 'passport'
4343
+ | 'driver_license'
4344
+ | 'credit_card'
4345
+ | 'bank_account'
4346
+ | 'password'
4347
+ | 'pin'
4348
+ | 'medical_id'
4349
+ | 'date_of_birth'
4350
+ >;
4351
+
4352
+ /**
4353
+ * The processing mode for PII scrubbing. Currently only post-call is supported.
4354
+ */
4355
+ mode: 'post_call';
4356
+ }
4357
+
4358
+ export interface StringAnalysisData {
4359
+ /**
4360
+ * Description of the variable.
4361
+ */
4362
+ description: string;
4363
+
4364
+ /**
4365
+ * Name of the variable.
4366
+ */
4367
+ name: string;
4368
+
4369
+ /**
4370
+ * Type of the variable to extract.
4371
+ */
4372
+ type: 'string';
4373
+
4374
+ /**
4375
+ * Examples of the variable value to teach the model the style and syntax.
4376
+ */
4377
+ examples?: Array<string>;
4378
+ }
4379
+
4380
+ export interface EnumAnalysisData {
4381
+ /**
4382
+ * The possible values of the variable; must be a non-empty array.
4383
+ */
4384
+ choices: Array<string>;
4385
+
4386
+ /**
4387
+ * Description of the variable.
4388
+ */
4389
+ description: string;
4390
+
4391
+ /**
4392
+ * Name of the variable.
4393
+ */
4394
+ name: string;
4395
+
4396
+ /**
4397
+ * Type of the variable to extract.
4398
+ */
4399
+ type: 'enum';
4400
+ }
4401
+
4402
+ export interface BooleanAnalysisData {
4403
+ /**
4404
+ * Description of the variable.
4405
+ */
4406
+ description: string;
4407
+
4408
+ /**
4409
+ * Name of the variable.
4410
+ */
4411
+ name: string;
4412
+
4413
+ /**
4414
+ * Type of the variable to extract.
4415
+ */
4416
+ type: 'boolean';
4417
+ }
4418
+
4419
+ export interface NumberAnalysisData {
4420
+ /**
4421
+ * Description of the variable.
4422
+ */
4423
+ description: string;
4424
+
4425
+ /**
4426
+ * Name of the variable.
4427
+ */
4428
+ name: string;
4429
+
4430
+ /**
4431
+ * Type of the variable to extract.
4432
+ */
4433
+ type: 'number';
4434
+ }
4435
+
4436
+ export interface PronunciationDictionary {
4437
+ /**
4438
+ * The phonetic alphabet to be used for pronunciation.
4439
+ */
4440
+ alphabet: 'ipa' | 'cmu';
4441
+
4442
+ /**
4443
+ * Pronunciation of the word in the format of an IPA / CMU pronunciation.
4444
+ */
4445
+ phoneme: string;
4446
+
4447
+ /**
4448
+ * The string of word / phrase to be annotated with pronunciation.
4449
+ */
4450
+ word: string;
4451
+ }
4452
+
4453
+ export interface ResponseEngineRetellLm {
4454
+ /**
4455
+ * id of the Retell LLM Response Engine.
4456
+ */
4457
+ llm_id: string;
4458
+
4459
+ /**
4460
+ * type of the Response Engine.
4461
+ */
4462
+ type: 'retell-llm';
4463
+
4464
+ /**
4465
+ * Version of the Retell LLM Response Engine.
4466
+ */
4467
+ version?: number | null;
4468
+ }
4469
+
4470
+ export interface ResponseEngineCustomLm {
4471
+ /**
4472
+ * LLM websocket url of the custom LLM.
4473
+ */
4474
+ llm_websocket_url: string;
4475
+
4476
+ /**
4477
+ * type of the Response Engine.
4478
+ */
4479
+ type: 'custom-llm';
4480
+ }
4481
+
4482
+ export interface ResponseEngineConversationFlow {
4483
+ /**
4484
+ * ID of the Conversation Flow Response Engine.
4485
+ */
4486
+ conversation_flow_id: string;
4487
+
4488
+ /**
4489
+ * type of the Response Engine.
4490
+ */
4491
+ type: 'conversation-flow';
4492
+
4493
+ /**
4494
+ * Version of the Conversation Flow Response Engine.
4495
+ */
4496
+ version?: number | null;
4497
+ }
4498
+
4499
+ export interface UserDtmfOptions {
4500
+ /**
4501
+ * The maximum number of digits allowed in the user's DTMF (Dual-Tone
4502
+ * Multi-Frequency) input per turn. Once this limit is reached, the input is
4503
+ * considered complete and a response will be generated immediately.
4504
+ */
4505
+ digit_limit?: number | null;
4506
+
4507
+ /**
4508
+ * A single key that signals the end of DTMF input. Acceptable values include any
4509
+ * digit (0–9), the pound/hash symbol (#), or the asterisk (\*).
4510
+ */
4511
+ termination_key?: string | null;
4512
+
4513
+ /**
4514
+ * The time (in milliseconds) to wait for user DTMF input before timing out. The
4515
+ * timer resets with each digit received.
4516
+ */
4517
+ timeout_ms?: number;
4518
+ }
4519
+
4520
+ /**
4521
+ * If this option is set, the call will try to detect voicemail in the first 3
4522
+ * minutes of the call. Actions defined (hangup, or leave a message) will be
4523
+ * applied when the voicemail is detected. Set this to null to disable voicemail
4524
+ * detection.
4525
+ */
4526
+ export interface VoicemailOption {
4527
+ action:
4528
+ | VoicemailOption.VoicemailActionPrompt
4529
+ | VoicemailOption.VoicemailActionStaticText
4530
+ | VoicemailOption.VoicemailActionHangup;
4531
+ }
4532
+
4533
+ export namespace VoicemailOption {
4534
+ export interface VoicemailActionPrompt {
4535
+ /**
4536
+ * The prompt used to generate the text to be spoken when the call is detected to
4537
+ * be in voicemail.
4538
+ */
4539
+ text: string;
4540
+
4541
+ type: 'prompt';
4542
+ }
4543
+
4544
+ export interface VoicemailActionStaticText {
4545
+ /**
4546
+ * The text to be spoken when the call is detected to be in voicemail.
4547
+ */
4548
+ text: string;
4549
+
4550
+ type: 'static_text';
4551
+ }
4552
+
4553
+ export interface VoicemailActionHangup {
4554
+ type: 'hangup';
4555
+ }
4556
+ }
4557
+ }
4558
+
4559
+ /**
4560
+ * Override conversation flow configuration settings. Only applicable when using
4561
+ * conversation flow as the response engine. Supported attributes - model_choice,
4562
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
4563
+ * begin_after_user_silence_ms.
4564
+ */
4565
+ export interface ConversationFlow {
4566
+ /**
4567
+ * If set, the AI will begin the conversation after waiting for the user for the
4568
+ * duration (in milliseconds) specified by this attribute. This only applies if the
4569
+ * agent is configured to wait for the user to speak first. If not set, the agent
4570
+ * will wait indefinitely for the user to speak.
4571
+ */
4572
+ begin_after_user_silence_ms?: number | null;
4573
+
4574
+ /**
4575
+ * Knowledge base configuration for RAG retrieval.
4576
+ */
4577
+ kb_config?: ConversationFlow.KBConfig;
4578
+
4579
+ /**
4580
+ * Knowledge base IDs for RAG (Retrieval-Augmented Generation).
4581
+ */
4582
+ knowledge_base_ids?: Array<string> | null;
4583
+
4584
+ /**
4585
+ * The model choice for the conversation flow.
4586
+ */
4587
+ model_choice?: ConversationFlow.ModelChoice;
4588
+
4589
+ /**
4590
+ * Controls the randomness of the model's responses. Lower values make responses
4591
+ * more deterministic.
4592
+ */
4593
+ model_temperature?: number | null;
4594
+
4595
+ /**
4596
+ * Who starts the conversation - user or agent.
4597
+ */
4598
+ start_speaker?: 'user' | 'agent';
4599
+ }
4600
+
4601
+ export namespace ConversationFlow {
4602
+ /**
4603
+ * Knowledge base configuration for RAG retrieval.
4604
+ */
4605
+ export interface KBConfig {
4606
+ /**
4607
+ * Similarity threshold for filtering search results
4608
+ */
4609
+ filter_score?: number;
4610
+
4611
+ /**
4612
+ * Max number of knowledge base chunks to retrieve
4613
+ */
4614
+ top_k?: number;
4615
+ }
4616
+
4617
+ /**
4618
+ * The model choice for the conversation flow.
4619
+ */
4620
+ export interface ModelChoice {
4621
+ /**
4622
+ * The LLM model to use
4623
+ */
4624
+ model:
4625
+ | 'gpt-5'
4626
+ | 'gpt-5-mini'
4627
+ | 'gpt-5-nano'
4628
+ | 'gpt-4o'
4629
+ | 'gpt-4o-mini'
4630
+ | 'gpt-4.1'
4631
+ | 'gpt-4.1-mini'
4632
+ | 'gpt-4.1-nano'
4633
+ | 'claude-3.7-sonnet'
4634
+ | 'claude-3.5-haiku'
4635
+ | 'gemini-2.0-flash'
4636
+ | 'gemini-2.0-flash-lite'
4637
+ | 'gemini-2.5-flash'
4638
+ | 'gemini-2.5-flash-lite';
4639
+
4640
+ /**
4641
+ * Type of model choice
4642
+ */
4643
+ type: 'cascading';
4644
+
4645
+ /**
4646
+ * Whether to use high priority pool with more dedicated resource, default false
4647
+ */
4648
+ high_priority?: boolean;
4649
+ }
4650
+ }
4651
+
4652
+ /**
4653
+ * Override Retell LLM configuration settings. Only applicable when using Retell
4654
+ * LLM as the response engine. Supported attributes - model, s2s_model,
4655
+ * model_temperature, knowledge_base_ids, kb_config, start_speaker,
4656
+ * begin_after_user_silence_ms, begin_message.
4657
+ */
4658
+ export interface RetellLlm {
4659
+ /**
4660
+ * If set, the AI will begin the conversation after waiting for the user for the
4661
+ * duration (in milliseconds) specified by this attribute. This only applies if the
4662
+ * agent is configured to wait for the user to speak first. If not set, the agent
4663
+ * will wait indefinitely for the user to speak.
4664
+ */
4665
+ begin_after_user_silence_ms?: number | null;
4666
+
4667
+ /**
4668
+ * First utterance said by the agent in the call. If not set, LLM will dynamically
4669
+ * generate a message. If set to "", agent will wait for user to speak first.
4670
+ */
4671
+ begin_message?: string | null;
4672
+
4673
+ /**
4674
+ * Knowledge base configuration for RAG retrieval.
4675
+ */
4676
+ kb_config?: RetellLlm.KBConfig | null;
4677
+
4678
+ /**
4679
+ * A list of knowledge base ids to use for this resource.
4680
+ */
4681
+ knowledge_base_ids?: Array<string> | null;
4682
+
4683
+ /**
4684
+ * Select the underlying text LLM. If not set, would default to gpt-4.1.
4685
+ */
4686
+ model?:
4687
+ | 'gpt-5'
4688
+ | 'gpt-5-mini'
4689
+ | 'gpt-5-nano'
4690
+ | 'gpt-4o'
4691
+ | 'gpt-4o-mini'
4692
+ | 'gpt-4.1'
4693
+ | 'gpt-4.1-mini'
4694
+ | 'gpt-4.1-nano'
4695
+ | 'claude-3.7-sonnet'
4696
+ | 'claude-3.5-haiku'
4697
+ | 'gemini-2.0-flash'
4698
+ | 'gemini-2.0-flash-lite'
4699
+ | 'gemini-2.5-flash'
4700
+ | 'gemini-2.5-flash-lite'
4701
+ | null;
4702
+
4703
+ /**
4704
+ * If set, will control the randomness of the response. Value ranging from [0,1].
4705
+ * Lower value means more deterministic, while higher value means more random. If
4706
+ * unset, default value 0 will apply. Note that for tool calling, a lower value is
4707
+ * recommended.
4708
+ */
4709
+ model_temperature?: number;
4710
+
4711
+ /**
4712
+ * Select the underlying speech to speech model. Can only set this or model, not
4713
+ * both.
4714
+ */
4715
+ s2s_model?: 'gpt-4o-realtime' | 'gpt-4o-mini-realtime' | 'gpt-realtime' | null;
4716
+
4717
+ /**
4718
+ * The speaker who starts the conversation. Must be either 'user' or
4719
+ * 'agent'.
4720
+ */
4721
+ start_speaker?: 'user' | 'agent';
4722
+ }
4723
+
4724
+ export namespace RetellLlm {
4725
+ /**
4726
+ * Knowledge base configuration for RAG retrieval.
4727
+ */
4728
+ export interface KBConfig {
4729
+ /**
4730
+ * Similarity threshold for filtering search results
4731
+ */
4732
+ filter_score?: number;
4733
+
4734
+ /**
4735
+ * Max number of knowledge base chunks to retrieve
4736
+ */
4737
+ top_k?: number;
4738
+ }
4739
+ }
4740
+ }
2245
4741
  }
2246
4742
 
2247
4743
  export declare namespace Call {