jambonz-python-sdk 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jambonz_python_sdk-0.2.0.dist-info/METADATA +179 -0
- jambonz_python_sdk-0.2.0.dist-info/RECORD +119 -0
- jambonz_python_sdk-0.2.0.dist-info/WHEEL +4 -0
- jambonz_sdk/__init__.py +52 -0
- jambonz_sdk/_signature.py +73 -0
- jambonz_sdk/client/__init__.py +15 -0
- jambonz_sdk/client/api.py +241 -0
- jambonz_sdk/schema/callbacks/amd.schema.json +50 -0
- jambonz_sdk/schema/callbacks/base.schema.json +29 -0
- jambonz_sdk/schema/callbacks/call-status.schema.json +22 -0
- jambonz_sdk/schema/callbacks/conference-status.schema.json +24 -0
- jambonz_sdk/schema/callbacks/conference-wait.schema.json +11 -0
- jambonz_sdk/schema/callbacks/conference.schema.json +11 -0
- jambonz_sdk/schema/callbacks/dequeue.schema.json +19 -0
- jambonz_sdk/schema/callbacks/dial-dtmf.schema.json +18 -0
- jambonz_sdk/schema/callbacks/dial-hold.schema.json +22 -0
- jambonz_sdk/schema/callbacks/dial-refer.schema.json +28 -0
- jambonz_sdk/schema/callbacks/dial.schema.json +31 -0
- jambonz_sdk/schema/callbacks/enqueue-wait.schema.json +17 -0
- jambonz_sdk/schema/callbacks/enqueue.schema.json +27 -0
- jambonz_sdk/schema/callbacks/gather-partial.schema.json +54 -0
- jambonz_sdk/schema/callbacks/gather.schema.json +60 -0
- jambonz_sdk/schema/callbacks/listen.schema.json +21 -0
- jambonz_sdk/schema/callbacks/llm.schema.json +30 -0
- jambonz_sdk/schema/callbacks/message.schema.json +35 -0
- jambonz_sdk/schema/callbacks/pipeline-turn.schema.json +109 -0
- jambonz_sdk/schema/callbacks/play.schema.json +36 -0
- jambonz_sdk/schema/callbacks/session-new.schema.json +143 -0
- jambonz_sdk/schema/callbacks/session-reconnect.schema.json +9 -0
- jambonz_sdk/schema/callbacks/session-redirect.schema.json +38 -0
- jambonz_sdk/schema/callbacks/sip-refer-event.schema.json +20 -0
- jambonz_sdk/schema/callbacks/sip-refer.schema.json +22 -0
- jambonz_sdk/schema/callbacks/sip-request.schema.json +27 -0
- jambonz_sdk/schema/callbacks/transcribe-translation.schema.json +24 -0
- jambonz_sdk/schema/callbacks/transcribe.schema.json +46 -0
- jambonz_sdk/schema/callbacks/tts-streaming-event.schema.json +77 -0
- jambonz_sdk/schema/callbacks/verb-status.schema.json +57 -0
- jambonz_sdk/schema/components/actionHook.schema.json +36 -0
- jambonz_sdk/schema/components/actionHookDelayAction.schema.json +37 -0
- jambonz_sdk/schema/components/amd.schema.json +68 -0
- jambonz_sdk/schema/components/auth.schema.json +18 -0
- jambonz_sdk/schema/components/bidirectionalAudio.schema.json +22 -0
- jambonz_sdk/schema/components/fillerNoise.schema.json +25 -0
- jambonz_sdk/schema/components/llm-base.schema.json +94 -0
- jambonz_sdk/schema/components/recognizer-assemblyAiOptions.schema.json +66 -0
- jambonz_sdk/schema/components/recognizer-awsOptions.schema.json +52 -0
- jambonz_sdk/schema/components/recognizer-azureOptions.schema.json +32 -0
- jambonz_sdk/schema/components/recognizer-cobaltOptions.schema.json +34 -0
- jambonz_sdk/schema/components/recognizer-customOptions.schema.json +27 -0
- jambonz_sdk/schema/components/recognizer-deepgramOptions.schema.json +147 -0
- jambonz_sdk/schema/components/recognizer-elevenlabsOptions.schema.json +39 -0
- jambonz_sdk/schema/components/recognizer-gladiaOptions.schema.json +8 -0
- jambonz_sdk/schema/components/recognizer-googleOptions.schema.json +35 -0
- jambonz_sdk/schema/components/recognizer-houndifyOptions.schema.json +53 -0
- jambonz_sdk/schema/components/recognizer-ibmOptions.schema.json +54 -0
- jambonz_sdk/schema/components/recognizer-nuanceOptions.schema.json +150 -0
- jambonz_sdk/schema/components/recognizer-nvidiaOptions.schema.json +39 -0
- jambonz_sdk/schema/components/recognizer-openaiOptions.schema.json +59 -0
- jambonz_sdk/schema/components/recognizer-sonioxOptions.schema.json +46 -0
- jambonz_sdk/schema/components/recognizer-speechmaticsOptions.schema.json +100 -0
- jambonz_sdk/schema/components/recognizer-verbioOptions.schema.json +46 -0
- jambonz_sdk/schema/components/recognizer.schema.json +216 -0
- jambonz_sdk/schema/components/synthesizer.schema.json +82 -0
- jambonz_sdk/schema/components/target.schema.json +105 -0
- jambonz_sdk/schema/components/vad.schema.json +48 -0
- jambonz_sdk/schema/jambonz-app.schema.json +113 -0
- jambonz_sdk/schema/verbs/alert.schema.json +34 -0
- jambonz_sdk/schema/verbs/answer.schema.json +22 -0
- jambonz_sdk/schema/verbs/conference.schema.json +107 -0
- jambonz_sdk/schema/verbs/config.schema.json +221 -0
- jambonz_sdk/schema/verbs/deepgram_s2s.schema.json +81 -0
- jambonz_sdk/schema/verbs/dequeue.schema.json +51 -0
- jambonz_sdk/schema/verbs/dial.schema.json +200 -0
- jambonz_sdk/schema/verbs/dialogflow.schema.json +148 -0
- jambonz_sdk/schema/verbs/dtmf.schema.json +49 -0
- jambonz_sdk/schema/verbs/dub.schema.json +103 -0
- jambonz_sdk/schema/verbs/elevenlabs_s2s.schema.json +81 -0
- jambonz_sdk/schema/verbs/enqueue.schema.json +53 -0
- jambonz_sdk/schema/verbs/gather.schema.json +190 -0
- jambonz_sdk/schema/verbs/google_s2s.schema.json +42 -0
- jambonz_sdk/schema/verbs/hangup.schema.json +36 -0
- jambonz_sdk/schema/verbs/leave.schema.json +22 -0
- jambonz_sdk/schema/verbs/listen.schema.json +127 -0
- jambonz_sdk/schema/verbs/llm.schema.json +44 -0
- jambonz_sdk/schema/verbs/message.schema.json +82 -0
- jambonz_sdk/schema/verbs/openai_s2s.schema.json +42 -0
- jambonz_sdk/schema/verbs/pause.schema.json +36 -0
- jambonz_sdk/schema/verbs/pipeline.schema.json +240 -0
- jambonz_sdk/schema/verbs/play.schema.json +96 -0
- jambonz_sdk/schema/verbs/redirect.schema.json +34 -0
- jambonz_sdk/schema/verbs/rest_dial.schema.json +113 -0
- jambonz_sdk/schema/verbs/s2s.schema.json +39 -0
- jambonz_sdk/schema/verbs/say.schema.json +107 -0
- jambonz_sdk/schema/verbs/sip-decline.schema.json +58 -0
- jambonz_sdk/schema/verbs/sip-refer.schema.json +58 -0
- jambonz_sdk/schema/verbs/sip-request.schema.json +54 -0
- jambonz_sdk/schema/verbs/stream.schema.json +103 -0
- jambonz_sdk/schema/verbs/tag.schema.json +41 -0
- jambonz_sdk/schema/verbs/transcribe.schema.json +57 -0
- jambonz_sdk/schema/verbs/ultravox_s2s.schema.json +41 -0
- jambonz_sdk/types/__init__.py +139 -0
- jambonz_sdk/types/components.py +250 -0
- jambonz_sdk/types/rest.py +59 -0
- jambonz_sdk/types/session.py +55 -0
- jambonz_sdk/types/verbs.py +572 -0
- jambonz_sdk/validator.py +107 -0
- jambonz_sdk/verb_builder.py +316 -0
- jambonz_sdk/verb_builder.pyi +1133 -0
- jambonz_sdk/verb_registry.py +102 -0
- jambonz_sdk/webhook/__init__.py +10 -0
- jambonz_sdk/webhook/middleware.py +63 -0
- jambonz_sdk/webhook/response.py +43 -0
- jambonz_sdk/websocket/__init__.py +15 -0
- jambonz_sdk/websocket/audio_client.py +11 -0
- jambonz_sdk/websocket/audio_stream.py +151 -0
- jambonz_sdk/websocket/client.py +165 -0
- jambonz_sdk/websocket/endpoint.py +193 -0
- jambonz_sdk/websocket/router.py +87 -0
- jambonz_sdk/websocket/session.py +259 -0
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/pipeline",
|
|
4
|
+
"minVersion": "10.1.0",
|
|
5
|
+
"title": "Pipeline",
|
|
6
|
+
"description": "Configures a complete STT → LLM → TTS voice AI pipeline with integrated turn detection. Provides a higher-level abstraction than manually orchestrating the individual components. Optimized for building voice AI agents with proper turn-taking behavior.",
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"verb": {
|
|
10
|
+
"const": "pipeline"
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"stt": {
|
|
17
|
+
"$ref": "../components/recognizer",
|
|
18
|
+
"description": "Speech-to-text configuration for the pipeline."
|
|
19
|
+
},
|
|
20
|
+
"tts": {
|
|
21
|
+
"$ref": "../components/synthesizer",
|
|
22
|
+
"description": "Text-to-speech configuration for the pipeline."
|
|
23
|
+
},
|
|
24
|
+
"turnDetection": {
|
|
25
|
+
"oneOf": [
|
|
26
|
+
{
|
|
27
|
+
"type": "string",
|
|
28
|
+
"enum": ["stt", "krisp"],
|
|
29
|
+
"description": "Turn detection strategy shorthand. 'stt' uses the STT vendor's native signals (silence-based for most vendors; acoustic+semantic for deepgramflux, assemblyai, speechmatics). 'krisp' uses the Krisp acoustic end-of-turn model with default settings."
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"type": "object",
|
|
33
|
+
"description": "Turn detection configuration with tunable parameters.",
|
|
34
|
+
"properties": {
|
|
35
|
+
"mode": {
|
|
36
|
+
"type": "string",
|
|
37
|
+
"enum": ["krisp"],
|
|
38
|
+
"description": "Turn detection mode. Currently only 'krisp' supports object-form tuning."
|
|
39
|
+
},
|
|
40
|
+
"threshold": {
|
|
41
|
+
"type": "number",
|
|
42
|
+
"minimum": 0,
|
|
43
|
+
"maximum": 1,
|
|
44
|
+
"description": "Krisp end-of-turn confidence threshold (0.0–1.0). Lower values trigger earlier turn transitions. Default: 0.5"
|
|
45
|
+
},
|
|
46
|
+
"model": {
|
|
47
|
+
"type": "string",
|
|
48
|
+
"description": "Optional Krisp model name override."
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"required": ["mode"],
|
|
52
|
+
"additionalProperties": false
|
|
53
|
+
}
|
|
54
|
+
],
|
|
55
|
+
"default": "stt",
|
|
56
|
+
"description": "Turn detection strategy. Controls when the pipeline decides the user has finished speaking. STT vendors with native turn-taking (deepgramflux, assemblyai, speechmatics) always use their built-in detection regardless of this setting."
|
|
57
|
+
},
|
|
58
|
+
"bargeIn": {
|
|
59
|
+
"type": "object",
|
|
60
|
+
"description": "Controls whether and how the user can interrupt the assistant while it is speaking.",
|
|
61
|
+
"properties": {
|
|
62
|
+
"enable": {
|
|
63
|
+
"type": "boolean",
|
|
64
|
+
"description": "Allow the user to interrupt the assistant while it is speaking. Default: true.",
|
|
65
|
+
"default": true
|
|
66
|
+
},
|
|
67
|
+
"minSpeechDuration": {
|
|
68
|
+
"type": "number",
|
|
69
|
+
"minimum": 0,
|
|
70
|
+
"description": "Seconds of detected speech required before confirming an interruption. Prevents brief noises from cutting off the assistant. Default: 0.5",
|
|
71
|
+
"default": 0.5
|
|
72
|
+
},
|
|
73
|
+
"sticky": {
|
|
74
|
+
"type": "boolean",
|
|
75
|
+
"description": "If true, once the user interrupts the assistant does not resume speaking. Default: false.",
|
|
76
|
+
"default": false
|
|
77
|
+
}
|
|
78
|
+
},
|
|
79
|
+
"additionalProperties": true
|
|
80
|
+
},
|
|
81
|
+
"noResponseTimeout": {
|
|
82
|
+
"type": "number",
|
|
83
|
+
"minimum": 0,
|
|
84
|
+
"description": "Seconds to wait after the assistant finishes speaking before prompting the user to respond. 0 disables. Default: 0.",
|
|
85
|
+
"default": 0
|
|
86
|
+
},
|
|
87
|
+
"llm": {
|
|
88
|
+
"type": "object",
|
|
89
|
+
"description": "LLM configuration for the pipeline. See the 'llm' verb schema for details.",
|
|
90
|
+
"additionalProperties": true
|
|
91
|
+
},
|
|
92
|
+
"actionHook": {
|
|
93
|
+
"$ref": "../components/actionHook",
|
|
94
|
+
"description": "A webhook invoked when the pipeline ends."
|
|
95
|
+
},
|
|
96
|
+
"eventHook": {
|
|
97
|
+
"$ref": "../components/actionHook",
|
|
98
|
+
"description": "A webhook invoked for pipeline events. Receives event types: 'user_transcript' (user speech recognized), 'agent_response' (assistant reply), 'user_interruption' (barge-in detected), and 'turn_end' (end-of-turn summary with transcript, response, and latency metrics)."
|
|
99
|
+
},
|
|
100
|
+
"toolHook": {
|
|
101
|
+
"$ref": "../components/actionHook",
|
|
102
|
+
"description": "A webhook invoked when the LLM requests a tool/function call. The payload includes the tool name and arguments; the response provides the tool result."
|
|
103
|
+
},
|
|
104
|
+
"greeting": {
|
|
105
|
+
"type": "boolean",
|
|
106
|
+
"description": "Whether the LLM should generate an initial greeting before the user speaks. Default: true.",
|
|
107
|
+
"default": true
|
|
108
|
+
},
|
|
109
|
+
"earlyGeneration": {
|
|
110
|
+
"type": "boolean",
|
|
111
|
+
"description": "Enable speculative LLM prompting before end-of-turn is confirmed. When using Krisp turn detection, set this to true to speculatively prompt the LLM before Krisp confirms the turn has ended. If the transcript matches when the turn ends, buffered tokens are released immediately — reducing response latency. Note: Deepgram Flux performs early generation automatically via its native EagerEndOfTurn signal regardless of this setting. Default: false.",
|
|
112
|
+
"default": false
|
|
113
|
+
},
|
|
114
|
+
"noiseIsolation": {
|
|
115
|
+
"oneOf": [
|
|
116
|
+
{
|
|
117
|
+
"type": "string",
|
|
118
|
+
"enum": ["krisp", "rnnoise"],
|
|
119
|
+
"description": "Shorthand — enable noise isolation with the specified vendor using default settings."
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"type": "object",
|
|
123
|
+
"description": "Detailed noise isolation configuration.",
|
|
124
|
+
"properties": {
|
|
125
|
+
"mode": {
|
|
126
|
+
"type": "string",
|
|
127
|
+
"description": "Noise isolation vendor/mode (e.g. 'krisp')."
|
|
128
|
+
},
|
|
129
|
+
"level": {
|
|
130
|
+
"type": "number",
|
|
131
|
+
"minimum": 0,
|
|
132
|
+
"maximum": 100,
|
|
133
|
+
"description": "Suppression level 0–100. Default: 100."
|
|
134
|
+
},
|
|
135
|
+
"direction": {
|
|
136
|
+
"type": "string",
|
|
137
|
+
"enum": ["read", "write"],
|
|
138
|
+
"description": "Audio direction to apply noise isolation. 'read' filters caller audio, 'write' filters outbound audio. Default: 'read'."
|
|
139
|
+
},
|
|
140
|
+
"model": {
|
|
141
|
+
"type": "string",
|
|
142
|
+
"description": "Optional model name override."
|
|
143
|
+
}
|
|
144
|
+
},
|
|
145
|
+
"required": ["mode"],
|
|
146
|
+
"additionalProperties": false
|
|
147
|
+
}
|
|
148
|
+
],
|
|
149
|
+
"description": "Enable server-side noise isolation to reduce background noise on call audio. Defaults to filtering inbound (caller) audio; set direction to 'write' for outbound. Useful for improving STT accuracy in noisy environments."
|
|
150
|
+
},
|
|
151
|
+
"mcpServers": {
|
|
152
|
+
"type": "array",
|
|
153
|
+
"items": {
|
|
154
|
+
"type": "object",
|
|
155
|
+
"properties": {
|
|
156
|
+
"url": {
|
|
157
|
+
"type": "string",
|
|
158
|
+
"format": "uri",
|
|
159
|
+
"description": "The URL of the MCP server."
|
|
160
|
+
},
|
|
161
|
+
"auth": {
|
|
162
|
+
"type": "object",
|
|
163
|
+
"description": "Authentication for the MCP server.",
|
|
164
|
+
"additionalProperties": true
|
|
165
|
+
},
|
|
166
|
+
"roots": {
|
|
167
|
+
"type": "array",
|
|
168
|
+
"items": { "type": "object" },
|
|
169
|
+
"description": "MCP root definitions."
|
|
170
|
+
}
|
|
171
|
+
},
|
|
172
|
+
"required": ["url"]
|
|
173
|
+
},
|
|
174
|
+
"description": "External MCP servers that provide tools to the LLM. The pipeline connects at startup via SSE, discovers available tools, and makes them callable by the LLM."
|
|
175
|
+
}
|
|
176
|
+
},
|
|
177
|
+
"required": [
|
|
178
|
+
"llm"
|
|
179
|
+
],
|
|
180
|
+
"examples": [
|
|
181
|
+
{
|
|
182
|
+
"verb": "pipeline",
|
|
183
|
+
"stt": {
|
|
184
|
+
"vendor": "deepgram",
|
|
185
|
+
"language": "en-US"
|
|
186
|
+
},
|
|
187
|
+
"tts": {
|
|
188
|
+
"vendor": "cartesia",
|
|
189
|
+
"voice": "sonic-english"
|
|
190
|
+
},
|
|
191
|
+
"llm": {
|
|
192
|
+
"vendor": "openai",
|
|
193
|
+
"model": "gpt-4o",
|
|
194
|
+
"llmOptions": {
|
|
195
|
+
"messages": [
|
|
196
|
+
{
|
|
197
|
+
"role": "system",
|
|
198
|
+
"content": "You are a helpful voice assistant."
|
|
199
|
+
}
|
|
200
|
+
]
|
|
201
|
+
}
|
|
202
|
+
},
|
|
203
|
+
"turnDetection": "stt",
|
|
204
|
+
"actionHook": "/pipeline-complete"
|
|
205
|
+
},
|
|
206
|
+
{
|
|
207
|
+
"verb": "pipeline",
|
|
208
|
+
"stt": {
|
|
209
|
+
"vendor": "deepgram",
|
|
210
|
+
"language": "en-US"
|
|
211
|
+
},
|
|
212
|
+
"tts": {
|
|
213
|
+
"vendor": "cartesia",
|
|
214
|
+
"voice": "sonic-english"
|
|
215
|
+
},
|
|
216
|
+
"llm": {
|
|
217
|
+
"vendor": "anthropic",
|
|
218
|
+
"model": "claude-opus-4-6",
|
|
219
|
+
"llmOptions": {
|
|
220
|
+
"messages": [
|
|
221
|
+
{
|
|
222
|
+
"role": "user",
|
|
223
|
+
"content": "You are a helpful voice assistant."
|
|
224
|
+
}
|
|
225
|
+
]
|
|
226
|
+
}
|
|
227
|
+
},
|
|
228
|
+
"turnDetection": {
|
|
229
|
+
"mode": "krisp",
|
|
230
|
+
"threshold": 0.3
|
|
231
|
+
},
|
|
232
|
+
"bargeIn": {
|
|
233
|
+
"enable": true,
|
|
234
|
+
"minSpeechDuration": 0.3,
|
|
235
|
+
"sticky": false
|
|
236
|
+
},
|
|
237
|
+
"actionHook": "/pipeline-complete"
|
|
238
|
+
}
|
|
239
|
+
]
|
|
240
|
+
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/play",
|
|
4
|
+
"minVersion": "0.9.6",
|
|
5
|
+
"title": "Play",
|
|
6
|
+
"description": "Plays an audio file to the caller. Supports WAV and MP3 formats hosted at a URL. Can play a single file or a list of files in sequence.",
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"verb": {
|
|
10
|
+
"const": "play",
|
|
11
|
+
"description": "The verb name."
|
|
12
|
+
},
|
|
13
|
+
"id": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"description": "An optional unique identifier for this verb instance."
|
|
16
|
+
},
|
|
17
|
+
"url": {
|
|
18
|
+
"oneOf": [
|
|
19
|
+
{
|
|
20
|
+
"type": "string",
|
|
21
|
+
"format": "uri"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"type": "array",
|
|
25
|
+
"items": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"format": "uri"
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
],
|
|
31
|
+
"description": "The URL(s) of the audio file(s) to play. Supports WAV and MP3. If an array, files are played in sequence.",
|
|
32
|
+
"examples": [
|
|
33
|
+
"https://example.com/sounds/greeting.wav",
|
|
34
|
+
[
|
|
35
|
+
"https://example.com/sounds/part1.wav",
|
|
36
|
+
"https://example.com/sounds/part2.wav"
|
|
37
|
+
]
|
|
38
|
+
]
|
|
39
|
+
},
|
|
40
|
+
"loop": {
|
|
41
|
+
"oneOf": [
|
|
42
|
+
{
|
|
43
|
+
"type": "number"
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"type": "string"
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"description": "Number of times to repeat playback. Use 0 or 'forever' to loop indefinitely until interrupted.",
|
|
50
|
+
"examples": [
|
|
51
|
+
3,
|
|
52
|
+
"forever"
|
|
53
|
+
]
|
|
54
|
+
},
|
|
55
|
+
"earlyMedia": {
|
|
56
|
+
"type": "boolean",
|
|
57
|
+
"description": "If true, play the audio as early media before the call is answered."
|
|
58
|
+
},
|
|
59
|
+
"seekOffset": {
|
|
60
|
+
"oneOf": [
|
|
61
|
+
{
|
|
62
|
+
"type": "number"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"type": "string"
|
|
66
|
+
}
|
|
67
|
+
],
|
|
68
|
+
"description": "Start playback at this offset in seconds from the beginning of the file."
|
|
69
|
+
},
|
|
70
|
+
"timeoutSecs": {
|
|
71
|
+
"oneOf": [
|
|
72
|
+
{
|
|
73
|
+
"type": "number"
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"type": "string"
|
|
77
|
+
}
|
|
78
|
+
],
|
|
79
|
+
"description": "Maximum time in seconds to play the audio. Playback stops after this duration even if the file has not finished."
|
|
80
|
+
},
|
|
81
|
+
"actionHook": {
|
|
82
|
+
"$ref": "../components/actionHook",
|
|
83
|
+
"description": "A webhook to invoke when playback completes."
|
|
84
|
+
}
|
|
85
|
+
},
|
|
86
|
+
"required": [
|
|
87
|
+
"url"
|
|
88
|
+
],
|
|
89
|
+
"examples": [
|
|
90
|
+
{
|
|
91
|
+
"verb": "play",
|
|
92
|
+
"url": "https://example.com/sounds/hold-music.mp3",
|
|
93
|
+
"loop": "forever"
|
|
94
|
+
}
|
|
95
|
+
]
|
|
96
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/redirect",
|
|
4
|
+
"minVersion": "0.9.6",
|
|
5
|
+
"title": "Redirect",
|
|
6
|
+
"description": "Transfers call control to a different webhook URL. The current verb stack is abandoned and the new webhook's response becomes the active application. Useful for modular application design where different URLs handle different phases of a call.",
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"verb": {
|
|
10
|
+
"const": "redirect"
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"actionHook": {
|
|
17
|
+
"$ref": "../components/actionHook",
|
|
18
|
+
"description": "The webhook to transfer control to. Must return a new array of verbs."
|
|
19
|
+
},
|
|
20
|
+
"statusHook": {
|
|
21
|
+
"$ref": "../components/actionHook",
|
|
22
|
+
"description": "A webhook to receive call status events after the redirect."
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"required": [
|
|
26
|
+
"actionHook"
|
|
27
|
+
],
|
|
28
|
+
"examples": [
|
|
29
|
+
{
|
|
30
|
+
"verb": "redirect",
|
|
31
|
+
"actionHook": "/new-handler"
|
|
32
|
+
}
|
|
33
|
+
]
|
|
34
|
+
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/rest:dial",
|
|
4
|
+
"title": "REST Dial",
|
|
5
|
+
"description": "Internal verb used to originate an outbound call via the REST API. Not typically used directly in application verb arrays.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": {
|
|
9
|
+
"const": "rest:dial"
|
|
10
|
+
},
|
|
11
|
+
"id": {
|
|
12
|
+
"type": "string"
|
|
13
|
+
},
|
|
14
|
+
"account_sid": {
|
|
15
|
+
"type": "string"
|
|
16
|
+
},
|
|
17
|
+
"application_sid": {
|
|
18
|
+
"type": "string"
|
|
19
|
+
},
|
|
20
|
+
"call_hook": {
|
|
21
|
+
"oneOf": [
|
|
22
|
+
{ "type": "string" },
|
|
23
|
+
{ "type": "object" }
|
|
24
|
+
],
|
|
25
|
+
"description": "Webhook URL or object for call control."
|
|
26
|
+
},
|
|
27
|
+
"call_status_hook": {
|
|
28
|
+
"oneOf": [
|
|
29
|
+
{ "type": "string" },
|
|
30
|
+
{ "type": "object" }
|
|
31
|
+
],
|
|
32
|
+
"description": "Webhook URL or object for call status notifications."
|
|
33
|
+
},
|
|
34
|
+
"from": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"description": "The caller ID for the outbound call."
|
|
37
|
+
},
|
|
38
|
+
"callerName": {
|
|
39
|
+
"type": "string",
|
|
40
|
+
"description": "Display name for the caller."
|
|
41
|
+
},
|
|
42
|
+
"fromHost": {
|
|
43
|
+
"type": "string",
|
|
44
|
+
"description": "SIP host to use in the From header."
|
|
45
|
+
},
|
|
46
|
+
"speech_synthesis_vendor": {
|
|
47
|
+
"type": "string"
|
|
48
|
+
},
|
|
49
|
+
"speech_synthesis_voice": {
|
|
50
|
+
"type": "string"
|
|
51
|
+
},
|
|
52
|
+
"speech_synthesis_language": {
|
|
53
|
+
"type": "string"
|
|
54
|
+
},
|
|
55
|
+
"speech_recognizer_vendor": {
|
|
56
|
+
"type": "string"
|
|
57
|
+
},
|
|
58
|
+
"speech_recognizer_language": {
|
|
59
|
+
"type": "string"
|
|
60
|
+
},
|
|
61
|
+
"tag": {
|
|
62
|
+
"type": "object",
|
|
63
|
+
"description": "Arbitrary metadata to attach to the call.",
|
|
64
|
+
"additionalProperties": true
|
|
65
|
+
},
|
|
66
|
+
"to": {
|
|
67
|
+
"$ref": "../components/target",
|
|
68
|
+
"description": "The call destination."
|
|
69
|
+
},
|
|
70
|
+
"headers": {
|
|
71
|
+
"type": "object",
|
|
72
|
+
"description": "Custom SIP headers to include on the outbound INVITE.",
|
|
73
|
+
"additionalProperties": {
|
|
74
|
+
"oneOf": [
|
|
75
|
+
{ "type": "string" },
|
|
76
|
+
{ "type": "number" }
|
|
77
|
+
]
|
|
78
|
+
}
|
|
79
|
+
},
|
|
80
|
+
"timeout": {
|
|
81
|
+
"type": "number",
|
|
82
|
+
"description": "Ring timeout in seconds."
|
|
83
|
+
},
|
|
84
|
+
"amd": {
|
|
85
|
+
"$ref": "../components/amd",
|
|
86
|
+
"description": "Answering machine detection configuration."
|
|
87
|
+
},
|
|
88
|
+
"dual_streams": {
|
|
89
|
+
"type": "boolean",
|
|
90
|
+
"description": "If true, send separate audio streams for each call leg."
|
|
91
|
+
},
|
|
92
|
+
"sipRequestWithinDialogHook": {
|
|
93
|
+
"type": "string",
|
|
94
|
+
"description": "Webhook for in-dialog SIP requests."
|
|
95
|
+
},
|
|
96
|
+
"referHook": {
|
|
97
|
+
"oneOf": [
|
|
98
|
+
{ "type": "string" },
|
|
99
|
+
{ "type": "object" }
|
|
100
|
+
],
|
|
101
|
+
"description": "Webhook for SIP REFER handling."
|
|
102
|
+
},
|
|
103
|
+
"timeLimit": {
|
|
104
|
+
"type": "number",
|
|
105
|
+
"description": "Maximum call duration in seconds."
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
"required": [
|
|
109
|
+
"call_hook",
|
|
110
|
+
"from",
|
|
111
|
+
"to"
|
|
112
|
+
]
|
|
113
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/s2s",
|
|
4
|
+
"minVersion": "10.1.0",
|
|
5
|
+
"title": "S2S",
|
|
6
|
+
"description": "Synonym for 'llm'. Connects the caller to a large language model for a real-time speech-to-speech voice conversation. Requires 'vendor' to be specified explicitly.",
|
|
7
|
+
"type": "object",
|
|
8
|
+
"allOf": [
|
|
9
|
+
{
|
|
10
|
+
"$ref": "../components/llm-base"
|
|
11
|
+
}
|
|
12
|
+
],
|
|
13
|
+
"properties": {
|
|
14
|
+
"verb": {
|
|
15
|
+
"const": "s2s",
|
|
16
|
+
"description": "The verb name."
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"required": [
|
|
20
|
+
"vendor",
|
|
21
|
+
"llmOptions"
|
|
22
|
+
],
|
|
23
|
+
"examples": [
|
|
24
|
+
{
|
|
25
|
+
"verb": "s2s",
|
|
26
|
+
"vendor": "openai",
|
|
27
|
+
"model": "gpt-4o-realtime",
|
|
28
|
+
"llmOptions": {
|
|
29
|
+
"messages": [
|
|
30
|
+
{
|
|
31
|
+
"role": "system",
|
|
32
|
+
"content": "You are a helpful voice assistant."
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
},
|
|
36
|
+
"actionHook": "/s2s-complete"
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/say",
|
|
4
|
+
"minVersion": "0.9.6",
|
|
5
|
+
"title": "Say",
|
|
6
|
+
"description": "Speaks text to the caller using text-to-speech. The text can be plain text or SSML. Optionally streams TTS output incrementally for lower latency. This is one of the most commonly used verbs in jambonz applications.",
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"verb": {
|
|
10
|
+
"const": "say",
|
|
11
|
+
"description": "The verb name."
|
|
12
|
+
},
|
|
13
|
+
"id": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"description": "An optional unique identifier for this verb instance. Can be used to reference it in other contexts."
|
|
16
|
+
},
|
|
17
|
+
"text": {
|
|
18
|
+
"oneOf": [
|
|
19
|
+
{
|
|
20
|
+
"type": "string"
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"type": "array",
|
|
24
|
+
"items": {
|
|
25
|
+
"type": "string"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
],
|
|
29
|
+
"description": "The text to speak. Can be plain text or SSML markup. If an array is provided, one entry is selected at random (useful for variety in prompts).",
|
|
30
|
+
"examples": [
|
|
31
|
+
"Hello, welcome to our service.",
|
|
32
|
+
"<speak>Hello <break time='500ms'/> welcome.</speak>",
|
|
33
|
+
[
|
|
34
|
+
"Hello!",
|
|
35
|
+
"Hi there!",
|
|
36
|
+
"Welcome!"
|
|
37
|
+
]
|
|
38
|
+
]
|
|
39
|
+
},
|
|
40
|
+
"instructions": {
|
|
41
|
+
"type": "string",
|
|
42
|
+
"description": "Natural language instructions to guide TTS expression and delivery. Supported by vendors that offer instruction-based synthesis (e.g. ElevenLabs, some OpenAI models).",
|
|
43
|
+
"examples": [
|
|
44
|
+
"Speak in a warm, friendly tone",
|
|
45
|
+
"Sound excited and energetic"
|
|
46
|
+
]
|
|
47
|
+
},
|
|
48
|
+
"stream": {
|
|
49
|
+
"type": "boolean",
|
|
50
|
+
"description": "If true, stream TTS audio to the caller incrementally as it is generated, rather than waiting for the complete audio. Reduces time-to-first-byte for long utterances. Requires a vendor that supports streaming synthesis."
|
|
51
|
+
},
|
|
52
|
+
"loop": {
|
|
53
|
+
"oneOf": [
|
|
54
|
+
{
|
|
55
|
+
"type": "number"
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"type": "string"
|
|
59
|
+
}
|
|
60
|
+
],
|
|
61
|
+
"description": "Number of times to repeat the speech. Use 0 or 'forever' to loop indefinitely until interrupted.",
|
|
62
|
+
"examples": [
|
|
63
|
+
2,
|
|
64
|
+
"forever"
|
|
65
|
+
]
|
|
66
|
+
},
|
|
67
|
+
"synthesizer": {
|
|
68
|
+
"$ref": "../components/synthesizer",
|
|
69
|
+
"description": "Override the session-level TTS configuration for this specific utterance."
|
|
70
|
+
},
|
|
71
|
+
"earlyMedia": {
|
|
72
|
+
"type": "boolean",
|
|
73
|
+
"description": "If true, play the audio as early media (before the call is answered). Used for playing announcements or prompts to the caller before the call is formally connected."
|
|
74
|
+
},
|
|
75
|
+
"disableTtsCache": {
|
|
76
|
+
"type": "boolean",
|
|
77
|
+
"description": "If true, bypass the TTS cache and always generate fresh audio. Useful when the same text should be re-synthesized (e.g. with different SSML or when the voice has been updated)."
|
|
78
|
+
},
|
|
79
|
+
"closeStreamOnEmpty": {
|
|
80
|
+
"type": "boolean",
|
|
81
|
+
"description": "If true, close the TTS stream when an empty text string is received. Only applies when stream is true."
|
|
82
|
+
}
|
|
83
|
+
},
|
|
84
|
+
"examples": [
|
|
85
|
+
{
|
|
86
|
+
"verb": "say",
|
|
87
|
+
"text": "Hello, welcome to Acme Corp. How can I help you today?"
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"verb": "say",
|
|
91
|
+
"text": "Please hold while I transfer your call.",
|
|
92
|
+
"synthesizer": {
|
|
93
|
+
"vendor": "elevenlabs",
|
|
94
|
+
"voice": "Rachel"
|
|
95
|
+
}
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"verb": "say",
|
|
99
|
+
"text": [
|
|
100
|
+
"Hello!",
|
|
101
|
+
"Hi there!",
|
|
102
|
+
"Welcome!"
|
|
103
|
+
],
|
|
104
|
+
"loop": 1
|
|
105
|
+
}
|
|
106
|
+
]
|
|
107
|
+
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/sip:decline",
|
|
4
|
+
"minVersion": "0.9.6",
|
|
5
|
+
"title": "SIP Decline",
|
|
6
|
+
"description": "Rejects an incoming call with a SIP error response. Used to decline calls with a specific status code and reason (e.g. 486 Busy Here, 603 Decline).",
|
|
7
|
+
"type": "object",
|
|
8
|
+
"properties": {
|
|
9
|
+
"verb": {
|
|
10
|
+
"const": "sip:decline"
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"status": {
|
|
17
|
+
"type": "number",
|
|
18
|
+
"description": "The SIP response status code to send.",
|
|
19
|
+
"examples": [
|
|
20
|
+
486,
|
|
21
|
+
603,
|
|
22
|
+
404,
|
|
23
|
+
480
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
"reason": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "The SIP reason phrase to include in the response.",
|
|
29
|
+
"examples": [
|
|
30
|
+
"Busy Here",
|
|
31
|
+
"Decline",
|
|
32
|
+
"Not Found"
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
"headers": {
|
|
36
|
+
"type": "object",
|
|
37
|
+
"description": "Custom SIP headers to include in the response.",
|
|
38
|
+
"additionalProperties": {
|
|
39
|
+
"oneOf": [{ "type": "string" }, { "type": "number" }]
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
},
|
|
43
|
+
"required": [
|
|
44
|
+
"status"
|
|
45
|
+
],
|
|
46
|
+
"examples": [
|
|
47
|
+
{
|
|
48
|
+
"verb": "sip:decline",
|
|
49
|
+
"status": 486,
|
|
50
|
+
"reason": "Busy Here"
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"verb": "sip:decline",
|
|
54
|
+
"status": 603,
|
|
55
|
+
"reason": "Decline"
|
|
56
|
+
}
|
|
57
|
+
]
|
|
58
|
+
}
|