jambonz-python-sdk 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jambonz_python_sdk-0.2.0.dist-info/METADATA +179 -0
- jambonz_python_sdk-0.2.0.dist-info/RECORD +119 -0
- jambonz_python_sdk-0.2.0.dist-info/WHEEL +4 -0
- jambonz_sdk/__init__.py +52 -0
- jambonz_sdk/_signature.py +73 -0
- jambonz_sdk/client/__init__.py +15 -0
- jambonz_sdk/client/api.py +241 -0
- jambonz_sdk/schema/callbacks/amd.schema.json +50 -0
- jambonz_sdk/schema/callbacks/base.schema.json +29 -0
- jambonz_sdk/schema/callbacks/call-status.schema.json +22 -0
- jambonz_sdk/schema/callbacks/conference-status.schema.json +24 -0
- jambonz_sdk/schema/callbacks/conference-wait.schema.json +11 -0
- jambonz_sdk/schema/callbacks/conference.schema.json +11 -0
- jambonz_sdk/schema/callbacks/dequeue.schema.json +19 -0
- jambonz_sdk/schema/callbacks/dial-dtmf.schema.json +18 -0
- jambonz_sdk/schema/callbacks/dial-hold.schema.json +22 -0
- jambonz_sdk/schema/callbacks/dial-refer.schema.json +28 -0
- jambonz_sdk/schema/callbacks/dial.schema.json +31 -0
- jambonz_sdk/schema/callbacks/enqueue-wait.schema.json +17 -0
- jambonz_sdk/schema/callbacks/enqueue.schema.json +27 -0
- jambonz_sdk/schema/callbacks/gather-partial.schema.json +54 -0
- jambonz_sdk/schema/callbacks/gather.schema.json +60 -0
- jambonz_sdk/schema/callbacks/listen.schema.json +21 -0
- jambonz_sdk/schema/callbacks/llm.schema.json +30 -0
- jambonz_sdk/schema/callbacks/message.schema.json +35 -0
- jambonz_sdk/schema/callbacks/pipeline-turn.schema.json +109 -0
- jambonz_sdk/schema/callbacks/play.schema.json +36 -0
- jambonz_sdk/schema/callbacks/session-new.schema.json +143 -0
- jambonz_sdk/schema/callbacks/session-reconnect.schema.json +9 -0
- jambonz_sdk/schema/callbacks/session-redirect.schema.json +38 -0
- jambonz_sdk/schema/callbacks/sip-refer-event.schema.json +20 -0
- jambonz_sdk/schema/callbacks/sip-refer.schema.json +22 -0
- jambonz_sdk/schema/callbacks/sip-request.schema.json +27 -0
- jambonz_sdk/schema/callbacks/transcribe-translation.schema.json +24 -0
- jambonz_sdk/schema/callbacks/transcribe.schema.json +46 -0
- jambonz_sdk/schema/callbacks/tts-streaming-event.schema.json +77 -0
- jambonz_sdk/schema/callbacks/verb-status.schema.json +57 -0
- jambonz_sdk/schema/components/actionHook.schema.json +36 -0
- jambonz_sdk/schema/components/actionHookDelayAction.schema.json +37 -0
- jambonz_sdk/schema/components/amd.schema.json +68 -0
- jambonz_sdk/schema/components/auth.schema.json +18 -0
- jambonz_sdk/schema/components/bidirectionalAudio.schema.json +22 -0
- jambonz_sdk/schema/components/fillerNoise.schema.json +25 -0
- jambonz_sdk/schema/components/llm-base.schema.json +94 -0
- jambonz_sdk/schema/components/recognizer-assemblyAiOptions.schema.json +66 -0
- jambonz_sdk/schema/components/recognizer-awsOptions.schema.json +52 -0
- jambonz_sdk/schema/components/recognizer-azureOptions.schema.json +32 -0
- jambonz_sdk/schema/components/recognizer-cobaltOptions.schema.json +34 -0
- jambonz_sdk/schema/components/recognizer-customOptions.schema.json +27 -0
- jambonz_sdk/schema/components/recognizer-deepgramOptions.schema.json +147 -0
- jambonz_sdk/schema/components/recognizer-elevenlabsOptions.schema.json +39 -0
- jambonz_sdk/schema/components/recognizer-gladiaOptions.schema.json +8 -0
- jambonz_sdk/schema/components/recognizer-googleOptions.schema.json +35 -0
- jambonz_sdk/schema/components/recognizer-houndifyOptions.schema.json +53 -0
- jambonz_sdk/schema/components/recognizer-ibmOptions.schema.json +54 -0
- jambonz_sdk/schema/components/recognizer-nuanceOptions.schema.json +150 -0
- jambonz_sdk/schema/components/recognizer-nvidiaOptions.schema.json +39 -0
- jambonz_sdk/schema/components/recognizer-openaiOptions.schema.json +59 -0
- jambonz_sdk/schema/components/recognizer-sonioxOptions.schema.json +46 -0
- jambonz_sdk/schema/components/recognizer-speechmaticsOptions.schema.json +100 -0
- jambonz_sdk/schema/components/recognizer-verbioOptions.schema.json +46 -0
- jambonz_sdk/schema/components/recognizer.schema.json +216 -0
- jambonz_sdk/schema/components/synthesizer.schema.json +82 -0
- jambonz_sdk/schema/components/target.schema.json +105 -0
- jambonz_sdk/schema/components/vad.schema.json +48 -0
- jambonz_sdk/schema/jambonz-app.schema.json +113 -0
- jambonz_sdk/schema/verbs/alert.schema.json +34 -0
- jambonz_sdk/schema/verbs/answer.schema.json +22 -0
- jambonz_sdk/schema/verbs/conference.schema.json +107 -0
- jambonz_sdk/schema/verbs/config.schema.json +221 -0
- jambonz_sdk/schema/verbs/deepgram_s2s.schema.json +81 -0
- jambonz_sdk/schema/verbs/dequeue.schema.json +51 -0
- jambonz_sdk/schema/verbs/dial.schema.json +200 -0
- jambonz_sdk/schema/verbs/dialogflow.schema.json +148 -0
- jambonz_sdk/schema/verbs/dtmf.schema.json +49 -0
- jambonz_sdk/schema/verbs/dub.schema.json +103 -0
- jambonz_sdk/schema/verbs/elevenlabs_s2s.schema.json +81 -0
- jambonz_sdk/schema/verbs/enqueue.schema.json +53 -0
- jambonz_sdk/schema/verbs/gather.schema.json +190 -0
- jambonz_sdk/schema/verbs/google_s2s.schema.json +42 -0
- jambonz_sdk/schema/verbs/hangup.schema.json +36 -0
- jambonz_sdk/schema/verbs/leave.schema.json +22 -0
- jambonz_sdk/schema/verbs/listen.schema.json +127 -0
- jambonz_sdk/schema/verbs/llm.schema.json +44 -0
- jambonz_sdk/schema/verbs/message.schema.json +82 -0
- jambonz_sdk/schema/verbs/openai_s2s.schema.json +42 -0
- jambonz_sdk/schema/verbs/pause.schema.json +36 -0
- jambonz_sdk/schema/verbs/pipeline.schema.json +240 -0
- jambonz_sdk/schema/verbs/play.schema.json +96 -0
- jambonz_sdk/schema/verbs/redirect.schema.json +34 -0
- jambonz_sdk/schema/verbs/rest_dial.schema.json +113 -0
- jambonz_sdk/schema/verbs/s2s.schema.json +39 -0
- jambonz_sdk/schema/verbs/say.schema.json +107 -0
- jambonz_sdk/schema/verbs/sip-decline.schema.json +58 -0
- jambonz_sdk/schema/verbs/sip-refer.schema.json +58 -0
- jambonz_sdk/schema/verbs/sip-request.schema.json +54 -0
- jambonz_sdk/schema/verbs/stream.schema.json +103 -0
- jambonz_sdk/schema/verbs/tag.schema.json +41 -0
- jambonz_sdk/schema/verbs/transcribe.schema.json +57 -0
- jambonz_sdk/schema/verbs/ultravox_s2s.schema.json +41 -0
- jambonz_sdk/types/__init__.py +139 -0
- jambonz_sdk/types/components.py +250 -0
- jambonz_sdk/types/rest.py +59 -0
- jambonz_sdk/types/session.py +55 -0
- jambonz_sdk/types/verbs.py +572 -0
- jambonz_sdk/validator.py +107 -0
- jambonz_sdk/verb_builder.py +316 -0
- jambonz_sdk/verb_builder.pyi +1133 -0
- jambonz_sdk/verb_registry.py +102 -0
- jambonz_sdk/webhook/__init__.py +10 -0
- jambonz_sdk/webhook/middleware.py +63 -0
- jambonz_sdk/webhook/response.py +43 -0
- jambonz_sdk/websocket/__init__.py +15 -0
- jambonz_sdk/websocket/audio_client.py +11 -0
- jambonz_sdk/websocket/audio_stream.py +151 -0
- jambonz_sdk/websocket/client.py +165 -0
- jambonz_sdk/websocket/endpoint.py +193 -0
- jambonz_sdk/websocket/router.py +87 -0
- jambonz_sdk/websocket/session.py +259 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/callbacks/tts-streaming-event",
|
|
4
|
+
"title": "TTS Streaming Event",
|
|
5
|
+
"description": "Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"oneOf": [
|
|
8
|
+
{
|
|
9
|
+
"properties": {
|
|
10
|
+
"event_type": {
|
|
11
|
+
"const": "stream_open",
|
|
12
|
+
"description": "The TTS streaming connection has been established."
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"required": ["event_type"],
|
|
16
|
+
"additionalProperties": false
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"properties": {
|
|
20
|
+
"event_type": {
|
|
21
|
+
"const": "stream_closed",
|
|
22
|
+
"description": "The TTS streaming connection has been closed."
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"required": ["event_type"],
|
|
26
|
+
"additionalProperties": false
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"properties": {
|
|
30
|
+
"event_type": {
|
|
31
|
+
"const": "stream_paused",
|
|
32
|
+
"description": "TTS streaming has been paused."
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
"required": ["event_type"],
|
|
36
|
+
"additionalProperties": false
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"properties": {
|
|
40
|
+
"event_type": {
|
|
41
|
+
"const": "stream_resumed",
|
|
42
|
+
"description": "TTS streaming has been resumed."
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
"required": ["event_type"],
|
|
46
|
+
"additionalProperties": false
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"properties": {
|
|
50
|
+
"event_type": {
|
|
51
|
+
"const": "user_interruption",
|
|
52
|
+
"description": "The user interrupted (barged in) during TTS playout, causing the stream to be cleared."
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
"required": ["event_type"],
|
|
56
|
+
"additionalProperties": false
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"properties": {
|
|
60
|
+
"event_type": {
|
|
61
|
+
"const": "tts_spoken",
|
|
62
|
+
"description": "Reports the actual text that was spoken via TTS. Sent on utterance completion or when the user barges in. Only sent when trackTtsPlayout is enabled via the config verb. Requires a TTS vendor that supports alignment data (e.g. ElevenLabs)."
|
|
63
|
+
},
|
|
64
|
+
"text": {
|
|
65
|
+
"type": "string",
|
|
66
|
+
"description": "The text that was actually spoken before completion or interruption."
|
|
67
|
+
},
|
|
68
|
+
"bargein": {
|
|
69
|
+
"type": "boolean",
|
|
70
|
+
"description": "True if the user barged in (interrupted) before the TTS finished speaking. False if the utterance completed normally."
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
"required": ["event_type", "text", "bargein"],
|
|
74
|
+
"additionalProperties": false
|
|
75
|
+
}
|
|
76
|
+
]
|
|
77
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/callbacks/verb-status",
|
|
4
|
+
"title": "Verb Status Event",
|
|
5
|
+
"description": "Real-time verb lifecycle events sent over WebSocket when notifyEvents is enabled on the session. These are informational — no response is expected.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"event": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"enum": [
|
|
11
|
+
"starting",
|
|
12
|
+
"finished",
|
|
13
|
+
"start-playback",
|
|
14
|
+
"stop-playback",
|
|
15
|
+
"kill-playback",
|
|
16
|
+
"dtmf-bargein-detected",
|
|
17
|
+
"speech-bargein-detected",
|
|
18
|
+
"synthesized-audio"
|
|
19
|
+
],
|
|
20
|
+
"description": "The verb lifecycle event."
|
|
21
|
+
},
|
|
22
|
+
"verb": {
|
|
23
|
+
"type": "string",
|
|
24
|
+
"description": "The verb name (e.g. 'say', 'play', 'gather'). Present on synthesized-audio, start-playback, stop-playback, kill-playback, dtmf-bargein-detected, and speech-bargein-detected events."
|
|
25
|
+
},
|
|
26
|
+
"name": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "The verb name. Present on 'starting' and 'finished' events (these use 'name' instead of 'verb')."
|
|
29
|
+
},
|
|
30
|
+
"id": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"description": "The verb instance id, if one was assigned by the application."
|
|
33
|
+
},
|
|
34
|
+
"vendor": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"description": "TTS vendor name. Present on synthesized-audio events."
|
|
37
|
+
},
|
|
38
|
+
"language": {
|
|
39
|
+
"type": "string",
|
|
40
|
+
"description": "TTS language code. Present on synthesized-audio events."
|
|
41
|
+
},
|
|
42
|
+
"characters": {
|
|
43
|
+
"type": "integer",
|
|
44
|
+
"description": "Number of characters synthesized. Present on synthesized-audio events when not served from cache."
|
|
45
|
+
},
|
|
46
|
+
"elapsed_time": {
|
|
47
|
+
"type": "number",
|
|
48
|
+
"description": "TTS round-trip time in milliseconds. Present on synthesized-audio events when not served from cache."
|
|
49
|
+
},
|
|
50
|
+
"served_from_cache": {
|
|
51
|
+
"type": "boolean",
|
|
52
|
+
"description": "Whether the TTS audio was served from cache. Present on synthesized-audio events."
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
"required": ["event"],
|
|
56
|
+
"additionalProperties": true
|
|
57
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/actionHook",
|
|
4
|
+
"title": "ActionHook",
|
|
5
|
+
"description": "A webhook or websocket callback that jambonz invokes when a verb completes. Reports verb results (e.g. speech recognition from 'gather', dial outcome) and receives the next verbs to execute. In webhook mode: jambonz POSTs to this URL and the HTTP response body is the next verb array. In WebSocket mode: this value becomes an event name emitted on the session — bind session.on('/hookName', (evt) => {...}) and respond with session.reply() (NOT session.send()). The callback payload always includes 'reason' plus verb-specific fields (e.g. 'speech', 'digits' for gather). Can be a simple URL/path string or an object with additional options.",
|
|
6
|
+
"oneOf": [
|
|
7
|
+
{
|
|
8
|
+
"type": "string",
|
|
9
|
+
"format": "uri",
|
|
10
|
+
"description": "A URL to invoke. For webhook applications this is an HTTP(S) URL. For websocket applications this is typically a relative path or event name.",
|
|
11
|
+
"examples": ["https://myapp.example.com/gather-result", "/gather-result"]
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"type": "object",
|
|
15
|
+
"description": "A hook specification with URL and additional options.",
|
|
16
|
+
"properties": {
|
|
17
|
+
"url": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"format": "uri",
|
|
20
|
+
"description": "The URL to invoke."
|
|
21
|
+
},
|
|
22
|
+
"method": {
|
|
23
|
+
"type": "string",
|
|
24
|
+
"description": "The HTTP method to use. Only applies to webhook applications.",
|
|
25
|
+
"enum": ["GET", "POST"],
|
|
26
|
+
"default": "POST"
|
|
27
|
+
},
|
|
28
|
+
"basicAuth": {
|
|
29
|
+
"$ref": "auth",
|
|
30
|
+
"description": "Basic authentication credentials to include in the request."
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"required": ["url"]
|
|
34
|
+
}
|
|
35
|
+
]
|
|
36
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/actionHookDelayAction",
|
|
4
|
+
"title": "ActionHookDelayAction",
|
|
5
|
+
"description": "Configuration for what to do when an actionHook (webhook) takes a long time to respond. Allows playing interim content (e.g. 'please wait' messages, hold music) while waiting for the webhook response, with configurable retry and give-up behavior.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"enabled": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Whether to enable delay handling for actionHooks."
|
|
11
|
+
},
|
|
12
|
+
"noResponseTimeout": {
|
|
13
|
+
"type": "number",
|
|
14
|
+
"description": "Time in seconds to wait before executing the delay actions. If the webhook responds before this timeout, the delay actions are skipped.",
|
|
15
|
+
"examples": [3, 5]
|
|
16
|
+
},
|
|
17
|
+
"noResponseGiveUpTimeout": {
|
|
18
|
+
"type": "number",
|
|
19
|
+
"description": "Total time in seconds to wait for a webhook response before giving up and executing the giveUpActions.",
|
|
20
|
+
"examples": [30, 60]
|
|
21
|
+
},
|
|
22
|
+
"retries": {
|
|
23
|
+
"type": "number",
|
|
24
|
+
"description": "Number of times to retry the delay actions while still waiting for the webhook response."
|
|
25
|
+
},
|
|
26
|
+
"actions": {
|
|
27
|
+
"type": "array",
|
|
28
|
+
"description": "An array of jambonz verbs to execute while waiting for the webhook response. Typically 'say' or 'play' verbs with messages like 'please hold'.",
|
|
29
|
+
"items": { "type": "object" }
|
|
30
|
+
},
|
|
31
|
+
"giveUpActions": {
|
|
32
|
+
"type": "array",
|
|
33
|
+
"description": "An array of jambonz verbs to execute if the webhook never responds within the noResponseGiveUpTimeout. Typically an error message and/or hangup.",
|
|
34
|
+
"items": { "type": "object" }
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/amd",
|
|
4
|
+
"title": "Answering Machine Detection",
|
|
5
|
+
"description": "Configuration for answering machine detection (AMD). Detects whether an outbound or inbound call was answered by a human or a machine. Used as a nested property on the 'config' or 'dial' verb. IMPORTANT: AMD runs asynchronously in the background. When using AMD with the 'config' verb, you MUST follow it with a 'pause' verb (e.g. pause({ length: 25 })) to keep the call alive while AMD detection runs. Without a pause, the call will end immediately after config completes.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"actionHook": {
|
|
9
|
+
"$ref": "actionHook",
|
|
10
|
+
"description": "Webhook to receive AMD events (amd_human_detected, amd_machine_detected, amd_no_speech_detected, amd_decision_timeout, amd_machine_stopped_speaking, amd_tone_detected, amd_error, amd_stopped)."
|
|
11
|
+
},
|
|
12
|
+
"thresholdWordCount": {
|
|
13
|
+
"type": "number",
|
|
14
|
+
"description": "Number of spoken words in a greeting that triggers an amd_machine_detected result.",
|
|
15
|
+
"default": 9
|
|
16
|
+
},
|
|
17
|
+
"digitCount": {
|
|
18
|
+
"type": "number",
|
|
19
|
+
"description": "Number of digits in a greeting to trigger detection. 0 disables digit-based detection.",
|
|
20
|
+
"default": 0
|
|
21
|
+
},
|
|
22
|
+
"timers": {
|
|
23
|
+
"type": "object",
|
|
24
|
+
"description": "Timer settings controlling AMD detection windows.",
|
|
25
|
+
"properties": {
|
|
26
|
+
"noSpeechTimeoutMs": {
|
|
27
|
+
"type": "number",
|
|
28
|
+
"description": "Milliseconds to wait for any speech before returning amd_no_speech_detected.",
|
|
29
|
+
"default": 5000
|
|
30
|
+
},
|
|
31
|
+
"decisionTimeoutMs": {
|
|
32
|
+
"type": "number",
|
|
33
|
+
"description": "Milliseconds before returning amd_decision_timeout if no determination is made.",
|
|
34
|
+
"default": 15000
|
|
35
|
+
},
|
|
36
|
+
"toneTimeoutMs": {
|
|
37
|
+
"type": "number",
|
|
38
|
+
"description": "Milliseconds to wait for beep/tone detection.",
|
|
39
|
+
"default": 20000
|
|
40
|
+
},
|
|
41
|
+
"greetingCompletionTimeoutMs": {
|
|
42
|
+
"type": "number",
|
|
43
|
+
"description": "Milliseconds of silence after speech before determining the machine greeting is complete. Automatically reduced to 1000ms if a beep is detected.",
|
|
44
|
+
"default": 2000
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"additionalProperties": false
|
|
48
|
+
},
|
|
49
|
+
"recognizer": {
|
|
50
|
+
"$ref": "recognizer",
|
|
51
|
+
"description": "Override the STT recognizer used for AMD speech detection. When omitted, AMD uses the session default recognizer with enhancedModel enabled."
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"required": ["actionHook"],
|
|
55
|
+
"examples": [
|
|
56
|
+
{
|
|
57
|
+
"actionHook": "/amd-events"
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"actionHook": "/amd-events",
|
|
61
|
+
"thresholdWordCount": 6,
|
|
62
|
+
"timers": {
|
|
63
|
+
"noSpeechTimeoutMs": 3000,
|
|
64
|
+
"decisionTimeoutMs": 10000
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
]
|
|
68
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/auth",
|
|
4
|
+
"title": "Auth",
|
|
5
|
+
"description": "Basic authentication credentials, used for authenticating with external services such as websocket endpoints or SIP registrars.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"username": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "The username for authentication."
|
|
11
|
+
},
|
|
12
|
+
"password": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "The password for authentication."
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"required": ["username", "password"]
|
|
18
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/bidirectionalAudio",
|
|
4
|
+
"title": "BidirectionalAudio",
|
|
5
|
+
"description": "Configuration for bidirectional audio streaming over a websocket connection. When enabled, the remote websocket endpoint can send audio back to jambonz to be played to the caller.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"enabled": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Whether to enable bidirectional audio on the websocket connection."
|
|
11
|
+
},
|
|
12
|
+
"streaming": {
|
|
13
|
+
"type": "boolean",
|
|
14
|
+
"description": "If true, audio is streamed continuously rather than sent as complete messages."
|
|
15
|
+
},
|
|
16
|
+
"sampleRate": {
|
|
17
|
+
"type": "number",
|
|
18
|
+
"description": "The sample rate in Hz for bidirectional audio.",
|
|
19
|
+
"examples": [8000, 16000, 24000]
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/fillerNoise",
|
|
4
|
+
"title": "FillerNoise",
|
|
5
|
+
"description": "Configuration for playing background filler noise (e.g. keyboard typing, hold music) while the application is processing and the caller would otherwise hear silence. Commonly used during LLM response generation to indicate the system is working.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"enable": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Whether to enable filler noise."
|
|
11
|
+
},
|
|
12
|
+
"url": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"format": "uri",
|
|
15
|
+
"description": "URL of the audio file to play as filler noise. Should be a short, loopable audio clip.",
|
|
16
|
+
"examples": ["https://example.com/sounds/typing.wav"]
|
|
17
|
+
},
|
|
18
|
+
"startDelaySecs": {
|
|
19
|
+
"type": "number",
|
|
20
|
+
"description": "Number of seconds to wait before starting filler noise. Prevents filler noise from playing during brief processing pauses.",
|
|
21
|
+
"examples": [1, 2]
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"required": ["enable"]
|
|
25
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/llm-base",
|
|
4
|
+
"title": "LLM Base Properties",
|
|
5
|
+
"description": "Shared properties for llm, s2s, and vendor-specific s2s verb schemas.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"id": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "An optional unique identifier for this verb instance."
|
|
11
|
+
},
|
|
12
|
+
"vendor": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "The LLM vendor to use.",
|
|
15
|
+
"examples": ["openai", "anthropic", "google", "groq", "deepseek", "deepgram", "ultravox", "custom"]
|
|
16
|
+
},
|
|
17
|
+
"model": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "The specific model to use from the vendor.",
|
|
20
|
+
"examples": ["gpt-4o", "claude-sonnet-4-20250514", "gemini-2.0-flash"]
|
|
21
|
+
},
|
|
22
|
+
"auth": {
|
|
23
|
+
"type": "object",
|
|
24
|
+
"description": "Authentication credentials for the LLM vendor API.",
|
|
25
|
+
"properties": {
|
|
26
|
+
"apiKey": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "The API key for the LLM vendor."
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"additionalProperties": true
|
|
32
|
+
},
|
|
33
|
+
"connectOptions": {
|
|
34
|
+
"type": "object",
|
|
35
|
+
"description": "Additional connection options for the LLM vendor, such as custom base URLs or API versions.",
|
|
36
|
+
"additionalProperties": true
|
|
37
|
+
},
|
|
38
|
+
"llmOptions": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"description": "Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'.",
|
|
41
|
+
"additionalProperties": true,
|
|
42
|
+
"examples": [
|
|
43
|
+
{
|
|
44
|
+
"messages": [
|
|
45
|
+
{ "role": "system", "content": "You are a helpful customer service agent for Acme Corp." }
|
|
46
|
+
],
|
|
47
|
+
"temperature": 0.7
|
|
48
|
+
}
|
|
49
|
+
]
|
|
50
|
+
},
|
|
51
|
+
"mcpServers": {
|
|
52
|
+
"type": "array",
|
|
53
|
+
"items": {
|
|
54
|
+
"type": "object",
|
|
55
|
+
"properties": {
|
|
56
|
+
"url": {
|
|
57
|
+
"type": "string",
|
|
58
|
+
"format": "uri",
|
|
59
|
+
"description": "The URL of the MCP server."
|
|
60
|
+
},
|
|
61
|
+
"auth": {
|
|
62
|
+
"type": "object",
|
|
63
|
+
"description": "Authentication for the MCP server.",
|
|
64
|
+
"additionalProperties": true
|
|
65
|
+
},
|
|
66
|
+
"roots": {
|
|
67
|
+
"type": "array",
|
|
68
|
+
"items": { "type": "object" },
|
|
69
|
+
"description": "MCP root definitions."
|
|
70
|
+
}
|
|
71
|
+
},
|
|
72
|
+
"required": ["url"]
|
|
73
|
+
},
|
|
74
|
+
"description": "Model Context Protocol servers to connect to. MCP servers provide tools that the LLM can invoke during the conversation."
|
|
75
|
+
},
|
|
76
|
+
"actionHook": {
|
|
77
|
+
"$ref": "actionHook",
|
|
78
|
+
"description": "A webhook invoked when the LLM conversation ends. Receives conversation details and should return the next verbs to execute."
|
|
79
|
+
},
|
|
80
|
+
"eventHook": {
|
|
81
|
+
"$ref": "actionHook",
|
|
82
|
+
"description": "A webhook invoked for real-time events during the LLM conversation (e.g. tool calls, transcription events)."
|
|
83
|
+
},
|
|
84
|
+
"toolHook": {
|
|
85
|
+
"$ref": "actionHook",
|
|
86
|
+
"description": "A webhook invoked when the LLM calls a tool/function. Receives the tool name and arguments, and should return the tool result."
|
|
87
|
+
},
|
|
88
|
+
"events": {
|
|
89
|
+
"type": "array",
|
|
90
|
+
"items": { "type": "string" },
|
|
91
|
+
"description": "List of event types to receive via the eventHook."
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-assemblyAiOptions",
|
|
4
|
+
"title": "AssemblyAI Recognizer Options",
|
|
5
|
+
"description": "AssemblyAI-specific STT options. Only applies when recognizer vendor is 'assemblyai'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"apiKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "AssemblyAI API key. Overrides credentials configured in jambonz."
|
|
11
|
+
},
|
|
12
|
+
"serviceVersion": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"enum": ["v2", "v3"],
|
|
15
|
+
"description": "AssemblyAI streaming API version."
|
|
16
|
+
},
|
|
17
|
+
"speechModel": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "AssemblyAI speech model to use for recognition."
|
|
20
|
+
},
|
|
21
|
+
"formatTurns": {
|
|
22
|
+
"type": "boolean",
|
|
23
|
+
"description": "Enable turn-level formatting."
|
|
24
|
+
},
|
|
25
|
+
"endOfTurnConfidenceThreshold": {
|
|
26
|
+
"type": "number",
|
|
27
|
+
"description": "Confidence threshold for end-of-turn detection."
|
|
28
|
+
},
|
|
29
|
+
"minEndOfTurnSilenceWhenConfident": {
|
|
30
|
+
"type": "number",
|
|
31
|
+
"description": "Minimum silence duration (seconds) to trigger end-of-turn when confidence is met."
|
|
32
|
+
},
|
|
33
|
+
"maxTurnSilence": {
|
|
34
|
+
"type": "number",
|
|
35
|
+
"description": "Maximum silence duration (seconds) before forcing end-of-turn."
|
|
36
|
+
},
|
|
37
|
+
"minTurnSilence": {
|
|
38
|
+
"type": "number",
|
|
39
|
+
"description": "Minimum silence duration (seconds) before allowing end-of-turn."
|
|
40
|
+
},
|
|
41
|
+
"keyterms": {
|
|
42
|
+
"type": "array",
|
|
43
|
+
"items": {
|
|
44
|
+
"type": "string"
|
|
45
|
+
},
|
|
46
|
+
"description": "List of key terms to boost in recognition."
|
|
47
|
+
},
|
|
48
|
+
"prompt": {
|
|
49
|
+
"type": "string",
|
|
50
|
+
"description": "Prompt to guide the recognition model."
|
|
51
|
+
},
|
|
52
|
+
"languageDetection": {
|
|
53
|
+
"type": "boolean",
|
|
54
|
+
"description": "Enable automatic language detection."
|
|
55
|
+
},
|
|
56
|
+
"vadThreshold": {
|
|
57
|
+
"type": "number",
|
|
58
|
+
"description": "Voice activity detection threshold."
|
|
59
|
+
},
|
|
60
|
+
"inactivityTimeout": {
|
|
61
|
+
"type": "number",
|
|
62
|
+
"description": "Timeout (seconds) for inactivity before closing the stream."
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"additionalProperties": false
|
|
66
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-awsOptions",
|
|
4
|
+
"title": "AWS Recognizer Options",
|
|
5
|
+
"description": "AWS Transcribe specific options. Only applies when recognizer vendor is 'aws'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"accessKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "AWS access key ID. Overrides credentials configured in jambonz."
|
|
11
|
+
},
|
|
12
|
+
"secretKey": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "AWS secret access key."
|
|
15
|
+
},
|
|
16
|
+
"securityToken": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "AWS temporary security token (for STS/assumed roles)."
|
|
19
|
+
},
|
|
20
|
+
"region": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "AWS region for the Transcribe service."
|
|
23
|
+
},
|
|
24
|
+
"vocabularyName": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Name of a custom vocabulary to use."
|
|
27
|
+
},
|
|
28
|
+
"vocabularyFilterName": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Name of a vocabulary filter to apply."
|
|
31
|
+
},
|
|
32
|
+
"vocabularyFilterMethod": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"enum": ["remove", "mask", "tag"],
|
|
35
|
+
"description": "How filtered vocabulary words should be handled."
|
|
36
|
+
},
|
|
37
|
+
"languageModelName": {
|
|
38
|
+
"type": "string",
|
|
39
|
+
"description": "Name of a custom language model."
|
|
40
|
+
},
|
|
41
|
+
"piiEntityTypes": {
|
|
42
|
+
"type": "array",
|
|
43
|
+
"items": { "type": "string" },
|
|
44
|
+
"description": "PII entity types to identify (e.g. 'BANK_ACCOUNT_NUMBER', 'CREDIT_DEBIT_NUMBER')."
|
|
45
|
+
},
|
|
46
|
+
"piiIdentifyEntities": {
|
|
47
|
+
"type": "boolean",
|
|
48
|
+
"description": "Enable PII entity identification."
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"additionalProperties": false
|
|
52
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-azureOptions",
|
|
4
|
+
"title": "Azure Recognizer Options",
|
|
5
|
+
"description": "Azure Speech Services specific options. Only applies when recognizer vendor is 'microsoft'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"speechSegmentationSilenceTimeoutMs": {
|
|
9
|
+
"type": "number",
|
|
10
|
+
"description": "Silence timeout in milliseconds for speech segmentation."
|
|
11
|
+
},
|
|
12
|
+
"postProcessing": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Post-processing mode for transcription results."
|
|
15
|
+
},
|
|
16
|
+
"audioLogging": {
|
|
17
|
+
"type": "boolean",
|
|
18
|
+
"description": "Enable audio logging for diagnostics."
|
|
19
|
+
},
|
|
20
|
+
"languageIdMode": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"enum": ["AtStart", "Continuous"],
|
|
23
|
+
"description": "Language identification mode when using multiple languages."
|
|
24
|
+
},
|
|
25
|
+
"speechRecognitionMode": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"enum": ["CONVERSATION", "DICTATION", "INTERACTIVE"],
|
|
28
|
+
"description": "Speech recognition mode optimized for the interaction type."
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"additionalProperties": false
|
|
32
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-cobaltOptions",
|
|
4
|
+
"title": "Cobalt Recognizer Options",
|
|
5
|
+
"description": "Cobalt-specific STT options. Only applies when recognizer vendor is 'cobalt'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"serverUri": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Cobalt server URI."
|
|
11
|
+
},
|
|
12
|
+
"enableConfusionNetwork": {
|
|
13
|
+
"type": "boolean",
|
|
14
|
+
"description": "Enable confusion network output."
|
|
15
|
+
},
|
|
16
|
+
"metadata": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Metadata string to pass to the server."
|
|
19
|
+
},
|
|
20
|
+
"compiledContextData": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Compiled context data for biasing recognition."
|
|
23
|
+
},
|
|
24
|
+
"wordTimeOffsets": {
|
|
25
|
+
"type": "boolean",
|
|
26
|
+
"description": "Include word-level timestamps."
|
|
27
|
+
},
|
|
28
|
+
"contextToken": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Context token for server-side context."
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"additionalProperties": false
|
|
34
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-customOptions",
|
|
4
|
+
"title": "Custom Recognizer Options",
|
|
5
|
+
"description": "Options for custom STT vendors. Only applies when recognizer vendor is 'custom'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"authToken": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Authentication token for the custom STT service."
|
|
11
|
+
},
|
|
12
|
+
"uri": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "WebSocket URI of the custom STT service."
|
|
15
|
+
},
|
|
16
|
+
"sampleRate": {
|
|
17
|
+
"type": "number",
|
|
18
|
+
"description": "Audio sample rate in Hz."
|
|
19
|
+
},
|
|
20
|
+
"options": {
|
|
21
|
+
"type": "object",
|
|
22
|
+
"description": "Additional vendor-specific options passed through to the custom service.",
|
|
23
|
+
"additionalProperties": true
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"additionalProperties": false
|
|
27
|
+
}
|