@jambonz/mcp-schema-server 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +305 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +135 -0
- package/dist/index.js.map +1 -0
- package/package.json +47 -0
- package/schema/components/actionHook.schema.json +36 -0
- package/schema/components/actionHookDelayAction.schema.json +37 -0
- package/schema/components/auth.schema.json +18 -0
- package/schema/components/bidirectionalAudio.schema.json +22 -0
- package/schema/components/fillerNoise.schema.json +25 -0
- package/schema/components/recognizer.schema.json +280 -0
- package/schema/components/synthesizer.schema.json +82 -0
- package/schema/components/target.schema.json +105 -0
- package/schema/components/vad.schema.json +48 -0
- package/schema/jambonz-app.schema.json +106 -0
- package/schema/verbs/alert.schema.json +20 -0
- package/schema/verbs/answer.schema.json +12 -0
- package/schema/verbs/conference.schema.json +43 -0
- package/schema/verbs/config.schema.json +174 -0
- package/schema/verbs/dequeue.schema.json +36 -0
- package/schema/verbs/dial.schema.json +157 -0
- package/schema/verbs/dtmf.schema.json +27 -0
- package/schema/verbs/dub.schema.json +52 -0
- package/schema/verbs/enqueue.schema.json +38 -0
- package/schema/verbs/gather.schema.json +145 -0
- package/schema/verbs/hangup.schema.json +29 -0
- package/schema/verbs/leave.schema.json +12 -0
- package/schema/verbs/listen.schema.json +110 -0
- package/schema/verbs/llm.schema.json +131 -0
- package/schema/verbs/message.schema.json +30 -0
- package/schema/verbs/pause.schema.json +26 -0
- package/schema/verbs/pipeline.schema.json +61 -0
- package/schema/verbs/play.schema.json +69 -0
- package/schema/verbs/redirect.schema.json +23 -0
- package/schema/verbs/say.schema.json +84 -0
- package/schema/verbs/sip-decline.schema.json +31 -0
- package/schema/verbs/sip-refer.schema.json +41 -0
- package/schema/verbs/sip-request.schema.json +33 -0
- package/schema/verbs/stream.schema.json +30 -0
- package/schema/verbs/tag.schema.json +21 -0
- package/schema/verbs/transcribe.schema.json +44 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/gather",
|
|
4
|
+
"title": "Gather",
|
|
5
|
+
"description": "Collects user input via speech (STT) and/or DTMF digits. Optionally plays a prompt (using nested 'say' or 'play') while listening. When input is received, the result is sent to the actionHook which should return the next set of verbs. This is the primary verb for building interactive voice menus and conversational flows.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": {
|
|
9
|
+
"const": "gather",
|
|
10
|
+
"description": "The verb name."
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"actionHook": {
|
|
17
|
+
"$ref": "../components/actionHook",
|
|
18
|
+
"description": "The webhook to invoke when input is collected. Receives the transcribed speech and/or DTMF digits. Must return a new array of verbs."
|
|
19
|
+
},
|
|
20
|
+
"input": {
|
|
21
|
+
"type": "array",
|
|
22
|
+
"items": {
|
|
23
|
+
"type": "string",
|
|
24
|
+
"enum": ["speech", "digits"]
|
|
25
|
+
},
|
|
26
|
+
"description": "The types of input to accept. Can include 'speech' (STT), 'digits' (DTMF), or both.",
|
|
27
|
+
"default": ["digits"],
|
|
28
|
+
"examples": [["speech", "digits"], ["speech"], ["digits"]]
|
|
29
|
+
},
|
|
30
|
+
"finishOnKey": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"description": "A DTMF key that signals the end of digit input. The key itself is not included in the collected digits.",
|
|
33
|
+
"examples": ["#", "*"]
|
|
34
|
+
},
|
|
35
|
+
"numDigits": {
|
|
36
|
+
"type": "number",
|
|
37
|
+
"description": "Exact number of DTMF digits to collect. Gather completes automatically when this many digits are received."
|
|
38
|
+
},
|
|
39
|
+
"minDigits": {
|
|
40
|
+
"type": "number",
|
|
41
|
+
"description": "Minimum number of DTMF digits required."
|
|
42
|
+
},
|
|
43
|
+
"maxDigits": {
|
|
44
|
+
"type": "number",
|
|
45
|
+
"description": "Maximum number of DTMF digits to collect."
|
|
46
|
+
},
|
|
47
|
+
"interDigitTimeout": {
|
|
48
|
+
"type": "number",
|
|
49
|
+
"description": "Time in seconds to wait between DTMF digits before considering input complete.",
|
|
50
|
+
"examples": [5]
|
|
51
|
+
},
|
|
52
|
+
"speechTimeout": {
|
|
53
|
+
"type": "number",
|
|
54
|
+
"description": "Time in seconds of silence after speech before considering the utterance complete.",
|
|
55
|
+
"examples": [2, 3]
|
|
56
|
+
},
|
|
57
|
+
"timeout": {
|
|
58
|
+
"type": "number",
|
|
59
|
+
"description": "Overall timeout in seconds. If no input is received within this time, the gather completes with no input and the actionHook is invoked.",
|
|
60
|
+
"examples": [10, 30]
|
|
61
|
+
},
|
|
62
|
+
"partialResultHook": {
|
|
63
|
+
"$ref": "../components/actionHook",
|
|
64
|
+
"description": "A webhook to invoke with interim (partial) speech recognition results. Useful for providing real-time feedback or early processing."
|
|
65
|
+
},
|
|
66
|
+
"listenDuringPrompt": {
|
|
67
|
+
"type": "boolean",
|
|
68
|
+
"description": "If true, listen for input while the prompt is playing. If false, only start listening after the prompt finishes.",
|
|
69
|
+
"default": true
|
|
70
|
+
},
|
|
71
|
+
"dtmfBargein": {
|
|
72
|
+
"type": "boolean",
|
|
73
|
+
"description": "If true, DTMF input interrupts (barges in on) any playing prompt."
|
|
74
|
+
},
|
|
75
|
+
"bargein": {
|
|
76
|
+
"type": "boolean",
|
|
77
|
+
"description": "If true, speech input interrupts (barges in on) any playing prompt."
|
|
78
|
+
},
|
|
79
|
+
"minBargeinWordCount": {
|
|
80
|
+
"type": "number",
|
|
81
|
+
"description": "Minimum number of words that must be recognized before barge-in is triggered. Prevents brief noises from interrupting prompts.",
|
|
82
|
+
"examples": [1, 2]
|
|
83
|
+
},
|
|
84
|
+
"recognizer": {
|
|
85
|
+
"$ref": "../components/recognizer",
|
|
86
|
+
"description": "Override the session-level STT configuration for this gather."
|
|
87
|
+
},
|
|
88
|
+
"say": {
|
|
89
|
+
"$ref": "say",
|
|
90
|
+
"description": "A nested 'say' verb to use as the prompt. Played to the caller while listening for input."
|
|
91
|
+
},
|
|
92
|
+
"play": {
|
|
93
|
+
"$ref": "play",
|
|
94
|
+
"description": "A nested 'play' verb to use as the prompt. Played to the caller while listening for input."
|
|
95
|
+
},
|
|
96
|
+
"fillerNoise": {
|
|
97
|
+
"$ref": "../components/fillerNoise",
|
|
98
|
+
"description": "Filler noise configuration while waiting for the actionHook to respond."
|
|
99
|
+
},
|
|
100
|
+
"actionHookDelayAction": {
|
|
101
|
+
"$ref": "../components/actionHookDelayAction",
|
|
102
|
+
"description": "Configuration for interim actions while the actionHook is processing."
|
|
103
|
+
}
|
|
104
|
+
},
|
|
105
|
+
"examples": [
|
|
106
|
+
{
|
|
107
|
+
"verb": "gather",
|
|
108
|
+
"input": ["speech", "digits"],
|
|
109
|
+
"actionHook": "/gather-result",
|
|
110
|
+
"timeout": 15,
|
|
111
|
+
"say": {
|
|
112
|
+
"text": "Please say or enter your account number."
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"verb": "gather",
|
|
117
|
+
"input": ["digits"],
|
|
118
|
+
"actionHook": "/menu-selection",
|
|
119
|
+
"numDigits": 1,
|
|
120
|
+
"say": {
|
|
121
|
+
"text": "Press 1 for sales, 2 for support, or 3 for billing."
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
{
|
|
125
|
+
"verb": "gather",
|
|
126
|
+
"input": ["speech"],
|
|
127
|
+
"actionHook": "/process-speech",
|
|
128
|
+
"timeout": 20,
|
|
129
|
+
"bargein": true,
|
|
130
|
+
"recognizer": {
|
|
131
|
+
"vendor": "deepgram",
|
|
132
|
+
"language": "en-US",
|
|
133
|
+
"hints": ["account", "balance", "transfer", "payment"]
|
|
134
|
+
},
|
|
135
|
+
"say": {
|
|
136
|
+
"text": "How can I help you today?"
|
|
137
|
+
},
|
|
138
|
+
"fillerNoise": {
|
|
139
|
+
"enable": true,
|
|
140
|
+
"url": "https://example.com/sounds/typing.wav",
|
|
141
|
+
"startDelaySecs": 2
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
]
|
|
145
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/hangup",
|
|
4
|
+
"title": "Hangup",
|
|
5
|
+
"description": "Terminates the call. Optionally includes custom SIP headers on the BYE request.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": {
|
|
9
|
+
"const": "hangup",
|
|
10
|
+
"description": "The verb name."
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"headers": {
|
|
17
|
+
"type": "object",
|
|
18
|
+
"description": "Custom SIP headers to include on the BYE request.",
|
|
19
|
+
"additionalProperties": { "type": "string" }
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"examples": [
|
|
23
|
+
{ "verb": "hangup" },
|
|
24
|
+
{
|
|
25
|
+
"verb": "hangup",
|
|
26
|
+
"headers": { "X-Reason": "call-complete" }
|
|
27
|
+
}
|
|
28
|
+
]
|
|
29
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/leave",
|
|
4
|
+
"title": "Leave",
|
|
5
|
+
"description": "Removes the caller from a conference or queue that they are currently in. Execution continues with the next verb in the application.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": { "const": "leave" },
|
|
9
|
+
"id": { "type": "string", "description": "An optional unique identifier for this verb instance." }
|
|
10
|
+
},
|
|
11
|
+
"examples": [{ "verb": "leave" }]
|
|
12
|
+
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/listen",
|
|
4
|
+
"title": "Listen",
|
|
5
|
+
"description": "Streams real-time call audio to an external websocket endpoint. The remote endpoint receives raw audio and can optionally send audio back (bidirectional). Used for custom speech processing, real-time analysis, AI agent integration, and recording to external systems.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": {
|
|
9
|
+
"const": "listen",
|
|
10
|
+
"description": "The verb name."
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"url": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"format": "uri",
|
|
19
|
+
"description": "The websocket URL to stream audio to.",
|
|
20
|
+
"examples": ["wss://myapp.example.com/audio-stream"]
|
|
21
|
+
},
|
|
22
|
+
"actionHook": {
|
|
23
|
+
"$ref": "../components/actionHook",
|
|
24
|
+
"description": "A webhook invoked when the listen session ends. Should return the next verbs to execute."
|
|
25
|
+
},
|
|
26
|
+
"wsAuth": {
|
|
27
|
+
"$ref": "../components/auth",
|
|
28
|
+
"description": "Authentication credentials for the websocket connection."
|
|
29
|
+
},
|
|
30
|
+
"mixType": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"description": "How to mix the audio channels when streaming. 'mono' sends a single channel containing only the caller's audio, 'stereo' sends caller and callee as separate left/right channels, 'mixed' sends caller and callee audio combined into a single channel.",
|
|
33
|
+
"enum": ["mono", "stereo", "mixed"],
|
|
34
|
+
"default": "mono"
|
|
35
|
+
},
|
|
36
|
+
"metadata": {
|
|
37
|
+
"type": "object",
|
|
38
|
+
"description": "Arbitrary metadata to send to the websocket endpoint in the initial connection message.",
|
|
39
|
+
"additionalProperties": true
|
|
40
|
+
},
|
|
41
|
+
"sampleRate": {
|
|
42
|
+
"type": "number",
|
|
43
|
+
"description": "The audio sample rate in Hz.",
|
|
44
|
+
"examples": [8000, 16000, 24000],
|
|
45
|
+
"default": 8000
|
|
46
|
+
},
|
|
47
|
+
"finishOnKey": {
|
|
48
|
+
"type": "string",
|
|
49
|
+
"description": "A DTMF key that ends the listen session when pressed.",
|
|
50
|
+
"examples": ["#"]
|
|
51
|
+
},
|
|
52
|
+
"maxLength": {
|
|
53
|
+
"type": "number",
|
|
54
|
+
"description": "Maximum duration in seconds for the listen session."
|
|
55
|
+
},
|
|
56
|
+
"passDtmf": {
|
|
57
|
+
"type": "boolean",
|
|
58
|
+
"description": "If true, forward DTMF events to the websocket endpoint."
|
|
59
|
+
},
|
|
60
|
+
"playBeep": {
|
|
61
|
+
"type": "boolean",
|
|
62
|
+
"description": "If true, play a beep tone before streaming begins."
|
|
63
|
+
},
|
|
64
|
+
"disableBidirectionalAudio": {
|
|
65
|
+
"type": "boolean",
|
|
66
|
+
"description": "If true, disable receiving audio from the websocket endpoint. Audio flows only from the call to the websocket, not back."
|
|
67
|
+
},
|
|
68
|
+
"bidirectionalAudio": {
|
|
69
|
+
"$ref": "../components/bidirectionalAudio",
|
|
70
|
+
"description": "Fine-grained configuration for bidirectional audio."
|
|
71
|
+
},
|
|
72
|
+
"timeout": {
|
|
73
|
+
"type": "number",
|
|
74
|
+
"description": "Time in seconds to wait for audio activity before ending the listen session."
|
|
75
|
+
},
|
|
76
|
+
"transcribe": {
|
|
77
|
+
"type": "object",
|
|
78
|
+
"description": "Configuration for simultaneous real-time transcription of the audio being streamed.",
|
|
79
|
+
"additionalProperties": true
|
|
80
|
+
},
|
|
81
|
+
"earlyMedia": {
|
|
82
|
+
"type": "boolean",
|
|
83
|
+
"description": "If true, begin streaming audio before the call is formally answered."
|
|
84
|
+
},
|
|
85
|
+
"channel": {
|
|
86
|
+
"type": "number",
|
|
87
|
+
"description": "Specific audio channel to stream. Used when streaming a single channel of a multi-channel call."
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"required": ["url"],
|
|
91
|
+
"examples": [
|
|
92
|
+
{
|
|
93
|
+
"verb": "listen",
|
|
94
|
+
"url": "wss://myapp.example.com/audio-stream",
|
|
95
|
+
"actionHook": "/listen-complete",
|
|
96
|
+
"sampleRate": 16000,
|
|
97
|
+
"mixType": "stereo"
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"verb": "listen",
|
|
101
|
+
"url": "wss://myapp.example.com/ai-agent",
|
|
102
|
+
"bidirectionalAudio": {
|
|
103
|
+
"enabled": true,
|
|
104
|
+
"streaming": true,
|
|
105
|
+
"sampleRate": 24000
|
|
106
|
+
},
|
|
107
|
+
"metadata": { "callType": "support", "language": "en-US" }
|
|
108
|
+
}
|
|
109
|
+
]
|
|
110
|
+
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/llm",
|
|
4
|
+
"title": "LLM",
|
|
5
|
+
"description": "Connects the caller to a large language model for a real-time voice conversation. Handles the complete STT → LLM → TTS pipeline, including turn detection, interruption handling, and tool/function calling. The caller speaks naturally and the LLM responds via synthesized speech. This is the primary verb for building AI voice agents on jambonz.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": {
|
|
9
|
+
"const": "llm",
|
|
10
|
+
"description": "The verb name."
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"vendor": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "The LLM vendor to use.",
|
|
19
|
+
"examples": ["openai", "anthropic", "google", "groq", "deepseek", "custom"]
|
|
20
|
+
},
|
|
21
|
+
"model": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"description": "The specific model to use from the vendor.",
|
|
24
|
+
"examples": ["gpt-4o", "claude-sonnet-4-20250514", "gemini-2.0-flash"]
|
|
25
|
+
},
|
|
26
|
+
"auth": {
|
|
27
|
+
"type": "object",
|
|
28
|
+
"description": "Authentication credentials for the LLM vendor API.",
|
|
29
|
+
"properties": {
|
|
30
|
+
"apiKey": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"description": "The API key for the LLM vendor."
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
"additionalProperties": true
|
|
36
|
+
},
|
|
37
|
+
"connectOptions": {
|
|
38
|
+
"type": "object",
|
|
39
|
+
"description": "Additional connection options for the LLM vendor, such as custom base URLs or API versions.",
|
|
40
|
+
"additionalProperties": true
|
|
41
|
+
},
|
|
42
|
+
"llmOptions": {
|
|
43
|
+
"type": "object",
|
|
44
|
+
"description": "Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'.",
|
|
45
|
+
"additionalProperties": true,
|
|
46
|
+
"examples": [
|
|
47
|
+
{
|
|
48
|
+
"messages": [
|
|
49
|
+
{ "role": "system", "content": "You are a helpful customer service agent for Acme Corp." }
|
|
50
|
+
],
|
|
51
|
+
"temperature": 0.7,
|
|
52
|
+
"tools": [
|
|
53
|
+
{
|
|
54
|
+
"type": "function",
|
|
55
|
+
"function": {
|
|
56
|
+
"name": "lookupOrder",
|
|
57
|
+
"description": "Look up an order by order number",
|
|
58
|
+
"parameters": {
|
|
59
|
+
"type": "object",
|
|
60
|
+
"properties": {
|
|
61
|
+
"orderNumber": { "type": "string" }
|
|
62
|
+
},
|
|
63
|
+
"required": ["orderNumber"]
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
]
|
|
68
|
+
}
|
|
69
|
+
]
|
|
70
|
+
},
|
|
71
|
+
"mcpServers": {
|
|
72
|
+
"type": "array",
|
|
73
|
+
"items": {
|
|
74
|
+
"type": "object",
|
|
75
|
+
"properties": {
|
|
76
|
+
"url": {
|
|
77
|
+
"type": "string",
|
|
78
|
+
"format": "uri",
|
|
79
|
+
"description": "The URL of the MCP server."
|
|
80
|
+
},
|
|
81
|
+
"auth": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"description": "Authentication for the MCP server.",
|
|
84
|
+
"additionalProperties": true
|
|
85
|
+
},
|
|
86
|
+
"roots": {
|
|
87
|
+
"type": "array",
|
|
88
|
+
"items": { "type": "object" },
|
|
89
|
+
"description": "MCP root definitions."
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
"required": ["url"]
|
|
93
|
+
},
|
|
94
|
+
"description": "Model Context Protocol servers to connect to. MCP servers provide tools that the LLM can invoke during the conversation."
|
|
95
|
+
},
|
|
96
|
+
"actionHook": {
|
|
97
|
+
"$ref": "../components/actionHook",
|
|
98
|
+
"description": "A webhook invoked when the LLM conversation ends. Receives conversation details and should return the next verbs to execute."
|
|
99
|
+
},
|
|
100
|
+
"eventHook": {
|
|
101
|
+
"$ref": "../components/actionHook",
|
|
102
|
+
"description": "A webhook invoked for real-time events during the LLM conversation (e.g. tool calls, transcription events)."
|
|
103
|
+
},
|
|
104
|
+
"toolHook": {
|
|
105
|
+
"$ref": "../components/actionHook",
|
|
106
|
+
"description": "A webhook invoked when the LLM calls a tool/function. Receives the tool name and arguments, and should return the tool result."
|
|
107
|
+
},
|
|
108
|
+
"events": {
|
|
109
|
+
"type": "array",
|
|
110
|
+
"items": { "type": "string" },
|
|
111
|
+
"description": "List of event types to receive via the eventHook."
|
|
112
|
+
}
|
|
113
|
+
},
|
|
114
|
+
"required": ["vendor", "llmOptions"],
|
|
115
|
+
"examples": [
|
|
116
|
+
{
|
|
117
|
+
"verb": "llm",
|
|
118
|
+
"vendor": "openai",
|
|
119
|
+
"model": "gpt-4o",
|
|
120
|
+
"auth": { "apiKey": "sk-..." },
|
|
121
|
+
"llmOptions": {
|
|
122
|
+
"messages": [
|
|
123
|
+
{ "role": "system", "content": "You are a helpful customer service agent. Be concise and friendly." }
|
|
124
|
+
],
|
|
125
|
+
"temperature": 0.7
|
|
126
|
+
},
|
|
127
|
+
"actionHook": "/llm-complete",
|
|
128
|
+
"toolHook": "/llm-tool-call"
|
|
129
|
+
}
|
|
130
|
+
]
|
|
131
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/message",
|
|
4
|
+
"title": "Message",
|
|
5
|
+
"description": "Sends an SMS or MMS message. Can be used during a voice call to send a text message to the caller or another party, or as a standalone action.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": { "const": "message" },
|
|
9
|
+
"id": { "type": "string", "description": "An optional unique identifier for this verb instance." },
|
|
10
|
+
"to": { "type": "string", "description": "The destination phone number in E.164 format.", "examples": ["+15085551212"] },
|
|
11
|
+
"from": { "type": "string", "description": "The sender phone number in E.164 format. Must be a number provisioned on the jambonz platform.", "examples": ["+15085559876"] },
|
|
12
|
+
"text": { "type": "string", "description": "The text content of the message." },
|
|
13
|
+
"media": {
|
|
14
|
+
"oneOf": [
|
|
15
|
+
{ "type": "string", "format": "uri" },
|
|
16
|
+
{ "type": "array", "items": { "type": "string", "format": "uri" } }
|
|
17
|
+
],
|
|
18
|
+
"description": "URL(s) of media to attach to the message (MMS). Can be images, audio, or video.",
|
|
19
|
+
"examples": ["https://example.com/images/receipt.png"]
|
|
20
|
+
},
|
|
21
|
+
"carrier": { "type": "string", "description": "The messaging carrier to use. If not specified, the default carrier is used." },
|
|
22
|
+
"account_sid": { "type": "string", "description": "The account SID to use for sending. Defaults to the current account." },
|
|
23
|
+
"message_sid": { "type": "string", "description": "An optional message SID for tracking." },
|
|
24
|
+
"actionHook": { "$ref": "../components/actionHook", "description": "A webhook invoked when the message send completes or fails." }
|
|
25
|
+
},
|
|
26
|
+
"required": ["to", "from"],
|
|
27
|
+
"examples": [
|
|
28
|
+
{ "verb": "message", "to": "+15085551212", "from": "+15085559876", "text": "Your order has been confirmed. Order #12345." }
|
|
29
|
+
]
|
|
30
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/pause",
|
|
4
|
+
"title": "Pause",
|
|
5
|
+
"description": "Pauses execution for a specified number of seconds. The caller hears silence during the pause. Useful for adding delays between verbs.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": {
|
|
9
|
+
"const": "pause",
|
|
10
|
+
"description": "The verb name."
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"length": {
|
|
17
|
+
"type": "number",
|
|
18
|
+
"description": "The duration of the pause in seconds.",
|
|
19
|
+
"examples": [1, 2, 5]
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"required": ["length"],
|
|
23
|
+
"examples": [
|
|
24
|
+
{ "verb": "pause", "length": 2 }
|
|
25
|
+
]
|
|
26
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/pipeline",
|
|
4
|
+
"title": "Pipeline",
|
|
5
|
+
"description": "Configures a complete STT → LLM → TTS voice AI pipeline with integrated turn detection. Provides a higher-level abstraction than manually orchestrating the individual components. Optimized for building voice AI agents with proper turn-taking behavior.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": { "const": "pipeline" },
|
|
9
|
+
"id": { "type": "string", "description": "An optional unique identifier for this verb instance." },
|
|
10
|
+
"stt": {
|
|
11
|
+
"$ref": "../components/recognizer",
|
|
12
|
+
"description": "Speech-to-text configuration for the pipeline."
|
|
13
|
+
},
|
|
14
|
+
"tts": {
|
|
15
|
+
"$ref": "../components/synthesizer",
|
|
16
|
+
"description": "Text-to-speech configuration for the pipeline."
|
|
17
|
+
},
|
|
18
|
+
"vad": {
|
|
19
|
+
"$ref": "../components/vad",
|
|
20
|
+
"description": "Voice activity detection configuration for the pipeline."
|
|
21
|
+
},
|
|
22
|
+
"turnDetection": {
|
|
23
|
+
"type": "object",
|
|
24
|
+
"description": "Turn detection configuration. Determines when the user has finished speaking and it's the AI's turn to respond.",
|
|
25
|
+
"properties": {
|
|
26
|
+
"vendor": { "type": "string", "enum": ["krisp"], "description": "The turn detection vendor." },
|
|
27
|
+
"threshold": { "type": "number", "description": "Confidence threshold for turn detection." },
|
|
28
|
+
"eagerEotThreshold": { "type": "number", "description": "Threshold for eager end-of-turn detection. Lower values cause earlier turn transitions." }
|
|
29
|
+
},
|
|
30
|
+
"required": ["vendor"]
|
|
31
|
+
},
|
|
32
|
+
"llm": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"description": "LLM configuration for the pipeline. See the 'llm' verb schema for details.",
|
|
35
|
+
"additionalProperties": true
|
|
36
|
+
},
|
|
37
|
+
"preflightLlm": {
|
|
38
|
+
"type": "boolean",
|
|
39
|
+
"description": "If true, establish the LLM connection before the call starts to reduce latency on the first interaction."
|
|
40
|
+
},
|
|
41
|
+
"actionHook": { "$ref": "../components/actionHook", "description": "A webhook invoked when the pipeline ends." },
|
|
42
|
+
"eventHook": { "$ref": "../components/actionHook", "description": "A webhook invoked for pipeline events." }
|
|
43
|
+
},
|
|
44
|
+
"required": ["stt", "llm", "tts"],
|
|
45
|
+
"examples": [
|
|
46
|
+
{
|
|
47
|
+
"verb": "pipeline",
|
|
48
|
+
"stt": { "vendor": "deepgram", "language": "en-US" },
|
|
49
|
+
"tts": { "vendor": "cartesia", "voice": "sonic-english" },
|
|
50
|
+
"llm": {
|
|
51
|
+
"vendor": "openai",
|
|
52
|
+
"model": "gpt-4o",
|
|
53
|
+
"llmOptions": {
|
|
54
|
+
"messages": [{ "role": "system", "content": "You are a helpful voice assistant." }]
|
|
55
|
+
}
|
|
56
|
+
},
|
|
57
|
+
"turnDetection": { "vendor": "krisp", "threshold": 0.5 },
|
|
58
|
+
"actionHook": "/pipeline-complete"
|
|
59
|
+
}
|
|
60
|
+
]
|
|
61
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/play",
|
|
4
|
+
"title": "Play",
|
|
5
|
+
"description": "Plays an audio file to the caller. Supports WAV and MP3 formats hosted at a URL. Can play a single file or a list of files in sequence.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": {
|
|
9
|
+
"const": "play",
|
|
10
|
+
"description": "The verb name."
|
|
11
|
+
},
|
|
12
|
+
"id": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "An optional unique identifier for this verb instance."
|
|
15
|
+
},
|
|
16
|
+
"url": {
|
|
17
|
+
"oneOf": [
|
|
18
|
+
{ "type": "string", "format": "uri" },
|
|
19
|
+
{
|
|
20
|
+
"type": "array",
|
|
21
|
+
"items": { "type": "string", "format": "uri" }
|
|
22
|
+
}
|
|
23
|
+
],
|
|
24
|
+
"description": "The URL(s) of the audio file(s) to play. Supports WAV and MP3. If an array, files are played in sequence.",
|
|
25
|
+
"examples": [
|
|
26
|
+
"https://example.com/sounds/greeting.wav",
|
|
27
|
+
["https://example.com/sounds/part1.wav", "https://example.com/sounds/part2.wav"]
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
"loop": {
|
|
31
|
+
"oneOf": [
|
|
32
|
+
{ "type": "number" },
|
|
33
|
+
{ "type": "string" }
|
|
34
|
+
],
|
|
35
|
+
"description": "Number of times to repeat playback. Use 0 or 'forever' to loop indefinitely until interrupted.",
|
|
36
|
+
"examples": [3, "forever"]
|
|
37
|
+
},
|
|
38
|
+
"earlyMedia": {
|
|
39
|
+
"type": "boolean",
|
|
40
|
+
"description": "If true, play the audio as early media before the call is answered."
|
|
41
|
+
},
|
|
42
|
+
"seekOffset": {
|
|
43
|
+
"oneOf": [
|
|
44
|
+
{ "type": "number" },
|
|
45
|
+
{ "type": "string" }
|
|
46
|
+
],
|
|
47
|
+
"description": "Start playback at this offset in seconds from the beginning of the file."
|
|
48
|
+
},
|
|
49
|
+
"timeoutSecs": {
|
|
50
|
+
"oneOf": [
|
|
51
|
+
{ "type": "number" },
|
|
52
|
+
{ "type": "string" }
|
|
53
|
+
],
|
|
54
|
+
"description": "Maximum time in seconds to play the audio. Playback stops after this duration even if the file has not finished."
|
|
55
|
+
},
|
|
56
|
+
"actionHook": {
|
|
57
|
+
"$ref": "../components/actionHook",
|
|
58
|
+
"description": "A webhook to invoke when playback completes."
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
"required": ["url"],
|
|
62
|
+
"examples": [
|
|
63
|
+
{
|
|
64
|
+
"verb": "play",
|
|
65
|
+
"url": "https://example.com/sounds/hold-music.mp3",
|
|
66
|
+
"loop": "forever"
|
|
67
|
+
}
|
|
68
|
+
]
|
|
69
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/verbs/redirect",
|
|
4
|
+
"title": "Redirect",
|
|
5
|
+
"description": "Transfers call control to a different webhook URL. The current verb stack is abandoned and the new webhook's response becomes the active application. Useful for modular application design where different URLs handle different phases of a call.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"verb": { "const": "redirect" },
|
|
9
|
+
"id": { "type": "string", "description": "An optional unique identifier for this verb instance." },
|
|
10
|
+
"actionHook": {
|
|
11
|
+
"$ref": "../components/actionHook",
|
|
12
|
+
"description": "The webhook to transfer control to. Must return a new array of verbs."
|
|
13
|
+
},
|
|
14
|
+
"statusHook": {
|
|
15
|
+
"$ref": "../components/actionHook",
|
|
16
|
+
"description": "A webhook to receive call status events after the redirect."
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"required": ["actionHook"],
|
|
20
|
+
"examples": [
|
|
21
|
+
{ "verb": "redirect", "actionHook": "/new-handler" }
|
|
22
|
+
]
|
|
23
|
+
}
|