@jambonz/schema 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +974 -0
- package/callbacks/amd.schema.json +50 -0
- package/callbacks/base.schema.json +29 -0
- package/callbacks/call-status.schema.json +22 -0
- package/callbacks/conference-status.schema.json +24 -0
- package/callbacks/conference-wait.schema.json +11 -0
- package/callbacks/conference.schema.json +11 -0
- package/callbacks/dequeue.schema.json +19 -0
- package/callbacks/dial-dtmf.schema.json +18 -0
- package/callbacks/dial-hold.schema.json +22 -0
- package/callbacks/dial-refer.schema.json +28 -0
- package/callbacks/dial.schema.json +31 -0
- package/callbacks/enqueue-wait.schema.json +17 -0
- package/callbacks/enqueue.schema.json +27 -0
- package/callbacks/gather-partial.schema.json +54 -0
- package/callbacks/gather.schema.json +60 -0
- package/callbacks/listen.schema.json +21 -0
- package/callbacks/llm.schema.json +30 -0
- package/callbacks/message.schema.json +35 -0
- package/callbacks/pipeline-turn.schema.json +109 -0
- package/callbacks/play.schema.json +36 -0
- package/callbacks/session-new.schema.json +143 -0
- package/callbacks/session-reconnect.schema.json +9 -0
- package/callbacks/session-redirect.schema.json +38 -0
- package/callbacks/sip-refer-event.schema.json +20 -0
- package/callbacks/sip-refer.schema.json +22 -0
- package/callbacks/sip-request.schema.json +27 -0
- package/callbacks/transcribe-translation.schema.json +24 -0
- package/callbacks/transcribe.schema.json +46 -0
- package/callbacks/tts-streaming-event.schema.json +77 -0
- package/callbacks/verb-status.schema.json +57 -0
- package/components/actionHook.schema.json +36 -0
- package/components/actionHookDelayAction.schema.json +37 -0
- package/components/amd.schema.json +68 -0
- package/components/auth.schema.json +18 -0
- package/components/bidirectionalAudio.schema.json +22 -0
- package/components/fillerNoise.schema.json +25 -0
- package/components/llm-base.schema.json +94 -0
- package/components/recognizer-assemblyAiOptions.schema.json +66 -0
- package/components/recognizer-awsOptions.schema.json +52 -0
- package/components/recognizer-azureOptions.schema.json +32 -0
- package/components/recognizer-cobaltOptions.schema.json +34 -0
- package/components/recognizer-customOptions.schema.json +27 -0
- package/components/recognizer-deepgramOptions.schema.json +147 -0
- package/components/recognizer-elevenlabsOptions.schema.json +39 -0
- package/components/recognizer-gladiaOptions.schema.json +8 -0
- package/components/recognizer-googleOptions.schema.json +35 -0
- package/components/recognizer-houndifyOptions.schema.json +53 -0
- package/components/recognizer-ibmOptions.schema.json +54 -0
- package/components/recognizer-nuanceOptions.schema.json +150 -0
- package/components/recognizer-nvidiaOptions.schema.json +39 -0
- package/components/recognizer-openaiOptions.schema.json +59 -0
- package/components/recognizer-sonioxOptions.schema.json +46 -0
- package/components/recognizer-speechmaticsOptions.schema.json +100 -0
- package/components/recognizer-verbioOptions.schema.json +46 -0
- package/components/recognizer.schema.json +216 -0
- package/components/synthesizer.schema.json +82 -0
- package/components/target.schema.json +105 -0
- package/components/vad.schema.json +48 -0
- package/docs/components/recognizer.md +78 -0
- package/docs/components/synthesizer.md +27 -0
- package/docs/guides/session-commands.md +417 -0
- package/docs/verbs/conference.md +51 -0
- package/docs/verbs/deepgram_s2s.md +108 -0
- package/docs/verbs/dial.md +8 -0
- package/docs/verbs/listen.md +71 -0
- package/docs/verbs/pipeline.md +475 -0
- package/docs/verbs/stream.md +5 -0
- package/index.js +9 -0
- package/jambonz-app.schema.json +112 -0
- package/lib/normalize.js +72 -0
- package/lib/validator.js +137 -0
- package/package.json +39 -0
- package/verbs/alert.schema.json +34 -0
- package/verbs/answer.schema.json +22 -0
- package/verbs/conference.schema.json +107 -0
- package/verbs/config.schema.json +218 -0
- package/verbs/deepgram_s2s.schema.json +81 -0
- package/verbs/dequeue.schema.json +51 -0
- package/verbs/dial.schema.json +187 -0
- package/verbs/dialogflow.schema.json +148 -0
- package/verbs/dtmf.schema.json +49 -0
- package/verbs/dub.schema.json +103 -0
- package/verbs/elevenlabs_s2s.schema.json +81 -0
- package/verbs/enqueue.schema.json +53 -0
- package/verbs/gather.schema.json +188 -0
- package/verbs/google_s2s.schema.json +42 -0
- package/verbs/hangup.schema.json +36 -0
- package/verbs/leave.schema.json +22 -0
- package/verbs/listen.schema.json +127 -0
- package/verbs/llm.schema.json +44 -0
- package/verbs/message.schema.json +82 -0
- package/verbs/openai_s2s.schema.json +42 -0
- package/verbs/pause.schema.json +36 -0
- package/verbs/pipeline.schema.json +240 -0
- package/verbs/play.schema.json +96 -0
- package/verbs/redirect.schema.json +34 -0
- package/verbs/s2s.schema.json +39 -0
- package/verbs/say.schema.json +107 -0
- package/verbs/sip-decline.schema.json +58 -0
- package/verbs/sip-refer.schema.json +58 -0
- package/verbs/sip-request.schema.json +54 -0
- package/verbs/stream.schema.json +103 -0
- package/verbs/tag.schema.json +41 -0
- package/verbs/transcribe.schema.json +57 -0
- package/verbs/ultravox_s2s.schema.json +41 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/actionHookDelayAction",
|
|
4
|
+
"title": "ActionHookDelayAction",
|
|
5
|
+
"description": "Configuration for what to do when an actionHook (webhook) takes a long time to respond. Allows playing interim content (e.g. 'please wait' messages, hold music) while waiting for the webhook response, with configurable retry and give-up behavior.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"enabled": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Whether to enable delay handling for actionHooks."
|
|
11
|
+
},
|
|
12
|
+
"noResponseTimeout": {
|
|
13
|
+
"type": "number",
|
|
14
|
+
"description": "Time in seconds to wait before executing the delay actions. If the webhook responds before this timeout, the delay actions are skipped.",
|
|
15
|
+
"examples": [3, 5]
|
|
16
|
+
},
|
|
17
|
+
"noResponseGiveUpTimeout": {
|
|
18
|
+
"type": "number",
|
|
19
|
+
"description": "Total time in seconds to wait for a webhook response before giving up and executing the giveUpActions.",
|
|
20
|
+
"examples": [30, 60]
|
|
21
|
+
},
|
|
22
|
+
"retries": {
|
|
23
|
+
"type": "number",
|
|
24
|
+
"description": "Number of times to retry the delay actions while still waiting for the webhook response."
|
|
25
|
+
},
|
|
26
|
+
"actions": {
|
|
27
|
+
"type": "array",
|
|
28
|
+
"description": "An array of jambonz verbs to execute while waiting for the webhook response. Typically 'say' or 'play' verbs with messages like 'please hold'.",
|
|
29
|
+
"items": { "type": "object" }
|
|
30
|
+
},
|
|
31
|
+
"giveUpActions": {
|
|
32
|
+
"type": "array",
|
|
33
|
+
"description": "An array of jambonz verbs to execute if the webhook has not responded within noResponseGiveUpTimeout seconds. Typically an error message and/or hangup.",
|
|
34
|
+
"items": { "type": "object" }
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/amd",
|
|
4
|
+
"title": "Answering Machine Detection",
|
|
5
|
+
"description": "Configuration for answering machine detection (AMD). Detects whether an outbound or inbound call was answered by a human or a machine. Used as a nested property on the 'config' or 'dial' verb. IMPORTANT: AMD runs asynchronously in the background. When using AMD with the 'config' verb, you MUST follow it with a 'pause' verb (e.g. pause({ length: 25 })) to keep the call alive while AMD detection runs. Without a pause, the call will end immediately after config completes.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"actionHook": {
|
|
9
|
+
"$ref": "actionHook",
|
|
10
|
+
"description": "Webhook to receive AMD events (amd_human_detected, amd_machine_detected, amd_no_speech_detected, amd_decision_timeout, amd_machine_stopped_speaking, amd_tone_detected, amd_error, amd_stopped)."
|
|
11
|
+
},
|
|
12
|
+
"thresholdWordCount": {
|
|
13
|
+
"type": "number",
|
|
14
|
+
"description": "Number of spoken words in a greeting that triggers an amd_machine_detected result.",
|
|
15
|
+
"default": 9
|
|
16
|
+
},
|
|
17
|
+
"digitCount": {
|
|
18
|
+
"type": "number",
|
|
19
|
+
"description": "Number of digits in a greeting to trigger detection. 0 disables digit-based detection.",
|
|
20
|
+
"default": 0
|
|
21
|
+
},
|
|
22
|
+
"timers": {
|
|
23
|
+
"type": "object",
|
|
24
|
+
"description": "Timer settings controlling AMD detection windows.",
|
|
25
|
+
"properties": {
|
|
26
|
+
"noSpeechTimeoutMs": {
|
|
27
|
+
"type": "number",
|
|
28
|
+
"description": "Milliseconds to wait for any speech before returning amd_no_speech_detected.",
|
|
29
|
+
"default": 5000
|
|
30
|
+
},
|
|
31
|
+
"decisionTimeoutMs": {
|
|
32
|
+
"type": "number",
|
|
33
|
+
"description": "Milliseconds before returning amd_decision_timeout if no determination is made.",
|
|
34
|
+
"default": 15000
|
|
35
|
+
},
|
|
36
|
+
"toneTimeoutMs": {
|
|
37
|
+
"type": "number",
|
|
38
|
+
"description": "Milliseconds to wait for beep/tone detection.",
|
|
39
|
+
"default": 20000
|
|
40
|
+
},
|
|
41
|
+
"greetingCompletionTimeoutMs": {
|
|
42
|
+
"type": "number",
|
|
43
|
+
"description": "Milliseconds of silence after speech before determining the machine greeting is complete. Automatically reduced to 1000ms if a beep is detected.",
|
|
44
|
+
"default": 2000
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"additionalProperties": false
|
|
48
|
+
},
|
|
49
|
+
"recognizer": {
|
|
50
|
+
"$ref": "recognizer",
|
|
51
|
+
"description": "Override the STT recognizer used for AMD speech detection. When omitted, AMD uses the session default recognizer with enhancedModel enabled."
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"required": ["actionHook"],
|
|
55
|
+
"examples": [
|
|
56
|
+
{
|
|
57
|
+
"actionHook": "/amd-events"
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"actionHook": "/amd-events",
|
|
61
|
+
"thresholdWordCount": 6,
|
|
62
|
+
"timers": {
|
|
63
|
+
"noSpeechTimeoutMs": 3000,
|
|
64
|
+
"decisionTimeoutMs": 10000
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
]
|
|
68
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/auth",
|
|
4
|
+
"title": "Auth",
|
|
5
|
+
"description": "Basic authentication credentials, used for authenticating with external services such as websocket endpoints or SIP registrars.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"username": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "The username for authentication."
|
|
11
|
+
},
|
|
12
|
+
"password": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "The password for authentication."
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"required": ["username", "password"]
|
|
18
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/bidirectionalAudio",
|
|
4
|
+
"title": "BidirectionalAudio",
|
|
5
|
+
"description": "Configuration for bidirectional audio streaming over a websocket connection. When enabled, the remote websocket endpoint can send audio back to jambonz to be played to the caller.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"enabled": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Whether to enable bidirectional audio on the websocket connection."
|
|
11
|
+
},
|
|
12
|
+
"streaming": {
|
|
13
|
+
"type": "boolean",
|
|
14
|
+
"description": "If true, audio is streamed continuously rather than sent as complete messages."
|
|
15
|
+
},
|
|
16
|
+
"sampleRate": {
|
|
17
|
+
"type": "number",
|
|
18
|
+
"description": "The sample rate in Hz for bidirectional audio.",
|
|
19
|
+
"examples": [8000, 16000, 24000]
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/fillerNoise",
|
|
4
|
+
"title": "FillerNoise",
|
|
5
|
+
"description": "Configuration for playing background filler noise (e.g. keyboard typing, hold music) while the application is processing and the caller would otherwise hear silence. Commonly used during LLM response generation to indicate the system is working.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"enable": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Whether to enable filler noise."
|
|
11
|
+
},
|
|
12
|
+
"url": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"format": "uri",
|
|
15
|
+
"description": "URL of the audio file to play as filler noise. Should be a short, loopable audio clip.",
|
|
16
|
+
"examples": ["https://example.com/sounds/typing.wav"]
|
|
17
|
+
},
|
|
18
|
+
"startDelaySecs": {
|
|
19
|
+
"type": "number",
|
|
20
|
+
"description": "Number of seconds to wait before starting filler noise. Prevents filler noise from playing during brief processing pauses.",
|
|
21
|
+
"examples": [1, 2]
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"required": ["enable"]
|
|
25
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/llm-base",
|
|
4
|
+
"title": "LLM Base Properties",
|
|
5
|
+
"description": "Shared properties for llm, s2s, and vendor-specific s2s verb schemas.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"id": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "An optional unique identifier for this verb instance."
|
|
11
|
+
},
|
|
12
|
+
"vendor": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "The LLM vendor to use.",
|
|
15
|
+
"examples": ["openai", "anthropic", "google", "groq", "deepseek", "deepgram", "ultravox", "custom"]
|
|
16
|
+
},
|
|
17
|
+
"model": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "The specific model to use from the vendor.",
|
|
20
|
+
"examples": ["gpt-4o", "claude-sonnet-4-20250514", "gemini-2.0-flash"]
|
|
21
|
+
},
|
|
22
|
+
"auth": {
|
|
23
|
+
"type": "object",
|
|
24
|
+
"description": "Authentication credentials for the LLM vendor API.",
|
|
25
|
+
"properties": {
|
|
26
|
+
"apiKey": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "The API key for the LLM vendor."
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"additionalProperties": true
|
|
32
|
+
},
|
|
33
|
+
"connectOptions": {
|
|
34
|
+
"type": "object",
|
|
35
|
+
"description": "Additional connection options for the LLM vendor, such as custom base URLs or API versions.",
|
|
36
|
+
"additionalProperties": true
|
|
37
|
+
},
|
|
38
|
+
"llmOptions": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"description": "Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'.",
|
|
41
|
+
"additionalProperties": true,
|
|
42
|
+
"examples": [
|
|
43
|
+
{
|
|
44
|
+
"messages": [
|
|
45
|
+
{ "role": "system", "content": "You are a helpful customer service agent for Acme Corp." }
|
|
46
|
+
],
|
|
47
|
+
"temperature": 0.7
|
|
48
|
+
}
|
|
49
|
+
]
|
|
50
|
+
},
|
|
51
|
+
"mcpServers": {
|
|
52
|
+
"type": "array",
|
|
53
|
+
"items": {
|
|
54
|
+
"type": "object",
|
|
55
|
+
"properties": {
|
|
56
|
+
"url": {
|
|
57
|
+
"type": "string",
|
|
58
|
+
"format": "uri",
|
|
59
|
+
"description": "The URL of the MCP server."
|
|
60
|
+
},
|
|
61
|
+
"auth": {
|
|
62
|
+
"type": "object",
|
|
63
|
+
"description": "Authentication for the MCP server.",
|
|
64
|
+
"additionalProperties": true
|
|
65
|
+
},
|
|
66
|
+
"roots": {
|
|
67
|
+
"type": "array",
|
|
68
|
+
"items": { "type": "object" },
|
|
69
|
+
"description": "MCP root definitions."
|
|
70
|
+
}
|
|
71
|
+
},
|
|
72
|
+
"required": ["url"]
|
|
73
|
+
},
|
|
74
|
+
"description": "Model Context Protocol servers to connect to. MCP servers provide tools that the LLM can invoke during the conversation."
|
|
75
|
+
},
|
|
76
|
+
"actionHook": {
|
|
77
|
+
"$ref": "actionHook",
|
|
78
|
+
"description": "A webhook invoked when the LLM conversation ends. Receives conversation details and should return the next verbs to execute."
|
|
79
|
+
},
|
|
80
|
+
"eventHook": {
|
|
81
|
+
"$ref": "actionHook",
|
|
82
|
+
"description": "A webhook invoked for real-time events during the LLM conversation (e.g. tool calls, transcription events)."
|
|
83
|
+
},
|
|
84
|
+
"toolHook": {
|
|
85
|
+
"$ref": "actionHook",
|
|
86
|
+
"description": "A webhook invoked when the LLM calls a tool/function. Receives the tool name and arguments, and should return the tool result."
|
|
87
|
+
},
|
|
88
|
+
"events": {
|
|
89
|
+
"type": "array",
|
|
90
|
+
"items": { "type": "string" },
|
|
91
|
+
"description": "List of event types to receive via the eventHook."
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-assemblyAiOptions",
|
|
4
|
+
"title": "AssemblyAI Recognizer Options",
|
|
5
|
+
"description": "AssemblyAI-specific STT options. Only applies when recognizer vendor is 'assemblyai'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"apiKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "AssemblyAI API key. Overrides credentials configured in jambonz."
|
|
11
|
+
},
|
|
12
|
+
"serviceVersion": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"enum": ["v2", "v3"],
|
|
15
|
+
"description": "AssemblyAI streaming API version."
|
|
16
|
+
},
|
|
17
|
+
"speechModel": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "AssemblyAI speech model to use for recognition."
|
|
20
|
+
},
|
|
21
|
+
"formatTurns": {
|
|
22
|
+
"type": "boolean",
|
|
23
|
+
"description": "Enable turn-level formatting."
|
|
24
|
+
},
|
|
25
|
+
"endOfTurnConfidenceThreshold": {
|
|
26
|
+
"type": "number",
|
|
27
|
+
"description": "Confidence threshold for end-of-turn detection."
|
|
28
|
+
},
|
|
29
|
+
"minEndOfTurnSilenceWhenConfident": {
|
|
30
|
+
"type": "number",
|
|
31
|
+
"description": "Minimum silence duration (seconds) to trigger end-of-turn when confidence is met."
|
|
32
|
+
},
|
|
33
|
+
"maxTurnSilence": {
|
|
34
|
+
"type": "number",
|
|
35
|
+
"description": "Maximum silence duration (seconds) before forcing end-of-turn."
|
|
36
|
+
},
|
|
37
|
+
"minTurnSilence": {
|
|
38
|
+
"type": "number",
|
|
39
|
+
"description": "Minimum silence duration (seconds) before allowing end-of-turn."
|
|
40
|
+
},
|
|
41
|
+
"keyterms": {
|
|
42
|
+
"type": "array",
|
|
43
|
+
"items": {
|
|
44
|
+
"type": "string"
|
|
45
|
+
},
|
|
46
|
+
"description": "List of key terms to boost in recognition."
|
|
47
|
+
},
|
|
48
|
+
"prompt": {
|
|
49
|
+
"type": "string",
|
|
50
|
+
"description": "Prompt to guide the recognition model."
|
|
51
|
+
},
|
|
52
|
+
"languageDetection": {
|
|
53
|
+
"type": "boolean",
|
|
54
|
+
"description": "Enable automatic language detection."
|
|
55
|
+
},
|
|
56
|
+
"vadThreshold": {
|
|
57
|
+
"type": "number",
|
|
58
|
+
"description": "Voice activity detection threshold."
|
|
59
|
+
},
|
|
60
|
+
"inactivityTimeout": {
|
|
61
|
+
"type": "number",
|
|
62
|
+
"description": "Timeout (seconds) for inactivity before closing the stream."
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"additionalProperties": false
|
|
66
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-awsOptions",
|
|
4
|
+
"title": "AWS Recognizer Options",
|
|
5
|
+
"description": "AWS Transcribe-specific options. Only applies when recognizer vendor is 'aws'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"accessKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "AWS access key ID. Overrides credentials configured in jambonz."
|
|
11
|
+
},
|
|
12
|
+
"secretKey": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "AWS secret access key."
|
|
15
|
+
},
|
|
16
|
+
"securityToken": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "AWS temporary security token (for STS/assumed roles)."
|
|
19
|
+
},
|
|
20
|
+
"region": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "AWS region for the Transcribe service."
|
|
23
|
+
},
|
|
24
|
+
"vocabularyName": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Name of a custom vocabulary to use."
|
|
27
|
+
},
|
|
28
|
+
"vocabularyFilterName": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Name of a vocabulary filter to apply."
|
|
31
|
+
},
|
|
32
|
+
"vocabularyFilterMethod": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"enum": ["remove", "mask", "tag"],
|
|
35
|
+
"description": "How filtered vocabulary words should be handled."
|
|
36
|
+
},
|
|
37
|
+
"languageModelName": {
|
|
38
|
+
"type": "string",
|
|
39
|
+
"description": "Name of a custom language model."
|
|
40
|
+
},
|
|
41
|
+
"piiEntityTypes": {
|
|
42
|
+
"type": "array",
|
|
43
|
+
"items": { "type": "string" },
|
|
44
|
+
"description": "PII entity types to identify (e.g. 'BANK_ACCOUNT_NUMBER', 'CREDIT_DEBIT_NUMBER')."
|
|
45
|
+
},
|
|
46
|
+
"piiIdentifyEntities": {
|
|
47
|
+
"type": "boolean",
|
|
48
|
+
"description": "Enable PII entity identification."
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"additionalProperties": false
|
|
52
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-azureOptions",
|
|
4
|
+
"title": "Azure Recognizer Options",
|
|
5
|
+
"description": "Options specific to Azure Speech Services. Only applies when recognizer vendor is 'microsoft'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"speechSegmentationSilenceTimeoutMs": {
|
|
9
|
+
"type": "number",
|
|
10
|
+
"description": "Silence timeout in milliseconds for speech segmentation."
|
|
11
|
+
},
|
|
12
|
+
"postProcessing": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Post-processing mode for transcription results."
|
|
15
|
+
},
|
|
16
|
+
"audioLogging": {
|
|
17
|
+
"type": "boolean",
|
|
18
|
+
"description": "Enable audio logging for diagnostics."
|
|
19
|
+
},
|
|
20
|
+
"languageIdMode": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"enum": ["AtStart", "Continuous"],
|
|
23
|
+
"description": "Language identification mode when using multiple languages."
|
|
24
|
+
},
|
|
25
|
+
"speechRecognitionMode": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"enum": ["CONVERSATION", "DICTATION", "INTERACTIVE"],
|
|
28
|
+
"description": "Speech recognition mode optimized for the interaction type."
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"additionalProperties": false
|
|
32
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-cobaltOptions",
|
|
4
|
+
"title": "Cobalt Recognizer Options",
|
|
5
|
+
"description": "Cobalt-specific STT options. Only applies when recognizer vendor is 'cobalt'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"serverUri": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Cobalt server URI."
|
|
11
|
+
},
|
|
12
|
+
"enableConfusionNetwork": {
|
|
13
|
+
"type": "boolean",
|
|
14
|
+
"description": "Enable confusion network output."
|
|
15
|
+
},
|
|
16
|
+
"metadata": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Metadata string to pass to the server."
|
|
19
|
+
},
|
|
20
|
+
"compiledContextData": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Compiled context data for biasing recognition."
|
|
23
|
+
},
|
|
24
|
+
"wordTimeOffsets": {
|
|
25
|
+
"type": "boolean",
|
|
26
|
+
"description": "Include word-level timestamps."
|
|
27
|
+
},
|
|
28
|
+
"contextToken": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Context token for server-side context."
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"additionalProperties": false
|
|
34
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-customOptions",
|
|
4
|
+
"title": "Custom Recognizer Options",
|
|
5
|
+
"description": "Options for custom STT vendors. Only applies when recognizer vendor is 'custom'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"authToken": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Authentication token for the custom STT service."
|
|
11
|
+
},
|
|
12
|
+
"uri": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "WebSocket URI of the custom STT service."
|
|
15
|
+
},
|
|
16
|
+
"sampleRate": {
|
|
17
|
+
"type": "number",
|
|
18
|
+
"description": "Audio sample rate in Hz."
|
|
19
|
+
},
|
|
20
|
+
"options": {
|
|
21
|
+
"type": "object",
|
|
22
|
+
"description": "Additional vendor-specific options passed through to the custom service.",
|
|
23
|
+
"additionalProperties": true
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"additionalProperties": false
|
|
27
|
+
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-deepgramOptions",
|
|
4
|
+
"title": "Deepgram Recognizer Options",
|
|
5
|
+
"description": "Deepgram-specific STT options. Only applies when recognizer vendor is 'deepgram'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"deepgramSttUri": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Custom Deepgram STT endpoint URI."
|
|
11
|
+
},
|
|
12
|
+
"deepgramSttUseTls": {
|
|
13
|
+
"type": "boolean",
|
|
14
|
+
"description": "Whether to use TLS when connecting to the Deepgram STT endpoint."
|
|
15
|
+
},
|
|
16
|
+
"apiKey": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Deepgram API key. Overrides the key configured in jambonz."
|
|
19
|
+
},
|
|
20
|
+
"tier": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Deepgram model tier."
|
|
23
|
+
},
|
|
24
|
+
"model": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Deepgram model name (e.g. 'nova-2', 'nova-2-general')."
|
|
27
|
+
},
|
|
28
|
+
"customModel": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "ID of a custom-trained Deepgram model."
|
|
31
|
+
},
|
|
32
|
+
"version": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"description": "Model version."
|
|
35
|
+
},
|
|
36
|
+
"punctuate": {
|
|
37
|
+
"type": "boolean",
|
|
38
|
+
"description": "Enable automatic punctuation."
|
|
39
|
+
},
|
|
40
|
+
"smartFormatting": {
|
|
41
|
+
"type": "boolean",
|
|
42
|
+
"description": "Enable Deepgram smart formatting (dates, numbers, etc.)."
|
|
43
|
+
},
|
|
44
|
+
"noDelay": {
|
|
45
|
+
"type": "boolean",
|
|
46
|
+
"description": "Disable Deepgram's internal buffering for lower latency."
|
|
47
|
+
},
|
|
48
|
+
"profanityFilter": {
|
|
49
|
+
"type": "boolean",
|
|
50
|
+
"description": "Filter profanity from transcripts."
|
|
51
|
+
},
|
|
52
|
+
"redact": {
|
|
53
|
+
"type": "string",
|
|
54
|
+
"enum": ["pci", "numbers", "true", "ssn"],
|
|
55
|
+
"description": "Redact sensitive information from transcripts."
|
|
56
|
+
},
|
|
57
|
+
"diarize": {
|
|
58
|
+
"type": "boolean",
|
|
59
|
+
"description": "Enable speaker diarization."
|
|
60
|
+
},
|
|
61
|
+
"diarizeVersion": {
|
|
62
|
+
"type": "string",
|
|
63
|
+
"description": "Diarization model version."
|
|
64
|
+
},
|
|
65
|
+
"ner": {
|
|
66
|
+
"type": "boolean",
|
|
67
|
+
"description": "Enable named entity recognition."
|
|
68
|
+
},
|
|
69
|
+
"multichannel": {
|
|
70
|
+
"type": "boolean",
|
|
71
|
+
"description": "Enable multichannel processing."
|
|
72
|
+
},
|
|
73
|
+
"alternatives": {
|
|
74
|
+
"type": "number",
|
|
75
|
+
"description": "Number of alternative transcripts to return."
|
|
76
|
+
},
|
|
77
|
+
"numerals": {
|
|
78
|
+
"type": "boolean",
|
|
79
|
+
"description": "Convert spoken numbers to digits."
|
|
80
|
+
},
|
|
81
|
+
"search": {
|
|
82
|
+
"type": "array",
|
|
83
|
+
"items": { "type": "string" },
|
|
84
|
+
"description": "Terms to search for in the transcript."
|
|
85
|
+
},
|
|
86
|
+
"replace": {
|
|
87
|
+
"type": "array",
|
|
88
|
+
"items": { "type": "string" },
|
|
89
|
+
"description": "Terms to replace in the transcript."
|
|
90
|
+
},
|
|
91
|
+
"keywords": {
|
|
92
|
+
"type": "array",
|
|
93
|
+
"items": { "type": "string" },
|
|
94
|
+
"description": "Keywords to boost recognition for."
|
|
95
|
+
},
|
|
96
|
+
"keyterms": {
|
|
97
|
+
"type": "array",
|
|
98
|
+
"items": { "type": "string" },
|
|
99
|
+
"description": "Key terms to boost recognition for."
|
|
100
|
+
},
|
|
101
|
+
"endpointing": {
|
|
102
|
+
"type": ["boolean", "number"],
|
|
103
|
+
"description": "Endpointing control. Set to a boolean to enable/disable endpointing, or to a number giving the silence duration in milliseconds that marks the end of speech."
|
|
104
|
+
},
|
|
105
|
+
"utteranceEndMs": {
|
|
106
|
+
"type": "number",
|
|
107
|
+
"description": "Milliseconds of silence to detect end of utterance."
|
|
108
|
+
},
|
|
109
|
+
"shortUtterance": {
|
|
110
|
+
"type": "boolean",
|
|
111
|
+
"description": "Optimize for short utterances."
|
|
112
|
+
},
|
|
113
|
+
"vadTurnoff": {
|
|
114
|
+
"type": "number",
|
|
115
|
+
"description": "Milliseconds of silence before VAD turns off."
|
|
116
|
+
},
|
|
117
|
+
"tag": {
|
|
118
|
+
"type": "string",
|
|
119
|
+
"description": "Tag to associate with the request for tracking."
|
|
120
|
+
},
|
|
121
|
+
"fillerWords": {
|
|
122
|
+
"type": "boolean",
|
|
123
|
+
"description": "Include filler words (um, uh) in transcript."
|
|
124
|
+
},
|
|
125
|
+
"eotThreshold": {
|
|
126
|
+
"type": "number",
|
|
127
|
+
"description": "End-of-turn confidence threshold (0-1)."
|
|
128
|
+
},
|
|
129
|
+
"eotTimeoutMs": {
|
|
130
|
+
"type": "number",
|
|
131
|
+
"description": "End-of-turn timeout in milliseconds."
|
|
132
|
+
},
|
|
133
|
+
"mipOptOut": {
|
|
134
|
+
"type": "boolean",
|
|
135
|
+
"description": "Opt out of Deepgram's model improvement program."
|
|
136
|
+
},
|
|
137
|
+
"entityPrompt": {
|
|
138
|
+
"type": "string",
|
|
139
|
+
"description": "Prompt to guide entity detection."
|
|
140
|
+
},
|
|
141
|
+
"eagerEotThreshold": {
|
|
142
|
+
"type": "number",
|
|
143
|
+
"description": "Eager end-of-turn threshold for faster response."
|
|
144
|
+
}
|
|
145
|
+
},
|
|
146
|
+
"additionalProperties": false
|
|
147
|
+
}
|