jambonz-python-sdk 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jambonz_python_sdk-0.2.0.dist-info/METADATA +179 -0
- jambonz_python_sdk-0.2.0.dist-info/RECORD +119 -0
- jambonz_python_sdk-0.2.0.dist-info/WHEEL +4 -0
- jambonz_sdk/__init__.py +52 -0
- jambonz_sdk/_signature.py +73 -0
- jambonz_sdk/client/__init__.py +15 -0
- jambonz_sdk/client/api.py +241 -0
- jambonz_sdk/schema/callbacks/amd.schema.json +50 -0
- jambonz_sdk/schema/callbacks/base.schema.json +29 -0
- jambonz_sdk/schema/callbacks/call-status.schema.json +22 -0
- jambonz_sdk/schema/callbacks/conference-status.schema.json +24 -0
- jambonz_sdk/schema/callbacks/conference-wait.schema.json +11 -0
- jambonz_sdk/schema/callbacks/conference.schema.json +11 -0
- jambonz_sdk/schema/callbacks/dequeue.schema.json +19 -0
- jambonz_sdk/schema/callbacks/dial-dtmf.schema.json +18 -0
- jambonz_sdk/schema/callbacks/dial-hold.schema.json +22 -0
- jambonz_sdk/schema/callbacks/dial-refer.schema.json +28 -0
- jambonz_sdk/schema/callbacks/dial.schema.json +31 -0
- jambonz_sdk/schema/callbacks/enqueue-wait.schema.json +17 -0
- jambonz_sdk/schema/callbacks/enqueue.schema.json +27 -0
- jambonz_sdk/schema/callbacks/gather-partial.schema.json +54 -0
- jambonz_sdk/schema/callbacks/gather.schema.json +60 -0
- jambonz_sdk/schema/callbacks/listen.schema.json +21 -0
- jambonz_sdk/schema/callbacks/llm.schema.json +30 -0
- jambonz_sdk/schema/callbacks/message.schema.json +35 -0
- jambonz_sdk/schema/callbacks/pipeline-turn.schema.json +109 -0
- jambonz_sdk/schema/callbacks/play.schema.json +36 -0
- jambonz_sdk/schema/callbacks/session-new.schema.json +143 -0
- jambonz_sdk/schema/callbacks/session-reconnect.schema.json +9 -0
- jambonz_sdk/schema/callbacks/session-redirect.schema.json +38 -0
- jambonz_sdk/schema/callbacks/sip-refer-event.schema.json +20 -0
- jambonz_sdk/schema/callbacks/sip-refer.schema.json +22 -0
- jambonz_sdk/schema/callbacks/sip-request.schema.json +27 -0
- jambonz_sdk/schema/callbacks/transcribe-translation.schema.json +24 -0
- jambonz_sdk/schema/callbacks/transcribe.schema.json +46 -0
- jambonz_sdk/schema/callbacks/tts-streaming-event.schema.json +77 -0
- jambonz_sdk/schema/callbacks/verb-status.schema.json +57 -0
- jambonz_sdk/schema/components/actionHook.schema.json +36 -0
- jambonz_sdk/schema/components/actionHookDelayAction.schema.json +37 -0
- jambonz_sdk/schema/components/amd.schema.json +68 -0
- jambonz_sdk/schema/components/auth.schema.json +18 -0
- jambonz_sdk/schema/components/bidirectionalAudio.schema.json +22 -0
- jambonz_sdk/schema/components/fillerNoise.schema.json +25 -0
- jambonz_sdk/schema/components/llm-base.schema.json +94 -0
- jambonz_sdk/schema/components/recognizer-assemblyAiOptions.schema.json +66 -0
- jambonz_sdk/schema/components/recognizer-awsOptions.schema.json +52 -0
- jambonz_sdk/schema/components/recognizer-azureOptions.schema.json +32 -0
- jambonz_sdk/schema/components/recognizer-cobaltOptions.schema.json +34 -0
- jambonz_sdk/schema/components/recognizer-customOptions.schema.json +27 -0
- jambonz_sdk/schema/components/recognizer-deepgramOptions.schema.json +147 -0
- jambonz_sdk/schema/components/recognizer-elevenlabsOptions.schema.json +39 -0
- jambonz_sdk/schema/components/recognizer-gladiaOptions.schema.json +8 -0
- jambonz_sdk/schema/components/recognizer-googleOptions.schema.json +35 -0
- jambonz_sdk/schema/components/recognizer-houndifyOptions.schema.json +53 -0
- jambonz_sdk/schema/components/recognizer-ibmOptions.schema.json +54 -0
- jambonz_sdk/schema/components/recognizer-nuanceOptions.schema.json +150 -0
- jambonz_sdk/schema/components/recognizer-nvidiaOptions.schema.json +39 -0
- jambonz_sdk/schema/components/recognizer-openaiOptions.schema.json +59 -0
- jambonz_sdk/schema/components/recognizer-sonioxOptions.schema.json +46 -0
- jambonz_sdk/schema/components/recognizer-speechmaticsOptions.schema.json +100 -0
- jambonz_sdk/schema/components/recognizer-verbioOptions.schema.json +46 -0
- jambonz_sdk/schema/components/recognizer.schema.json +216 -0
- jambonz_sdk/schema/components/synthesizer.schema.json +82 -0
- jambonz_sdk/schema/components/target.schema.json +105 -0
- jambonz_sdk/schema/components/vad.schema.json +48 -0
- jambonz_sdk/schema/jambonz-app.schema.json +113 -0
- jambonz_sdk/schema/verbs/alert.schema.json +34 -0
- jambonz_sdk/schema/verbs/answer.schema.json +22 -0
- jambonz_sdk/schema/verbs/conference.schema.json +107 -0
- jambonz_sdk/schema/verbs/config.schema.json +221 -0
- jambonz_sdk/schema/verbs/deepgram_s2s.schema.json +81 -0
- jambonz_sdk/schema/verbs/dequeue.schema.json +51 -0
- jambonz_sdk/schema/verbs/dial.schema.json +200 -0
- jambonz_sdk/schema/verbs/dialogflow.schema.json +148 -0
- jambonz_sdk/schema/verbs/dtmf.schema.json +49 -0
- jambonz_sdk/schema/verbs/dub.schema.json +103 -0
- jambonz_sdk/schema/verbs/elevenlabs_s2s.schema.json +81 -0
- jambonz_sdk/schema/verbs/enqueue.schema.json +53 -0
- jambonz_sdk/schema/verbs/gather.schema.json +190 -0
- jambonz_sdk/schema/verbs/google_s2s.schema.json +42 -0
- jambonz_sdk/schema/verbs/hangup.schema.json +36 -0
- jambonz_sdk/schema/verbs/leave.schema.json +22 -0
- jambonz_sdk/schema/verbs/listen.schema.json +127 -0
- jambonz_sdk/schema/verbs/llm.schema.json +44 -0
- jambonz_sdk/schema/verbs/message.schema.json +82 -0
- jambonz_sdk/schema/verbs/openai_s2s.schema.json +42 -0
- jambonz_sdk/schema/verbs/pause.schema.json +36 -0
- jambonz_sdk/schema/verbs/pipeline.schema.json +240 -0
- jambonz_sdk/schema/verbs/play.schema.json +96 -0
- jambonz_sdk/schema/verbs/redirect.schema.json +34 -0
- jambonz_sdk/schema/verbs/rest_dial.schema.json +113 -0
- jambonz_sdk/schema/verbs/s2s.schema.json +39 -0
- jambonz_sdk/schema/verbs/say.schema.json +107 -0
- jambonz_sdk/schema/verbs/sip-decline.schema.json +58 -0
- jambonz_sdk/schema/verbs/sip-refer.schema.json +58 -0
- jambonz_sdk/schema/verbs/sip-request.schema.json +54 -0
- jambonz_sdk/schema/verbs/stream.schema.json +103 -0
- jambonz_sdk/schema/verbs/tag.schema.json +41 -0
- jambonz_sdk/schema/verbs/transcribe.schema.json +57 -0
- jambonz_sdk/schema/verbs/ultravox_s2s.schema.json +41 -0
- jambonz_sdk/types/__init__.py +139 -0
- jambonz_sdk/types/components.py +250 -0
- jambonz_sdk/types/rest.py +59 -0
- jambonz_sdk/types/session.py +55 -0
- jambonz_sdk/types/verbs.py +572 -0
- jambonz_sdk/validator.py +107 -0
- jambonz_sdk/verb_builder.py +316 -0
- jambonz_sdk/verb_builder.pyi +1133 -0
- jambonz_sdk/verb_registry.py +102 -0
- jambonz_sdk/webhook/__init__.py +10 -0
- jambonz_sdk/webhook/middleware.py +63 -0
- jambonz_sdk/webhook/response.py +43 -0
- jambonz_sdk/websocket/__init__.py +15 -0
- jambonz_sdk/websocket/audio_client.py +11 -0
- jambonz_sdk/websocket/audio_stream.py +151 -0
- jambonz_sdk/websocket/client.py +165 -0
- jambonz_sdk/websocket/endpoint.py +193 -0
- jambonz_sdk/websocket/router.py +87 -0
- jambonz_sdk/websocket/session.py +259 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-deepgramOptions",
|
|
4
|
+
"title": "Deepgram Recognizer Options",
|
|
5
|
+
"description": "Deepgram-specific STT options. Only applies when recognizer vendor is 'deepgram'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"deepgramSttUri": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Custom Deepgram STT endpoint URI."
|
|
11
|
+
},
|
|
12
|
+
"deepgramSttUseTls": {
|
|
13
|
+
"type": "boolean",
|
|
14
|
+
"description": "Whether to use TLS when connecting to the Deepgram STT endpoint."
|
|
15
|
+
},
|
|
16
|
+
"apiKey": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Deepgram API key. Overrides the key configured in jambonz."
|
|
19
|
+
},
|
|
20
|
+
"tier": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Deepgram model tier."
|
|
23
|
+
},
|
|
24
|
+
"model": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Deepgram model name (e.g. 'nova-2', 'nova-2-general')."
|
|
27
|
+
},
|
|
28
|
+
"customModel": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "ID of a custom-trained Deepgram model."
|
|
31
|
+
},
|
|
32
|
+
"version": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"description": "Model version."
|
|
35
|
+
},
|
|
36
|
+
"punctuate": {
|
|
37
|
+
"type": "boolean",
|
|
38
|
+
"description": "Enable automatic punctuation."
|
|
39
|
+
},
|
|
40
|
+
"smartFormatting": {
|
|
41
|
+
"type": "boolean",
|
|
42
|
+
"description": "Enable Deepgram smart formatting (dates, numbers, etc.)."
|
|
43
|
+
},
|
|
44
|
+
"noDelay": {
|
|
45
|
+
"type": "boolean",
|
|
46
|
+
"description": "Disable Deepgram's internal buffering for lower latency."
|
|
47
|
+
},
|
|
48
|
+
"profanityFilter": {
|
|
49
|
+
"type": "boolean",
|
|
50
|
+
"description": "Filter profanity from transcripts."
|
|
51
|
+
},
|
|
52
|
+
"redact": {
|
|
53
|
+
"type": "string",
|
|
54
|
+
"enum": ["pci", "numbers", "true", "ssn"],
|
|
55
|
+
"description": "Redact sensitive information from transcripts."
|
|
56
|
+
},
|
|
57
|
+
"diarize": {
|
|
58
|
+
"type": "boolean",
|
|
59
|
+
"description": "Enable speaker diarization."
|
|
60
|
+
},
|
|
61
|
+
"diarizeVersion": {
|
|
62
|
+
"type": "string",
|
|
63
|
+
"description": "Diarization model version."
|
|
64
|
+
},
|
|
65
|
+
"ner": {
|
|
66
|
+
"type": "boolean",
|
|
67
|
+
"description": "Enable named entity recognition."
|
|
68
|
+
},
|
|
69
|
+
"multichannel": {
|
|
70
|
+
"type": "boolean",
|
|
71
|
+
"description": "Enable multichannel processing."
|
|
72
|
+
},
|
|
73
|
+
"alternatives": {
|
|
74
|
+
"type": "number",
|
|
75
|
+
"description": "Number of alternative transcripts to return."
|
|
76
|
+
},
|
|
77
|
+
"numerals": {
|
|
78
|
+
"type": "boolean",
|
|
79
|
+
"description": "Convert spoken numbers to digits."
|
|
80
|
+
},
|
|
81
|
+
"search": {
|
|
82
|
+
"type": "array",
|
|
83
|
+
"items": { "type": "string" },
|
|
84
|
+
"description": "Terms to search for in the transcript."
|
|
85
|
+
},
|
|
86
|
+
"replace": {
|
|
87
|
+
"type": "array",
|
|
88
|
+
"items": { "type": "string" },
|
|
89
|
+
"description": "Terms to replace in the transcript."
|
|
90
|
+
},
|
|
91
|
+
"keywords": {
|
|
92
|
+
"type": "array",
|
|
93
|
+
"items": { "type": "string" },
|
|
94
|
+
"description": "Keywords to boost recognition for."
|
|
95
|
+
},
|
|
96
|
+
"keyterms": {
|
|
97
|
+
"type": "array",
|
|
98
|
+
"items": { "type": "string" },
|
|
99
|
+
"description": "Key terms to boost recognition for."
|
|
100
|
+
},
|
|
101
|
+
"endpointing": {
|
|
102
|
+
"type": ["boolean", "number"],
|
|
103
|
+
"description": "Endpointing configuration. Set to a boolean to enable or disable endpointing, or to a number of milliseconds of silence to wait before finalizing speech."
|
|
104
|
+
},
|
|
105
|
+
"utteranceEndMs": {
|
|
106
|
+
"type": "number",
|
|
107
|
+
"description": "Milliseconds of silence to detect end of utterance."
|
|
108
|
+
},
|
|
109
|
+
"shortUtterance": {
|
|
110
|
+
"type": "boolean",
|
|
111
|
+
"description": "Optimize for short utterances."
|
|
112
|
+
},
|
|
113
|
+
"vadTurnoff": {
|
|
114
|
+
"type": "number",
|
|
115
|
+
"description": "Milliseconds of silence before VAD turns off."
|
|
116
|
+
},
|
|
117
|
+
"tag": {
|
|
118
|
+
"type": "string",
|
|
119
|
+
"description": "Tag to associate with the request for tracking."
|
|
120
|
+
},
|
|
121
|
+
"fillerWords": {
|
|
122
|
+
"type": "boolean",
|
|
123
|
+
"description": "Include filler words (um, uh) in transcript."
|
|
124
|
+
},
|
|
125
|
+
"eotThreshold": {
|
|
126
|
+
"type": "number",
|
|
127
|
+
"description": "End-of-turn confidence threshold (0-1)."
|
|
128
|
+
},
|
|
129
|
+
"eotTimeoutMs": {
|
|
130
|
+
"type": "number",
|
|
131
|
+
"description": "End-of-turn timeout in milliseconds."
|
|
132
|
+
},
|
|
133
|
+
"mipOptOut": {
|
|
134
|
+
"type": "boolean",
|
|
135
|
+
"description": "Opt out of Deepgram's model improvement program."
|
|
136
|
+
},
|
|
137
|
+
"entityPrompt": {
|
|
138
|
+
"type": "string",
|
|
139
|
+
"description": "Prompt to guide entity detection."
|
|
140
|
+
},
|
|
141
|
+
"eagerEotThreshold": {
|
|
142
|
+
"type": "number",
|
|
143
|
+
"description": "Eager end-of-turn threshold for faster response."
|
|
144
|
+
}
|
|
145
|
+
},
|
|
146
|
+
"additionalProperties": false
|
|
147
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-elevenlabsOptions",
|
|
4
|
+
"title": "ElevenLabs Recognizer Options",
|
|
5
|
+
"description": "ElevenLabs-specific STT options. Only applies when recognizer vendor is 'elevenlabs'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"includeTimestamps": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Include word-level timestamps in results."
|
|
11
|
+
},
|
|
12
|
+
"commitStrategy": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"enum": ["manual", "vad"],
|
|
15
|
+
"description": "How audio chunks are committed. 'manual' for explicit commits, 'vad' for voice activity detection."
|
|
16
|
+
},
|
|
17
|
+
"vadSilenceThresholdSecs": {
|
|
18
|
+
"type": "number",
|
|
19
|
+
"description": "Silence duration in seconds to trigger VAD commit."
|
|
20
|
+
},
|
|
21
|
+
"vadThreshold": {
|
|
22
|
+
"type": "number",
|
|
23
|
+
"description": "VAD activation threshold."
|
|
24
|
+
},
|
|
25
|
+
"minSpeechDurationMs": {
|
|
26
|
+
"type": "number",
|
|
27
|
+
"description": "Minimum speech duration in milliseconds to accept."
|
|
28
|
+
},
|
|
29
|
+
"minSilenceDurationMs": {
|
|
30
|
+
"type": "number",
|
|
31
|
+
"description": "Minimum silence duration in milliseconds to trigger end of speech."
|
|
32
|
+
},
|
|
33
|
+
"enableLogging": {
|
|
34
|
+
"type": "boolean",
|
|
35
|
+
"description": "Enable server-side logging."
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"additionalProperties": false
|
|
39
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-gladiaOptions",
|
|
4
|
+
"title": "Gladia Recognizer Options",
|
|
5
|
+
"description": "Gladia-specific STT options. Only applies when recognizer vendor is 'gladia'. See Gladia API documentation for available options.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": true
|
|
8
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-googleOptions",
|
|
4
|
+
"title": "Google Recognizer Options",
|
|
5
|
+
"description": "Google Speech-to-Text specific options. Only applies when recognizer vendor is 'google'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"serviceVersion": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"enum": ["v1", "v2"],
|
|
11
|
+
"description": "Google Speech-to-Text API version."
|
|
12
|
+
},
|
|
13
|
+
"recognizerId": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"description": "ID of a Google Speech recognizer resource (v2 only)."
|
|
16
|
+
},
|
|
17
|
+
"speechStartTimeoutMs": {
|
|
18
|
+
"type": "number",
|
|
19
|
+
"description": "Timeout in milliseconds to wait for speech to start."
|
|
20
|
+
},
|
|
21
|
+
"speechEndTimeoutMs": {
|
|
22
|
+
"type": "number",
|
|
23
|
+
"description": "Timeout in milliseconds to detect end of speech."
|
|
24
|
+
},
|
|
25
|
+
"enableVoiceActivityEvents": {
|
|
26
|
+
"type": "boolean",
|
|
27
|
+
"description": "Enable voice activity detection events."
|
|
28
|
+
},
|
|
29
|
+
"transcriptNormalization": {
|
|
30
|
+
"type": "array",
|
|
31
|
+
"description": "Array of transcript normalization rules."
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"additionalProperties": false
|
|
35
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-houndifyOptions",
|
|
4
|
+
"title": "Houndify Recognizer Options",
|
|
5
|
+
"description": "Houndify-specific STT options. Only applies when recognizer vendor is 'houndify'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"requestInfo": {
|
|
9
|
+
"type": "object",
|
|
10
|
+
"description": "Houndify RequestInfo object with context data.",
|
|
11
|
+
"additionalProperties": true
|
|
12
|
+
},
|
|
13
|
+
"sampleRate": { "type": "number", "description": "Audio sample rate in Hz." },
|
|
14
|
+
"latitude": { "type": "number", "description": "User latitude for location-aware queries." },
|
|
15
|
+
"longitude": { "type": "number", "description": "User longitude for location-aware queries." },
|
|
16
|
+
"city": { "type": "string", "description": "User city." },
|
|
17
|
+
"state": { "type": "string", "description": "User state." },
|
|
18
|
+
"country": { "type": "string", "description": "User country." },
|
|
19
|
+
"timeZone": { "type": "string", "description": "User timezone." },
|
|
20
|
+
"domain": { "type": "string", "description": "Houndify domain." },
|
|
21
|
+
"audioEndpoint": { "type": "string", "description": "Custom audio endpoint URL." },
|
|
22
|
+
"maxSilenceSeconds": { "type": "number", "description": "Maximum silence before stopping." },
|
|
23
|
+
"maxSilenceAfterFullQuerySeconds": { "type": "number", "description": "Silence timeout after a complete query." },
|
|
24
|
+
"maxSilenceAfterPartialQuerySeconds": { "type": "number", "description": "Silence timeout after a partial query." },
|
|
25
|
+
"vadSensitivity": { "type": "number", "description": "VAD sensitivity level." },
|
|
26
|
+
"vadTimeout": { "type": "number", "description": "VAD timeout in milliseconds." },
|
|
27
|
+
"vadMode": { "type": "string", "description": "VAD mode." },
|
|
28
|
+
"vadVoiceMs": { "type": "number", "description": "Milliseconds of voice to trigger VAD." },
|
|
29
|
+
"vadSilenceMs": { "type": "number", "description": "Milliseconds of silence to trigger VAD." },
|
|
30
|
+
"vadDebug": { "type": "boolean", "description": "Enable VAD debug logging." },
|
|
31
|
+
"audioFormat": { "type": "string", "description": "Audio format." },
|
|
32
|
+
"enableNoiseReduction": { "type": "boolean", "description": "Enable noise reduction." },
|
|
33
|
+
"enableProfanityFilter": { "type": "boolean", "description": "Filter profanity." },
|
|
34
|
+
"enablePunctuation": { "type": "boolean", "description": "Enable punctuation." },
|
|
35
|
+
"enableCapitalization": { "type": "boolean", "description": "Enable capitalization." },
|
|
36
|
+
"confidenceThreshold": { "type": "number", "description": "Minimum confidence threshold." },
|
|
37
|
+
"enableDisfluencyFilter": { "type": "boolean", "description": "Filter disfluencies (um, uh)." },
|
|
38
|
+
"maxResults": { "type": "number", "description": "Maximum number of results." },
|
|
39
|
+
"enableWordTimestamps": { "type": "boolean", "description": "Include word timestamps." },
|
|
40
|
+
"maxAlternatives": { "type": "number", "description": "Maximum alternative transcripts." },
|
|
41
|
+
"partialTranscriptInterval": { "type": "number", "description": "Interval for partial transcript delivery." },
|
|
42
|
+
"sessionTimeout": { "type": "number", "description": "Session timeout." },
|
|
43
|
+
"connectionTimeout": { "type": "number", "description": "Connection timeout." },
|
|
44
|
+
"customVocabulary": {
|
|
45
|
+
"type": "array",
|
|
46
|
+
"items": { "type": "string" },
|
|
47
|
+
"description": "Custom vocabulary terms."
|
|
48
|
+
},
|
|
49
|
+
"languageModel": { "type": "string", "description": "Language model to use." },
|
|
50
|
+
"audioQueryAbsoluteTimeout": { "type": "number", "description": "Absolute timeout for audio queries." }
|
|
51
|
+
},
|
|
52
|
+
"additionalProperties": false
|
|
53
|
+
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-ibmOptions",
|
|
4
|
+
"title": "IBM Recognizer Options",
|
|
5
|
+
"description": "IBM Watson Speech-to-Text specific options. Only applies when recognizer vendor is 'ibm'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"sttApiKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "IBM STT API key. Overrides credentials configured in jambonz."
|
|
11
|
+
},
|
|
12
|
+
"sttRegion": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "IBM STT region."
|
|
15
|
+
},
|
|
16
|
+
"ttsApiKey": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "IBM TTS API key."
|
|
19
|
+
},
|
|
20
|
+
"ttsRegion": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "IBM TTS region."
|
|
23
|
+
},
|
|
24
|
+
"instanceId": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "IBM Watson instance ID."
|
|
27
|
+
},
|
|
28
|
+
"model": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Recognition model name."
|
|
31
|
+
},
|
|
32
|
+
"languageCustomizationId": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"description": "ID of a custom language model."
|
|
35
|
+
},
|
|
36
|
+
"acousticCustomizationId": {
|
|
37
|
+
"type": "string",
|
|
38
|
+
"description": "ID of a custom acoustic model."
|
|
39
|
+
},
|
|
40
|
+
"baseModelVersion": {
|
|
41
|
+
"type": "string",
|
|
42
|
+
"description": "Base model version to use."
|
|
43
|
+
},
|
|
44
|
+
"watsonMetadata": {
|
|
45
|
+
"type": "string",
|
|
46
|
+
"description": "Customer ID metadata for data labeling."
|
|
47
|
+
},
|
|
48
|
+
"watsonLearningOptOut": {
|
|
49
|
+
"type": "boolean",
|
|
50
|
+
"description": "Opt out of IBM data collection for service improvements."
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"additionalProperties": false
|
|
54
|
+
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-nuanceOptions",
|
|
4
|
+
"title": "Nuance Recognizer Options",
|
|
5
|
+
"description": "Nuance Mix specific options. Only applies when recognizer vendor is 'nuance'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"clientId": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Nuance Mix client ID."
|
|
11
|
+
},
|
|
12
|
+
"secret": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Nuance Mix client secret."
|
|
15
|
+
},
|
|
16
|
+
"kryptonEndpoint": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Custom Nuance Krypton endpoint URL."
|
|
19
|
+
},
|
|
20
|
+
"topic": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Recognition topic (domain)."
|
|
23
|
+
},
|
|
24
|
+
"utteranceDetectionMode": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"enum": ["single", "multiple", "disabled"],
|
|
27
|
+
"description": "How utterance boundaries are detected."
|
|
28
|
+
},
|
|
29
|
+
"punctuation": {
|
|
30
|
+
"type": "boolean",
|
|
31
|
+
"description": "Enable automatic punctuation."
|
|
32
|
+
},
|
|
33
|
+
"profanityFilter": {
|
|
34
|
+
"type": "boolean",
|
|
35
|
+
"description": "Filter profanity from results."
|
|
36
|
+
},
|
|
37
|
+
"includeTokenization": {
|
|
38
|
+
"type": "boolean",
|
|
39
|
+
"description": "Include tokenization data in results."
|
|
40
|
+
},
|
|
41
|
+
"discardSpeakerAdaptation": {
|
|
42
|
+
"type": "boolean",
|
|
43
|
+
"description": "Discard speaker adaptation data."
|
|
44
|
+
},
|
|
45
|
+
"suppressCallRecording": {
|
|
46
|
+
"type": "boolean",
|
|
47
|
+
"description": "Suppress call recording on the Nuance side."
|
|
48
|
+
},
|
|
49
|
+
"maskLoadFailures": {
|
|
50
|
+
"type": "boolean",
|
|
51
|
+
"description": "Mask resource load failures."
|
|
52
|
+
},
|
|
53
|
+
"suppressInitialCapitalization": {
|
|
54
|
+
"type": "boolean",
|
|
55
|
+
"description": "Suppress initial capitalization of results."
|
|
56
|
+
},
|
|
57
|
+
"allowZeroBaseLmWeight": {
|
|
58
|
+
"type": "boolean",
|
|
59
|
+
"description": "Allow zero base language model weight."
|
|
60
|
+
},
|
|
61
|
+
"filterWakeupWord": {
|
|
62
|
+
"type": "boolean",
|
|
63
|
+
"description": "Filter wakeup words from results."
|
|
64
|
+
},
|
|
65
|
+
"resultType": {
|
|
66
|
+
"type": "string",
|
|
67
|
+
"enum": ["final", "partial", "immutable_partial"],
|
|
68
|
+
"description": "Type of results to return."
|
|
69
|
+
},
|
|
70
|
+
"noInputTimeoutMs": {
|
|
71
|
+
"type": "number",
|
|
72
|
+
"description": "Timeout in milliseconds before no-input event."
|
|
73
|
+
},
|
|
74
|
+
"recognitionTimeoutMs": {
|
|
75
|
+
"type": "number",
|
|
76
|
+
"description": "Maximum recognition duration in milliseconds."
|
|
77
|
+
},
|
|
78
|
+
"utteranceEndSilenceMs": {
|
|
79
|
+
"type": "number",
|
|
80
|
+
"description": "Silence duration in milliseconds to detect end of utterance."
|
|
81
|
+
},
|
|
82
|
+
"maxHypotheses": {
|
|
83
|
+
"type": "number",
|
|
84
|
+
"description": "Maximum number of recognition hypotheses to return."
|
|
85
|
+
},
|
|
86
|
+
"speechDomain": {
|
|
87
|
+
"type": "string",
|
|
88
|
+
"description": "Speech domain for optimized recognition."
|
|
89
|
+
},
|
|
90
|
+
"formatting": {
|
|
91
|
+
"type": "object",
|
|
92
|
+
"description": "Formatting options for recognition results.",
|
|
93
|
+
"properties": {
|
|
94
|
+
"scheme": { "type": "string", "description": "Formatting scheme name." },
|
|
95
|
+
"options": { "type": "object", "description": "Scheme-specific formatting options." }
|
|
96
|
+
},
|
|
97
|
+
"required": ["scheme", "options"]
|
|
98
|
+
},
|
|
99
|
+
"clientData": {
|
|
100
|
+
"type": "object",
|
|
101
|
+
"description": "Custom client data to pass to Nuance.",
|
|
102
|
+
"additionalProperties": true
|
|
103
|
+
},
|
|
104
|
+
"userId": {
|
|
105
|
+
"type": "string",
|
|
106
|
+
"description": "User ID for speaker adaptation."
|
|
107
|
+
},
|
|
108
|
+
"speechDetectionSensitivity": {
|
|
109
|
+
"type": "number",
|
|
110
|
+
"description": "Speech detection sensitivity (0-1)."
|
|
111
|
+
},
|
|
112
|
+
"resources": {
|
|
113
|
+
"type": "array",
|
|
114
|
+
"description": "Array of Nuance recognition resources (grammars, wordsets, etc.).",
|
|
115
|
+
"items": {
|
|
116
|
+
"type": "object",
|
|
117
|
+
"properties": {
|
|
118
|
+
"externalReference": {
|
|
119
|
+
"type": "object",
|
|
120
|
+
"description": "External resource reference.",
|
|
121
|
+
"properties": {
|
|
122
|
+
"type": {
|
|
123
|
+
"type": "string",
|
|
124
|
+
"enum": ["undefined_resource_type", "wordset", "compiled_wordset", "domain_lm", "speaker_profile", "grammar", "settings"]
|
|
125
|
+
},
|
|
126
|
+
"uri": { "type": "string" },
|
|
127
|
+
"maxLoadFailures": { "type": "boolean" },
|
|
128
|
+
"requestTimeoutMs": { "type": "number" },
|
|
129
|
+
"headers": { "type": "object" }
|
|
130
|
+
}
|
|
131
|
+
},
|
|
132
|
+
"inlineWordset": { "type": "string", "description": "Inline wordset JSON string." },
|
|
133
|
+
"builtin": { "type": "string", "description": "Built-in grammar name." },
|
|
134
|
+
"inlineGrammar": { "type": "string", "description": "Inline SRGS grammar." },
|
|
135
|
+
"wakeupWord": { "type": "array", "items": { "type": "string" }, "description": "Wakeup words." },
|
|
136
|
+
"weightName": {
|
|
137
|
+
"type": "string",
|
|
138
|
+
"enum": ["defaultWeight", "lowest", "low", "medium", "high", "highest"]
|
|
139
|
+
},
|
|
140
|
+
"weightValue": { "type": "number" },
|
|
141
|
+
"reuse": {
|
|
142
|
+
"type": "string",
|
|
143
|
+
"enum": ["undefined_reuse", "low_reuse", "high_reuse"]
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
"additionalProperties": false
|
|
150
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-nvidiaOptions",
|
|
4
|
+
"title": "NVIDIA Recognizer Options",
|
|
5
|
+
"description": "NVIDIA Riva specific options. Only applies when recognizer vendor is 'nvidia'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"rivaUri": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "NVIDIA Riva server URI."
|
|
11
|
+
},
|
|
12
|
+
"maxAlternatives": {
|
|
13
|
+
"type": "number",
|
|
14
|
+
"description": "Maximum number of alternative transcripts."
|
|
15
|
+
},
|
|
16
|
+
"profanityFilter": {
|
|
17
|
+
"type": "boolean",
|
|
18
|
+
"description": "Filter profanity from results."
|
|
19
|
+
},
|
|
20
|
+
"punctuation": {
|
|
21
|
+
"type": "boolean",
|
|
22
|
+
"description": "Enable automatic punctuation."
|
|
23
|
+
},
|
|
24
|
+
"wordTimeOffsets": {
|
|
25
|
+
"type": "boolean",
|
|
26
|
+
"description": "Include word-level timestamps."
|
|
27
|
+
},
|
|
28
|
+
"verbatimTranscripts": {
|
|
29
|
+
"type": "boolean",
|
|
30
|
+
"description": "Return verbatim (unformatted) transcripts."
|
|
31
|
+
},
|
|
32
|
+
"customConfiguration": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"description": "Custom Riva configuration parameters.",
|
|
35
|
+
"additionalProperties": true
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"additionalProperties": false
|
|
39
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-openaiOptions",
|
|
4
|
+
"title": "OpenAI Recognizer Options",
|
|
5
|
+
"description": "OpenAI Whisper/Realtime specific STT options. Only applies when recognizer vendor is 'openai'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"apiKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "OpenAI API key. Overrides credentials configured in jambonz."
|
|
11
|
+
},
|
|
12
|
+
"model": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "OpenAI STT model name."
|
|
15
|
+
},
|
|
16
|
+
"prompt": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Prompt to guide the recognition model."
|
|
19
|
+
},
|
|
20
|
+
"promptTemplates": {
|
|
21
|
+
"type": "object",
|
|
22
|
+
"description": "Templates for dynamic prompt generation.",
|
|
23
|
+
"properties": {
|
|
24
|
+
"hintsTemplate": { "type": "string", "description": "Template for injecting hints into the prompt." },
|
|
25
|
+
"conversationHistoryTemplate": { "type": "string", "description": "Template for injecting conversation history." }
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"language": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Language code for recognition."
|
|
31
|
+
},
|
|
32
|
+
"input_audio_noise_reduction": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"enum": ["near_field", "far_field"],
|
|
35
|
+
"description": "Input audio noise reduction mode."
|
|
36
|
+
},
|
|
37
|
+
"turn_detection": {
|
|
38
|
+
"type": "object",
|
|
39
|
+
"description": "Turn detection configuration for the OpenAI Realtime API.",
|
|
40
|
+
"properties": {
|
|
41
|
+
"type": {
|
|
42
|
+
"type": "string",
|
|
43
|
+
"enum": ["none", "server_vad", "semantic_vad"],
|
|
44
|
+
"description": "Turn detection strategy."
|
|
45
|
+
},
|
|
46
|
+
"eagerness": {
|
|
47
|
+
"type": "string",
|
|
48
|
+
"enum": ["low", "medium", "high", "auto"],
|
|
49
|
+
"description": "How eagerly the model should respond."
|
|
50
|
+
},
|
|
51
|
+
"threshold": { "type": "number", "description": "VAD activation threshold (0-1)." },
|
|
52
|
+
"prefix_padding_ms": { "type": "number", "description": "Milliseconds of audio to include before detected speech." },
|
|
53
|
+
"silence_duration_ms": { "type": "number", "description": "Milliseconds of silence to detect end of speech." }
|
|
54
|
+
},
|
|
55
|
+
"required": ["type"]
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"additionalProperties": false
|
|
59
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-sonioxOptions",
|
|
4
|
+
"title": "Soniox Recognizer Options",
|
|
5
|
+
"description": "Soniox-specific STT options. Only applies when recognizer vendor is 'soniox'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"apiKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Soniox API key."
|
|
11
|
+
},
|
|
12
|
+
"model": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Soniox recognition model."
|
|
15
|
+
},
|
|
16
|
+
"endpointDetection": {
|
|
17
|
+
"type": "boolean",
|
|
18
|
+
"description": "Enable endpoint detection."
|
|
19
|
+
},
|
|
20
|
+
"profanityFilter": {
|
|
21
|
+
"type": "boolean",
|
|
22
|
+
"description": "Filter profanity from results."
|
|
23
|
+
},
|
|
24
|
+
"speechContext": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Speech context for improved recognition."
|
|
27
|
+
},
|
|
28
|
+
"clientRequestReference": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Client request reference for tracking."
|
|
31
|
+
},
|
|
32
|
+
"storage": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"description": "Soniox storage configuration for persisting transcripts.",
|
|
35
|
+
"properties": {
|
|
36
|
+
"id": { "type": "string", "description": "Storage ID." },
|
|
37
|
+
"title": { "type": "string", "description": "Storage title." },
|
|
38
|
+
"disableStoreAudio": { "type": "boolean", "description": "Disable audio storage." },
|
|
39
|
+
"disableStoreTranscript": { "type": "boolean", "description": "Disable transcript storage." },
|
|
40
|
+
"disableSearch": { "type": "boolean", "description": "Disable search indexing." },
|
|
41
|
+
"metadata": { "type": "object", "description": "Custom metadata.", "additionalProperties": true }
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
"additionalProperties": false
|
|
46
|
+
}
|