@jambonz/schema 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +974 -0
- package/callbacks/amd.schema.json +50 -0
- package/callbacks/base.schema.json +29 -0
- package/callbacks/call-status.schema.json +22 -0
- package/callbacks/conference-status.schema.json +24 -0
- package/callbacks/conference-wait.schema.json +11 -0
- package/callbacks/conference.schema.json +11 -0
- package/callbacks/dequeue.schema.json +19 -0
- package/callbacks/dial-dtmf.schema.json +18 -0
- package/callbacks/dial-hold.schema.json +22 -0
- package/callbacks/dial-refer.schema.json +28 -0
- package/callbacks/dial.schema.json +31 -0
- package/callbacks/enqueue-wait.schema.json +17 -0
- package/callbacks/enqueue.schema.json +27 -0
- package/callbacks/gather-partial.schema.json +54 -0
- package/callbacks/gather.schema.json +60 -0
- package/callbacks/listen.schema.json +21 -0
- package/callbacks/llm.schema.json +30 -0
- package/callbacks/message.schema.json +35 -0
- package/callbacks/pipeline-turn.schema.json +109 -0
- package/callbacks/play.schema.json +36 -0
- package/callbacks/session-new.schema.json +143 -0
- package/callbacks/session-reconnect.schema.json +9 -0
- package/callbacks/session-redirect.schema.json +38 -0
- package/callbacks/sip-refer-event.schema.json +20 -0
- package/callbacks/sip-refer.schema.json +22 -0
- package/callbacks/sip-request.schema.json +27 -0
- package/callbacks/transcribe-translation.schema.json +24 -0
- package/callbacks/transcribe.schema.json +46 -0
- package/callbacks/tts-streaming-event.schema.json +77 -0
- package/callbacks/verb-status.schema.json +57 -0
- package/components/actionHook.schema.json +36 -0
- package/components/actionHookDelayAction.schema.json +37 -0
- package/components/amd.schema.json +68 -0
- package/components/auth.schema.json +18 -0
- package/components/bidirectionalAudio.schema.json +22 -0
- package/components/fillerNoise.schema.json +25 -0
- package/components/llm-base.schema.json +94 -0
- package/components/recognizer-assemblyAiOptions.schema.json +66 -0
- package/components/recognizer-awsOptions.schema.json +52 -0
- package/components/recognizer-azureOptions.schema.json +32 -0
- package/components/recognizer-cobaltOptions.schema.json +34 -0
- package/components/recognizer-customOptions.schema.json +27 -0
- package/components/recognizer-deepgramOptions.schema.json +147 -0
- package/components/recognizer-elevenlabsOptions.schema.json +39 -0
- package/components/recognizer-gladiaOptions.schema.json +8 -0
- package/components/recognizer-googleOptions.schema.json +35 -0
- package/components/recognizer-houndifyOptions.schema.json +53 -0
- package/components/recognizer-ibmOptions.schema.json +54 -0
- package/components/recognizer-nuanceOptions.schema.json +150 -0
- package/components/recognizer-nvidiaOptions.schema.json +39 -0
- package/components/recognizer-openaiOptions.schema.json +59 -0
- package/components/recognizer-sonioxOptions.schema.json +46 -0
- package/components/recognizer-speechmaticsOptions.schema.json +100 -0
- package/components/recognizer-verbioOptions.schema.json +46 -0
- package/components/recognizer.schema.json +216 -0
- package/components/synthesizer.schema.json +82 -0
- package/components/target.schema.json +105 -0
- package/components/vad.schema.json +48 -0
- package/docs/components/recognizer.md +78 -0
- package/docs/components/synthesizer.md +27 -0
- package/docs/guides/session-commands.md +417 -0
- package/docs/verbs/conference.md +51 -0
- package/docs/verbs/deepgram_s2s.md +108 -0
- package/docs/verbs/dial.md +8 -0
- package/docs/verbs/listen.md +71 -0
- package/docs/verbs/pipeline.md +475 -0
- package/docs/verbs/stream.md +5 -0
- package/index.js +9 -0
- package/jambonz-app.schema.json +112 -0
- package/lib/normalize.js +72 -0
- package/lib/validator.js +137 -0
- package/package.json +39 -0
- package/verbs/alert.schema.json +34 -0
- package/verbs/answer.schema.json +22 -0
- package/verbs/conference.schema.json +107 -0
- package/verbs/config.schema.json +218 -0
- package/verbs/deepgram_s2s.schema.json +81 -0
- package/verbs/dequeue.schema.json +51 -0
- package/verbs/dial.schema.json +187 -0
- package/verbs/dialogflow.schema.json +148 -0
- package/verbs/dtmf.schema.json +49 -0
- package/verbs/dub.schema.json +103 -0
- package/verbs/elevenlabs_s2s.schema.json +81 -0
- package/verbs/enqueue.schema.json +53 -0
- package/verbs/gather.schema.json +188 -0
- package/verbs/google_s2s.schema.json +42 -0
- package/verbs/hangup.schema.json +36 -0
- package/verbs/leave.schema.json +22 -0
- package/verbs/listen.schema.json +127 -0
- package/verbs/llm.schema.json +44 -0
- package/verbs/message.schema.json +82 -0
- package/verbs/openai_s2s.schema.json +42 -0
- package/verbs/pause.schema.json +36 -0
- package/verbs/pipeline.schema.json +240 -0
- package/verbs/play.schema.json +96 -0
- package/verbs/redirect.schema.json +34 -0
- package/verbs/s2s.schema.json +39 -0
- package/verbs/say.schema.json +107 -0
- package/verbs/sip-decline.schema.json +58 -0
- package/verbs/sip-refer.schema.json +58 -0
- package/verbs/sip-request.schema.json +54 -0
- package/verbs/stream.schema.json +103 -0
- package/verbs/tag.schema.json +41 -0
- package/verbs/transcribe.schema.json +57 -0
- package/verbs/ultravox_s2s.schema.json +41 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-elevenlabsOptions",
|
|
4
|
+
"title": "ElevenLabs Recognizer Options",
|
|
5
|
+
"description": "ElevenLabs-specific STT options. Only applies when recognizer vendor is 'elevenlabs'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"includeTimestamps": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Include word-level timestamps in results."
|
|
11
|
+
},
|
|
12
|
+
"commitStrategy": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"enum": ["manual", "vad"],
|
|
15
|
+
"description": "How audio chunks are committed. 'manual' for explicit commits, 'vad' for voice activity detection."
|
|
16
|
+
},
|
|
17
|
+
"vadSilenceThresholdSecs": {
|
|
18
|
+
"type": "number",
|
|
19
|
+
"description": "Silence duration in seconds to trigger VAD commit."
|
|
20
|
+
},
|
|
21
|
+
"vadThreshold": {
|
|
22
|
+
"type": "number",
|
|
23
|
+
"description": "VAD activation threshold."
|
|
24
|
+
},
|
|
25
|
+
"minSpeechDurationMs": {
|
|
26
|
+
"type": "number",
|
|
27
|
+
"description": "Minimum speech duration in milliseconds to accept."
|
|
28
|
+
},
|
|
29
|
+
"minSilenceDurationMs": {
|
|
30
|
+
"type": "number",
|
|
31
|
+
"description": "Minimum silence duration in milliseconds to trigger end of speech."
|
|
32
|
+
},
|
|
33
|
+
"enableLogging": {
|
|
34
|
+
"type": "boolean",
|
|
35
|
+
"description": "Enable server-side logging."
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"additionalProperties": false
|
|
39
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-gladiaOptions",
|
|
4
|
+
"title": "Gladia Recognizer Options",
|
|
5
|
+
"description": "Gladia-specific STT options. Only applies when recognizer vendor is 'gladia'. See Gladia API documentation for available options.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false
|
|
8
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-googleOptions",
|
|
4
|
+
"title": "Google Recognizer Options",
|
|
5
|
+
"description": "Google Speech-to-Text specific options. Only applies when recognizer vendor is 'google'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"serviceVersion": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"enum": ["v1", "v2"],
|
|
11
|
+
"description": "Google Speech-to-Text API version."
|
|
12
|
+
},
|
|
13
|
+
"recognizerId": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"description": "ID of a Google Speech recognizer resource (v2 only)."
|
|
16
|
+
},
|
|
17
|
+
"speechStartTimeoutMs": {
|
|
18
|
+
"type": "number",
|
|
19
|
+
"description": "Timeout in milliseconds to wait for speech to start."
|
|
20
|
+
},
|
|
21
|
+
"speechEndTimeoutMs": {
|
|
22
|
+
"type": "number",
|
|
23
|
+
"description": "Timeout in milliseconds to detect end of speech."
|
|
24
|
+
},
|
|
25
|
+
"enableVoiceActivityEvents": {
|
|
26
|
+
"type": "boolean",
|
|
27
|
+
"description": "Enable voice activity detection events."
|
|
28
|
+
},
|
|
29
|
+
"transcriptNormalization": {
|
|
30
|
+
"type": "array",
|
|
31
|
+
"description": "Array of transcript normalization rules."
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"additionalProperties": false
|
|
35
|
+
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-houndifyOptions",
|
|
4
|
+
"title": "Houndify Recognizer Options",
|
|
5
|
+
"description": "Houndify-specific STT options. Only applies when recognizer vendor is 'houndify'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"requestInfo": {
|
|
9
|
+
"type": "object",
|
|
10
|
+
"description": "Houndify RequestInfo object with context data.",
|
|
11
|
+
"additionalProperties": true
|
|
12
|
+
},
|
|
13
|
+
"sampleRate": { "type": "number", "description": "Audio sample rate in Hz." },
|
|
14
|
+
"latitude": { "type": "number", "description": "User latitude for location-aware queries." },
|
|
15
|
+
"longitude": { "type": "number", "description": "User longitude for location-aware queries." },
|
|
16
|
+
"city": { "type": "string", "description": "User city." },
|
|
17
|
+
"state": { "type": "string", "description": "User state." },
|
|
18
|
+
"country": { "type": "string", "description": "User country." },
|
|
19
|
+
"timeZone": { "type": "string", "description": "User timezone." },
|
|
20
|
+
"domain": { "type": "string", "description": "Houndify domain." },
|
|
21
|
+
"audioEndpoint": { "type": "string", "description": "Custom audio endpoint URL." },
|
|
22
|
+
"maxSilenceSeconds": { "type": "number", "description": "Maximum silence before stopping." },
|
|
23
|
+
"maxSilenceAfterFullQuerySeconds": { "type": "number", "description": "Silence timeout after a complete query." },
|
|
24
|
+
"maxSilenceAfterPartialQuerySeconds": { "type": "number", "description": "Silence timeout after a partial query." },
|
|
25
|
+
"vadSensitivity": { "type": "number", "description": "VAD sensitivity level." },
|
|
26
|
+
"vadTimeout": { "type": "number", "description": "VAD timeout in milliseconds." },
|
|
27
|
+
"vadMode": { "type": "string", "description": "VAD mode." },
|
|
28
|
+
"vadVoiceMs": { "type": "number", "description": "Milliseconds of voice to trigger VAD." },
|
|
29
|
+
"vadSilenceMs": { "type": "number", "description": "Milliseconds of silence to trigger VAD." },
|
|
30
|
+
"vadDebug": { "type": "boolean", "description": "Enable VAD debug logging." },
|
|
31
|
+
"audioFormat": { "type": "string", "description": "Audio format." },
|
|
32
|
+
"enableNoiseReduction": { "type": "boolean", "description": "Enable noise reduction." },
|
|
33
|
+
"enableProfanityFilter": { "type": "boolean", "description": "Filter profanity." },
|
|
34
|
+
"enablePunctuation": { "type": "boolean", "description": "Enable punctuation." },
|
|
35
|
+
"enableCapitalization": { "type": "boolean", "description": "Enable capitalization." },
|
|
36
|
+
"confidenceThreshold": { "type": "number", "description": "Minimum confidence threshold." },
|
|
37
|
+
"enableDisfluencyFilter": { "type": "boolean", "description": "Filter disfluencies (um, uh)." },
|
|
38
|
+
"maxResults": { "type": "number", "description": "Maximum number of results." },
|
|
39
|
+
"enableWordTimestamps": { "type": "boolean", "description": "Include word timestamps." },
|
|
40
|
+
"maxAlternatives": { "type": "number", "description": "Maximum alternative transcripts." },
|
|
41
|
+
"partialTranscriptInterval": { "type": "number", "description": "Interval for partial transcript delivery." },
|
|
42
|
+
"sessionTimeout": { "type": "number", "description": "Session timeout." },
|
|
43
|
+
"connectionTimeout": { "type": "number", "description": "Connection timeout." },
|
|
44
|
+
"customVocabulary": {
|
|
45
|
+
"type": "array",
|
|
46
|
+
"items": { "type": "string" },
|
|
47
|
+
"description": "Custom vocabulary terms."
|
|
48
|
+
},
|
|
49
|
+
"languageModel": { "type": "string", "description": "Language model to use." },
|
|
50
|
+
"audioQueryAbsoluteTimeout": { "type": "number", "description": "Absolute timeout for audio queries." }
|
|
51
|
+
},
|
|
52
|
+
"additionalProperties": false
|
|
53
|
+
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-ibmOptions",
|
|
4
|
+
"title": "IBM Recognizer Options",
|
|
5
|
+
"description": "IBM Watson Speech-to-Text specific options. Only applies when recognizer vendor is 'ibm'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"sttApiKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "IBM STT API key. Overrides credentials configured in jambonz."
|
|
11
|
+
},
|
|
12
|
+
"sttRegion": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "IBM STT region."
|
|
15
|
+
},
|
|
16
|
+
"ttsApiKey": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "IBM TTS API key."
|
|
19
|
+
},
|
|
20
|
+
"ttsRegion": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "IBM TTS region."
|
|
23
|
+
},
|
|
24
|
+
"instanceId": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "IBM Watson instance ID."
|
|
27
|
+
},
|
|
28
|
+
"model": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Recognition model name."
|
|
31
|
+
},
|
|
32
|
+
"languageCustomizationId": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"description": "ID of a custom language model."
|
|
35
|
+
},
|
|
36
|
+
"acousticCustomizationId": {
|
|
37
|
+
"type": "string",
|
|
38
|
+
"description": "ID of a custom acoustic model."
|
|
39
|
+
},
|
|
40
|
+
"baseModelVersion": {
|
|
41
|
+
"type": "string",
|
|
42
|
+
"description": "Base model version to use."
|
|
43
|
+
},
|
|
44
|
+
"watsonMetadata": {
|
|
45
|
+
"type": "string",
|
|
46
|
+
"description": "Customer ID metadata for data labeling."
|
|
47
|
+
},
|
|
48
|
+
"watsonLearningOptOut": {
|
|
49
|
+
"type": "boolean",
|
|
50
|
+
"description": "Opt out of IBM data collection for service improvements."
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"additionalProperties": false
|
|
54
|
+
}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-nuanceOptions",
|
|
4
|
+
"title": "Nuance Recognizer Options",
|
|
5
|
+
"description": "Nuance Mix specific options. Only applies when recognizer vendor is 'nuance'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"clientId": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Nuance Mix client ID."
|
|
11
|
+
},
|
|
12
|
+
"secret": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Nuance Mix client secret."
|
|
15
|
+
},
|
|
16
|
+
"kryptonEndpoint": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Custom Nuance Krypton endpoint URL."
|
|
19
|
+
},
|
|
20
|
+
"topic": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Recognition topic (domain)."
|
|
23
|
+
},
|
|
24
|
+
"utteranceDetectionMode": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"enum": ["single", "multiple", "disabled"],
|
|
27
|
+
"description": "How utterance boundaries are detected."
|
|
28
|
+
},
|
|
29
|
+
"punctuation": {
|
|
30
|
+
"type": "boolean",
|
|
31
|
+
"description": "Enable automatic punctuation."
|
|
32
|
+
},
|
|
33
|
+
"profanityFilter": {
|
|
34
|
+
"type": "boolean",
|
|
35
|
+
"description": "Filter profanity from results."
|
|
36
|
+
},
|
|
37
|
+
"includeTokenization": {
|
|
38
|
+
"type": "boolean",
|
|
39
|
+
"description": "Include tokenization data in results."
|
|
40
|
+
},
|
|
41
|
+
"discardSpeakerAdaptation": {
|
|
42
|
+
"type": "boolean",
|
|
43
|
+
"description": "Discard speaker adaptation data."
|
|
44
|
+
},
|
|
45
|
+
"suppressCallRecording": {
|
|
46
|
+
"type": "boolean",
|
|
47
|
+
"description": "Suppress call recording on the Nuance side."
|
|
48
|
+
},
|
|
49
|
+
"maskLoadFailures": {
|
|
50
|
+
"type": "boolean",
|
|
51
|
+
"description": "Mask resource load failures."
|
|
52
|
+
},
|
|
53
|
+
"suppressInitialCapitalization": {
|
|
54
|
+
"type": "boolean",
|
|
55
|
+
"description": "Suppress initial capitalization of results."
|
|
56
|
+
},
|
|
57
|
+
"allowZeroBaseLmWeight": {
|
|
58
|
+
"type": "boolean",
|
|
59
|
+
"description": "Allow zero base language model weight."
|
|
60
|
+
},
|
|
61
|
+
"filterWakeupWord": {
|
|
62
|
+
"type": "boolean",
|
|
63
|
+
"description": "Filter wakeup words from results."
|
|
64
|
+
},
|
|
65
|
+
"resultType": {
|
|
66
|
+
"type": "string",
|
|
67
|
+
"enum": ["final", "partial", "immutable_partial"],
|
|
68
|
+
"description": "Type of results to return."
|
|
69
|
+
},
|
|
70
|
+
"noInputTimeoutMs": {
|
|
71
|
+
"type": "number",
|
|
72
|
+
"description": "Timeout in milliseconds before no-input event."
|
|
73
|
+
},
|
|
74
|
+
"recognitionTimeoutMs": {
|
|
75
|
+
"type": "number",
|
|
76
|
+
"description": "Maximum recognition duration in milliseconds."
|
|
77
|
+
},
|
|
78
|
+
"utteranceEndSilenceMs": {
|
|
79
|
+
"type": "number",
|
|
80
|
+
"description": "Silence duration in milliseconds to detect end of utterance."
|
|
81
|
+
},
|
|
82
|
+
"maxHypotheses": {
|
|
83
|
+
"type": "number",
|
|
84
|
+
"description": "Maximum number of recognition hypotheses to return."
|
|
85
|
+
},
|
|
86
|
+
"speechDomain": {
|
|
87
|
+
"type": "string",
|
|
88
|
+
"description": "Speech domain for optimized recognition."
|
|
89
|
+
},
|
|
90
|
+
"formatting": {
|
|
91
|
+
"type": "object",
|
|
92
|
+
"description": "Formatting options for recognition results.",
|
|
93
|
+
"properties": {
|
|
94
|
+
"scheme": { "type": "string", "description": "Formatting scheme name." },
|
|
95
|
+
"options": { "type": "object", "description": "Scheme-specific formatting options." }
|
|
96
|
+
},
|
|
97
|
+
"required": ["scheme", "options"]
|
|
98
|
+
},
|
|
99
|
+
"clientData": {
|
|
100
|
+
"type": "object",
|
|
101
|
+
"description": "Custom client data to pass to Nuance.",
|
|
102
|
+
"additionalProperties": true
|
|
103
|
+
},
|
|
104
|
+
"userId": {
|
|
105
|
+
"type": "string",
|
|
106
|
+
"description": "User ID for speaker adaptation."
|
|
107
|
+
},
|
|
108
|
+
"speechDetectionSensitivity": {
|
|
109
|
+
"type": "number",
|
|
110
|
+
"description": "Speech detection sensitivity (0-1)."
|
|
111
|
+
},
|
|
112
|
+
"resources": {
|
|
113
|
+
"type": "array",
|
|
114
|
+
"description": "Array of Nuance recognition resources (grammars, wordsets, etc.).",
|
|
115
|
+
"items": {
|
|
116
|
+
"type": "object",
|
|
117
|
+
"properties": {
|
|
118
|
+
"externalReference": {
|
|
119
|
+
"type": "object",
|
|
120
|
+
"description": "External resource reference.",
|
|
121
|
+
"properties": {
|
|
122
|
+
"type": {
|
|
123
|
+
"type": "string",
|
|
124
|
+
"enum": ["undefined_resource_type", "wordset", "compiled_wordset", "domain_lm", "speaker_profile", "grammar", "settings"]
|
|
125
|
+
},
|
|
126
|
+
"uri": { "type": "string" },
|
|
127
|
+
"maxLoadFailures": { "type": "boolean" },
|
|
128
|
+
"requestTimeoutMs": { "type": "number" },
|
|
129
|
+
"headers": { "type": "object" }
|
|
130
|
+
}
|
|
131
|
+
},
|
|
132
|
+
"inlineWordset": { "type": "string", "description": "Inline wordset JSON string." },
|
|
133
|
+
"builtin": { "type": "string", "description": "Built-in grammar name." },
|
|
134
|
+
"inlineGrammar": { "type": "string", "description": "Inline SRGS grammar." },
|
|
135
|
+
"wakeupWord": { "type": "array", "items": { "type": "string" }, "description": "Wakeup words." },
|
|
136
|
+
"weightName": {
|
|
137
|
+
"type": "string",
|
|
138
|
+
"enum": ["defaultWeight", "lowest", "low", "medium", "high", "highest"]
|
|
139
|
+
},
|
|
140
|
+
"weightValue": { "type": "number" },
|
|
141
|
+
"reuse": {
|
|
142
|
+
"type": "string",
|
|
143
|
+
"enum": ["undefined_reuse", "low_reuse", "high_reuse"]
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
"additionalProperties": false
|
|
150
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-nvidiaOptions",
|
|
4
|
+
"title": "NVIDIA Recognizer Options",
|
|
5
|
+
"description": "NVIDIA Riva specific options. Only applies when recognizer vendor is 'nvidia'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"rivaUri": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "NVIDIA Riva server URI."
|
|
11
|
+
},
|
|
12
|
+
"maxAlternatives": {
|
|
13
|
+
"type": "number",
|
|
14
|
+
"description": "Maximum number of alternative transcripts."
|
|
15
|
+
},
|
|
16
|
+
"profanityFilter": {
|
|
17
|
+
"type": "boolean",
|
|
18
|
+
"description": "Filter profanity from results."
|
|
19
|
+
},
|
|
20
|
+
"punctuation": {
|
|
21
|
+
"type": "boolean",
|
|
22
|
+
"description": "Enable automatic punctuation."
|
|
23
|
+
},
|
|
24
|
+
"wordTimeOffsets": {
|
|
25
|
+
"type": "boolean",
|
|
26
|
+
"description": "Include word-level timestamps."
|
|
27
|
+
},
|
|
28
|
+
"verbatimTranscripts": {
|
|
29
|
+
"type": "boolean",
|
|
30
|
+
"description": "Return verbatim (unformatted) transcripts."
|
|
31
|
+
},
|
|
32
|
+
"customConfiguration": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"description": "Custom Riva configuration parameters.",
|
|
35
|
+
"additionalProperties": true
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"additionalProperties": false
|
|
39
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-openaiOptions",
|
|
4
|
+
"title": "OpenAI Recognizer Options",
|
|
5
|
+
"description": "OpenAI Whisper/Realtime specific STT options. Only applies when recognizer vendor is 'openai'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"apiKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "OpenAI API key. Overrides credentials configured in jambonz."
|
|
11
|
+
},
|
|
12
|
+
"model": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "OpenAI STT model name."
|
|
15
|
+
},
|
|
16
|
+
"prompt": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "Prompt to guide the recognition model."
|
|
19
|
+
},
|
|
20
|
+
"promptTemplates": {
|
|
21
|
+
"type": "object",
|
|
22
|
+
"description": "Templates for dynamic prompt generation.",
|
|
23
|
+
"properties": {
|
|
24
|
+
"hintsTemplate": { "type": "string", "description": "Template for injecting hints into the prompt." },
|
|
25
|
+
"conversationHistoryTemplate": { "type": "string", "description": "Template for injecting conversation history." }
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"language": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Language code for recognition."
|
|
31
|
+
},
|
|
32
|
+
"input_audio_noise_reduction": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"enum": ["near_field", "far_field"],
|
|
35
|
+
"description": "Input audio noise reduction mode."
|
|
36
|
+
},
|
|
37
|
+
"turn_detection": {
|
|
38
|
+
"type": "object",
|
|
39
|
+
"description": "Turn detection configuration for the OpenAI Realtime API.",
|
|
40
|
+
"properties": {
|
|
41
|
+
"type": {
|
|
42
|
+
"type": "string",
|
|
43
|
+
"enum": ["none", "server_vad", "semantic_vad"],
|
|
44
|
+
"description": "Turn detection strategy."
|
|
45
|
+
},
|
|
46
|
+
"eagerness": {
|
|
47
|
+
"type": "string",
|
|
48
|
+
"enum": ["low", "medium", "high", "auto"],
|
|
49
|
+
"description": "How eagerly the model should respond."
|
|
50
|
+
},
|
|
51
|
+
"threshold": { "type": "number", "description": "VAD activation threshold (0-1)." },
|
|
52
|
+
"prefix_padding_ms": { "type": "number", "description": "Milliseconds of audio to include before detected speech." },
|
|
53
|
+
"silence_duration_ms": { "type": "number", "description": "Milliseconds of silence to detect end of speech." }
|
|
54
|
+
},
|
|
55
|
+
"required": ["type"]
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"additionalProperties": false
|
|
59
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-sonioxOptions",
|
|
4
|
+
"title": "Soniox Recognizer Options",
|
|
5
|
+
"description": "Soniox-specific STT options. Only applies when recognizer vendor is 'soniox'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"apiKey": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Soniox API key."
|
|
11
|
+
},
|
|
12
|
+
"model": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"description": "Soniox recognition model."
|
|
15
|
+
},
|
|
16
|
+
"endpointDetection": {
|
|
17
|
+
"type": "boolean",
|
|
18
|
+
"description": "Enable endpoint detection."
|
|
19
|
+
},
|
|
20
|
+
"profanityFilter": {
|
|
21
|
+
"type": "boolean",
|
|
22
|
+
"description": "Filter profanity from results."
|
|
23
|
+
},
|
|
24
|
+
"speechContext": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "Speech context for improved recognition."
|
|
27
|
+
},
|
|
28
|
+
"clientRequestReference": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Client request reference for tracking."
|
|
31
|
+
},
|
|
32
|
+
"storage": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"description": "Soniox storage configuration for persisting transcripts.",
|
|
35
|
+
"properties": {
|
|
36
|
+
"id": { "type": "string", "description": "Storage ID." },
|
|
37
|
+
"title": { "type": "string", "description": "Storage title." },
|
|
38
|
+
"disableStoreAudio": { "type": "boolean", "description": "Disable audio storage." },
|
|
39
|
+
"disableStoreTranscript": { "type": "boolean", "description": "Disable transcript storage." },
|
|
40
|
+
"disableSearch": { "type": "boolean", "description": "Disable search indexing." },
|
|
41
|
+
"metadata": { "type": "object", "description": "Custom metadata.", "additionalProperties": true }
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
"additionalProperties": false
|
|
46
|
+
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-speechmaticsOptions",
|
|
4
|
+
"title": "Speechmatics Recognizer Options",
|
|
5
|
+
"description": "Speechmatics-specific STT options. Only applies when recognizer vendor is 'speechmatics'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"host": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "Speechmatics host URL."
|
|
11
|
+
},
|
|
12
|
+
"profile": {
|
|
13
|
+
"type": "string",
|
|
14
|
+
"enum": ["adaptive", "agile", "smart", "external"],
|
|
15
|
+
"description": "Speechmatics profile."
|
|
16
|
+
},
|
|
17
|
+
"transcription_config": {
|
|
18
|
+
"type": "object",
|
|
19
|
+
"description": "Speechmatics transcription configuration.",
|
|
20
|
+
"properties": {
|
|
21
|
+
"language": { "type": "string", "description": "Language code." },
|
|
22
|
+
"domain": { "type": "string", "description": "Domain model." },
|
|
23
|
+
"additional_vocab": { "type": "array", "description": "Additional vocabulary entries." },
|
|
24
|
+
"diarization": { "type": "string", "description": "Diarization mode." },
|
|
25
|
+
"speaker_diarization_config": {
|
|
26
|
+
"type": "object",
|
|
27
|
+
"properties": {
|
|
28
|
+
"speaker_sensitivity": { "type": "number" },
|
|
29
|
+
"max_speakers": { "type": "number" }
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
"conversation_config": {
|
|
33
|
+
"type": "object",
|
|
34
|
+
"properties": {
|
|
35
|
+
"end_of_utterance_silence_trigger": { "type": "number" }
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"enable_partials": { "type": "boolean", "description": "Enable partial transcripts." },
|
|
39
|
+
"max_delay": { "type": "number", "description": "Maximum delay in seconds." },
|
|
40
|
+
"max_delay_mode": {
|
|
41
|
+
"type": "string",
|
|
42
|
+
"enum": ["fixed", "flexible"],
|
|
43
|
+
"description": "Delay mode."
|
|
44
|
+
},
|
|
45
|
+
"output_locale": { "type": "string", "description": "Output locale for formatting." },
|
|
46
|
+
"punctuation_overrides": {
|
|
47
|
+
"type": "object",
|
|
48
|
+
"properties": {
|
|
49
|
+
"permitted_marks": { "type": "array", "items": { "type": "string" } },
|
|
50
|
+
"sensitivity": { "type": "number" }
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"operating_point": { "type": "string", "description": "Operating point (standard or enhanced)." },
|
|
54
|
+
"enable_entities": { "type": "boolean", "description": "Enable entity detection." },
|
|
55
|
+
"audio_filtering_config": {
|
|
56
|
+
"type": "object",
|
|
57
|
+
"properties": {
|
|
58
|
+
"volume_threshold": { "type": "number" }
|
|
59
|
+
},
|
|
60
|
+
"required": ["volume_threshold"]
|
|
61
|
+
},
|
|
62
|
+
"transcript_filtering_config": {
|
|
63
|
+
"type": "object",
|
|
64
|
+
"properties": {
|
|
65
|
+
"remove_disfluencies": { "type": "boolean" }
|
|
66
|
+
},
|
|
67
|
+
"required": ["remove_disfluencies"]
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
},
|
|
71
|
+
"translation_config": {
|
|
72
|
+
"type": "object",
|
|
73
|
+
"description": "Speechmatics translation configuration.",
|
|
74
|
+
"properties": {
|
|
75
|
+
"target_languages": {
|
|
76
|
+
"type": "array",
|
|
77
|
+
"items": { "type": "string" },
|
|
78
|
+
"description": "Target languages for translation."
|
|
79
|
+
},
|
|
80
|
+
"enable_partials": { "type": "boolean", "description": "Enable partial translations." }
|
|
81
|
+
},
|
|
82
|
+
"required": ["target_languages"]
|
|
83
|
+
},
|
|
84
|
+
"audio_events_config": {
|
|
85
|
+
"type": "object",
|
|
86
|
+
"description": "Audio event detection configuration.",
|
|
87
|
+
"properties": {
|
|
88
|
+
"types": {
|
|
89
|
+
"type": "array",
|
|
90
|
+
"items": {
|
|
91
|
+
"type": "string",
|
|
92
|
+
"enum": ["applause", "music", "laughter"]
|
|
93
|
+
},
|
|
94
|
+
"description": "Audio event types to detect."
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
"additionalProperties": false
|
|
100
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://jambonz.org/schema/components/recognizer-verbioOptions",
|
|
4
|
+
"title": "Verbio Recognizer Options",
|
|
5
|
+
"description": "Verbio-specific STT options. Only applies when recognizer vendor is 'verbio'.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"enable_formatting": {
|
|
9
|
+
"type": "boolean",
|
|
10
|
+
"description": "Enable text formatting of results."
|
|
11
|
+
},
|
|
12
|
+
"enable_diarization": {
|
|
13
|
+
"type": "boolean",
|
|
14
|
+
"description": "Enable speaker diarization."
|
|
15
|
+
},
|
|
16
|
+
"topic": {
|
|
17
|
+
"type": "number",
|
|
18
|
+
"description": "Topic ID for domain-specific recognition."
|
|
19
|
+
},
|
|
20
|
+
"inline_grammar": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"description": "Inline SRGS grammar for constrained recognition."
|
|
23
|
+
},
|
|
24
|
+
"grammar_uri": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"description": "URI of an external grammar resource."
|
|
27
|
+
},
|
|
28
|
+
"label": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"description": "Label for the recognition session."
|
|
31
|
+
},
|
|
32
|
+
"recognition_timeout": {
|
|
33
|
+
"type": "number",
|
|
34
|
+
"description": "Maximum recognition duration in seconds."
|
|
35
|
+
},
|
|
36
|
+
"speech_complete_timeout": {
|
|
37
|
+
"type": "number",
|
|
38
|
+
"description": "Silence duration in seconds after complete speech."
|
|
39
|
+
},
|
|
40
|
+
"speech_incomplete_timeout": {
|
|
41
|
+
"type": "number",
|
|
42
|
+
"description": "Silence duration in seconds after incomplete speech."
|
|
43
|
+
}
|
|
44
|
+
},
|
|
45
|
+
"additionalProperties": false
|
|
46
|
+
}
|