@jambonz/mcp-schema-server 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/AGENTS.md +545 -7
  2. package/dist/index.js +141 -0
  3. package/dist/index.js.map +1 -1
  4. package/docs/verbs/conference.md +51 -0
  5. package/docs/verbs/listen.md +71 -0
  6. package/docs/verbs/stream.md +5 -0
  7. package/package.json +13 -5
  8. package/schema/callbacks/base.schema.json +29 -0
  9. package/schema/callbacks/call-status.schema.json +22 -0
  10. package/schema/callbacks/conference-status.schema.json +24 -0
  11. package/schema/callbacks/conference-wait.schema.json +11 -0
  12. package/schema/callbacks/conference.schema.json +11 -0
  13. package/schema/callbacks/dequeue.schema.json +19 -0
  14. package/schema/callbacks/dial-dtmf.schema.json +18 -0
  15. package/schema/callbacks/dial-hold.schema.json +22 -0
  16. package/schema/callbacks/dial-refer.schema.json +28 -0
  17. package/schema/callbacks/dial.schema.json +31 -0
  18. package/schema/callbacks/enqueue-wait.schema.json +17 -0
  19. package/schema/callbacks/enqueue.schema.json +27 -0
  20. package/schema/callbacks/gather-partial.schema.json +54 -0
  21. package/schema/callbacks/gather.schema.json +60 -0
  22. package/schema/callbacks/listen.schema.json +21 -0
  23. package/schema/callbacks/llm.schema.json +30 -0
  24. package/schema/callbacks/message.schema.json +35 -0
  25. package/schema/callbacks/play.schema.json +36 -0
  26. package/schema/callbacks/session-new.schema.json +143 -0
  27. package/schema/callbacks/session-reconnect.schema.json +9 -0
  28. package/schema/callbacks/session-redirect.schema.json +38 -0
  29. package/schema/callbacks/sip-refer-event.schema.json +20 -0
  30. package/schema/callbacks/sip-refer.schema.json +22 -0
  31. package/schema/callbacks/sip-request.schema.json +27 -0
  32. package/schema/callbacks/transcribe-translation.schema.json +24 -0
  33. package/schema/callbacks/transcribe.schema.json +46 -0
  34. package/schema/callbacks/verb-status.schema.json +57 -0
  35. package/schema/components/actionHook.schema.json +1 -1
  36. package/schema/components/amd.schema.json +68 -0
  37. package/schema/components/recognizer-assemblyAiOptions.schema.json +35 -0
  38. package/schema/components/recognizer-awsOptions.schema.json +52 -0
  39. package/schema/components/recognizer-azureOptions.schema.json +32 -0
  40. package/schema/components/recognizer-cobaltOptions.schema.json +34 -0
  41. package/schema/components/recognizer-customOptions.schema.json +27 -0
  42. package/schema/components/recognizer-deepgramOptions.schema.json +147 -0
  43. package/schema/components/recognizer-elevenlabsOptions.schema.json +39 -0
  44. package/schema/components/recognizer-gladiaOptions.schema.json +8 -0
  45. package/schema/components/recognizer-googleOptions.schema.json +35 -0
  46. package/schema/components/recognizer-houndifyOptions.schema.json +53 -0
  47. package/schema/components/recognizer-ibmOptions.schema.json +54 -0
  48. package/schema/components/recognizer-nuanceOptions.schema.json +150 -0
  49. package/schema/components/recognizer-nvidiaOptions.schema.json +39 -0
  50. package/schema/components/recognizer-openaiOptions.schema.json +59 -0
  51. package/schema/components/recognizer-sonioxOptions.schema.json +46 -0
  52. package/schema/components/recognizer-speechmaticsOptions.schema.json +91 -0
  53. package/schema/components/recognizer-verbioOptions.schema.json +46 -0
  54. package/schema/components/recognizer.schema.json +17 -85
  55. package/schema/verbs/config.schema.json +8 -12
  56. package/schema/verbs/dial.schema.json +10 -14
  57. package/schema/verbs/gather.schema.json +1 -1
  58. package/schema/verbs/listen.schema.json +2 -3
  59. package/schema/verbs/llm.schema.json +1 -1
  60. package/schema/verbs/stream.schema.json +1 -1
  61. package/schema/verbs/transcribe.schema.json +4 -0
@@ -0,0 +1,57 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/callbacks/verb-status",
4
+ "title": "Verb Status Event",
5
+ "description": "Real-time verb lifecycle events sent over WebSocket when notifyEvents is enabled on the session. These are informational — no response is expected.",
6
+ "type": "object",
7
+ "properties": {
8
+ "event": {
9
+ "type": "string",
10
+ "enum": [
11
+ "starting",
12
+ "finished",
13
+ "start-playback",
14
+ "stop-playback",
15
+ "kill-playback",
16
+ "dtmf-bargein-detected",
17
+ "speech-bargein-detected",
18
+ "synthesized-audio"
19
+ ],
20
+ "description": "The verb lifecycle event."
21
+ },
22
+ "verb": {
23
+ "type": "string",
24
+ "description": "The verb name (e.g. 'say', 'play', 'gather'). Present on synthesized-audio, start-playback, stop-playback, kill-playback, dtmf-bargein-detected, and speech-bargein-detected events ('starting' and 'finished' events use 'name' instead)."
25
+ },
26
+ "name": {
27
+ "type": "string",
28
+ "description": "The verb name. Present on 'starting' and 'finished' events (these use 'name' instead of 'verb')."
29
+ },
30
+ "id": {
31
+ "type": "string",
32
+ "description": "The verb instance id, if one was assigned by the application."
33
+ },
34
+ "vendor": {
35
+ "type": "string",
36
+ "description": "TTS vendor name. Present on synthesized-audio events."
37
+ },
38
+ "language": {
39
+ "type": "string",
40
+ "description": "TTS language code. Present on synthesized-audio events."
41
+ },
42
+ "characters": {
43
+ "type": "integer",
44
+ "description": "Number of characters synthesized. Present on synthesized-audio events when not served from cache."
45
+ },
46
+ "elapsed_time": {
47
+ "type": "number",
48
+ "description": "TTS round-trip time in milliseconds. Present on synthesized-audio events when not served from cache."
49
+ },
50
+ "served_from_cache": {
51
+ "type": "boolean",
52
+ "description": "Whether the TTS audio was served from cache. Present on synthesized-audio events."
53
+ }
54
+ },
55
+ "required": ["event"],
56
+ "additionalProperties": true
57
+ }
@@ -2,7 +2,7 @@
2
2
  "$schema": "https://json-schema.org/draft/2020-12/schema",
3
3
  "$id": "https://jambonz.org/schema/components/actionHook",
4
4
  "title": "ActionHook",
5
- "description": "A webhook or websocket callback that jambonz invokes during call processing. Most jambonz verbs use actionHooks to report results (e.g. speech recognition results from 'gather') and to receive the next set of verbs to execute. Can be specified as a simple URL string or as an object with additional options.",
5
+ "description": "A webhook or websocket callback that jambonz invokes when a verb completes. Reports verb results (e.g. speech recognition from 'gather', dial outcome) and receives the next verbs to execute. In webhook mode: jambonz POSTs to this URL and the HTTP response body is the next verb array. In WebSocket mode: this value becomes an event name emitted on the session — bind session.on('/hookName', (evt) => {...}) and respond with session.reply() (NOT session.send()). The callback payload always includes 'reason' plus verb-specific fields (e.g. 'speech', 'digits' for gather). Can be a simple URL/path string or an object with additional options.",
6
6
  "oneOf": [
7
7
  {
8
8
  "type": "string",
@@ -0,0 +1,68 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/amd",
4
+ "title": "Answering Machine Detection",
5
+ "description": "Configuration for answering machine detection (AMD). Detects whether an outbound or inbound call was answered by a human or a machine. Used as a nested property on the 'config' or 'dial' verb.",
6
+ "type": "object",
7
+ "properties": {
8
+ "actionHook": {
9
+ "$ref": "actionHook",
10
+ "description": "Webhook to receive AMD events (amd_human_detected, amd_machine_detected, amd_no_speech_detected, amd_decision_timeout, amd_machine_stopped_speaking, amd_tone_detected, amd_error, amd_stopped)."
11
+ },
12
+ "thresholdWordCount": {
13
+ "type": "number",
14
+ "description": "Number of spoken words in a greeting that triggers an amd_machine_detected result.",
15
+ "default": 9
16
+ },
17
+ "digitCount": {
18
+ "type": "number",
19
+ "description": "Number of digits in a greeting to trigger detection. 0 disables digit-based detection.",
20
+ "default": 0
21
+ },
22
+ "timers": {
23
+ "type": "object",
24
+ "description": "Timer settings controlling AMD detection windows.",
25
+ "properties": {
26
+ "noSpeechTimeoutMs": {
27
+ "type": "number",
28
+ "description": "Milliseconds to wait for any speech before returning amd_no_speech_detected.",
29
+ "default": 5000
30
+ },
31
+ "decisionTimeoutMs": {
32
+ "type": "number",
33
+ "description": "Milliseconds before returning amd_decision_timeout if no determination is made.",
34
+ "default": 15000
35
+ },
36
+ "toneTimeoutMs": {
37
+ "type": "number",
38
+ "description": "Milliseconds to wait for beep/tone detection.",
39
+ "default": 20000
40
+ },
41
+ "greetingCompletionTimeoutMs": {
42
+ "type": "number",
43
+ "description": "Milliseconds of silence after speech before determining the machine greeting is complete. Automatically reduced to 1000ms if a beep is detected.",
44
+ "default": 2000
45
+ }
46
+ },
47
+ "additionalProperties": false
48
+ },
49
+ "recognizer": {
50
+ "$ref": "recognizer",
51
+ "description": "Override the STT recognizer used for AMD speech detection. When omitted, AMD uses the session default recognizer with enhancedModel enabled."
52
+ }
53
+ },
54
+ "required": ["actionHook"],
55
+ "examples": [
56
+ {
57
+ "actionHook": "/amd-events"
58
+ },
59
+ {
60
+ "actionHook": "/amd-events",
61
+ "thresholdWordCount": 6,
62
+ "timers": {
63
+ "noSpeechTimeoutMs": 3000,
64
+ "decisionTimeoutMs": 10000
65
+ }
66
+ }
67
+ ]
68
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-assemblyAiOptions",
4
+ "title": "AssemblyAI Recognizer Options",
5
+ "description": "AssemblyAI-specific STT options. Only applies when recognizer vendor is 'assemblyai'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "apiKey": {
9
+ "type": "string",
10
+ "description": "AssemblyAI API key. Overrides credentials configured in jambonz."
11
+ },
12
+ "serviceVersion": {
13
+ "type": "string",
14
+ "enum": ["v2", "v3"],
15
+ "description": "AssemblyAI streaming API version."
16
+ },
17
+ "formatTurns": {
18
+ "type": "boolean",
19
+ "description": "Enable turn-level formatting."
20
+ },
21
+ "endOfTurnConfidenceThreshold": {
22
+ "type": "number",
23
+ "description": "Confidence threshold for end-of-turn detection."
24
+ },
25
+ "minEndOfTurnSilenceWhenConfident": {
26
+ "type": "number",
27
+ "description": "Minimum silence duration (seconds) to trigger end-of-turn when confidence is met."
28
+ },
29
+ "maxTurnSilence": {
30
+ "type": "number",
31
+ "description": "Maximum silence duration (seconds) before forcing end-of-turn."
32
+ }
33
+ },
34
+ "additionalProperties": true
35
+ }
@@ -0,0 +1,52 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-awsOptions",
4
+ "title": "AWS Recognizer Options",
5
+ "description": "Options specific to AWS Transcribe. Only applies when recognizer vendor is 'aws'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "accessKey": {
9
+ "type": "string",
10
+ "description": "AWS access key ID. Overrides credentials configured in jambonz."
11
+ },
12
+ "secretKey": {
13
+ "type": "string",
14
+ "description": "AWS secret access key."
15
+ },
16
+ "securityToken": {
17
+ "type": "string",
18
+ "description": "AWS temporary security token (for STS/assumed roles)."
19
+ },
20
+ "region": {
21
+ "type": "string",
22
+ "description": "AWS region for the Transcribe service."
23
+ },
24
+ "vocabularyName": {
25
+ "type": "string",
26
+ "description": "Name of a custom vocabulary to use."
27
+ },
28
+ "vocabularyFilterName": {
29
+ "type": "string",
30
+ "description": "Name of a vocabulary filter to apply."
31
+ },
32
+ "vocabularyFilterMethod": {
33
+ "type": "string",
34
+ "enum": ["remove", "mask", "tag"],
35
+ "description": "How filtered vocabulary words should be handled."
36
+ },
37
+ "languageModelName": {
38
+ "type": "string",
39
+ "description": "Name of a custom language model."
40
+ },
41
+ "piiEntityTypes": {
42
+ "type": "array",
43
+ "items": { "type": "string" },
44
+ "description": "PII entity types to identify (e.g. 'BANK_ACCOUNT_NUMBER', 'CREDIT_DEBIT_NUMBER')."
45
+ },
46
+ "piiIdentifyEntities": {
47
+ "type": "boolean",
48
+ "description": "Enable PII entity identification."
49
+ }
50
+ },
51
+ "additionalProperties": true
52
+ }
@@ -0,0 +1,32 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-azureOptions",
4
+ "title": "Azure Recognizer Options",
5
+ "description": "Options specific to Azure Speech Services. Only applies when recognizer vendor is 'microsoft'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "speechSegmentationSilenceTimeoutMs": {
9
+ "type": "number",
10
+ "description": "Silence timeout in milliseconds for speech segmentation."
11
+ },
12
+ "postProcessing": {
13
+ "type": "string",
14
+ "description": "Post-processing mode for transcription results."
15
+ },
16
+ "audioLogging": {
17
+ "type": "boolean",
18
+ "description": "Enable audio logging for diagnostics."
19
+ },
20
+ "languageIdMode": {
21
+ "type": "string",
22
+ "enum": ["AtStart", "Continuous"],
23
+ "description": "Language identification mode when using multiple languages."
24
+ },
25
+ "speechRecognitionMode": {
26
+ "type": "string",
27
+ "enum": ["CONVERSATION", "DICTATION", "INTERACTIVE"],
28
+ "description": "Speech recognition mode optimized for the interaction type."
29
+ }
30
+ },
31
+ "additionalProperties": true
32
+ }
@@ -0,0 +1,34 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-cobaltOptions",
4
+ "title": "Cobalt Recognizer Options",
5
+ "description": "Cobalt-specific STT options. Only applies when recognizer vendor is 'cobalt'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "serverUri": {
9
+ "type": "string",
10
+ "description": "Cobalt server URI."
11
+ },
12
+ "enableConfusionNetwork": {
13
+ "type": "boolean",
14
+ "description": "Enable confusion network output."
15
+ },
16
+ "metadata": {
17
+ "type": "string",
18
+ "description": "Metadata string to pass to the server."
19
+ },
20
+ "compiledContextData": {
21
+ "type": "string",
22
+ "description": "Compiled context data for biasing recognition."
23
+ },
24
+ "wordTimeOffsets": {
25
+ "type": "boolean",
26
+ "description": "Include word-level timestamps."
27
+ },
28
+ "contextToken": {
29
+ "type": "string",
30
+ "description": "Context token for server-side context."
31
+ }
32
+ },
33
+ "additionalProperties": true
34
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-customOptions",
4
+ "title": "Custom Recognizer Options",
5
+ "description": "Options for custom STT vendors. Only applies when recognizer vendor is 'custom'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "authToken": {
9
+ "type": "string",
10
+ "description": "Authentication token for the custom STT service."
11
+ },
12
+ "uri": {
13
+ "type": "string",
14
+ "description": "WebSocket URI of the custom STT service."
15
+ },
16
+ "sampleRate": {
17
+ "type": "number",
18
+ "description": "Audio sample rate in Hz."
19
+ },
20
+ "options": {
21
+ "type": "object",
22
+ "description": "Additional vendor-specific options passed through to the custom service.",
23
+ "additionalProperties": true
24
+ }
25
+ },
26
+ "additionalProperties": true
27
+ }
@@ -0,0 +1,147 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-deepgramOptions",
4
+ "title": "Deepgram Recognizer Options",
5
+ "description": "Deepgram-specific STT options. Only applies when recognizer vendor is 'deepgram'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "deepgramSttUri": {
9
+ "type": "string",
10
+ "description": "Custom Deepgram STT endpoint URI."
11
+ },
12
+ "deepgramSttUseTls": {
13
+ "type": "boolean",
14
+ "description": "Whether to use TLS when connecting to the Deepgram STT endpoint."
15
+ },
16
+ "apiKey": {
17
+ "type": "string",
18
+ "description": "Deepgram API key. Overrides the key configured in jambonz."
19
+ },
20
+ "tier": {
21
+ "type": "string",
22
+ "description": "Deepgram model tier."
23
+ },
24
+ "model": {
25
+ "type": "string",
26
+ "description": "Deepgram model name (e.g. 'nova-2', 'nova-2-general')."
27
+ },
28
+ "customModel": {
29
+ "type": "string",
30
+ "description": "ID of a custom-trained Deepgram model."
31
+ },
32
+ "version": {
33
+ "type": "string",
34
+ "description": "Model version."
35
+ },
36
+ "punctuate": {
37
+ "type": "boolean",
38
+ "description": "Enable automatic punctuation."
39
+ },
40
+ "smartFormatting": {
41
+ "type": "boolean",
42
+ "description": "Enable Deepgram smart formatting (dates, numbers, etc.)."
43
+ },
44
+ "noDelay": {
45
+ "type": "boolean",
46
+ "description": "Disable Deepgram's internal buffering for lower latency."
47
+ },
48
+ "profanityFilter": {
49
+ "type": "boolean",
50
+ "description": "Filter profanity from transcripts."
51
+ },
52
+ "redact": {
53
+ "type": "string",
54
+ "enum": ["pci", "numbers", "true", "ssn"],
55
+ "description": "Redact sensitive information from transcripts."
56
+ },
57
+ "diarize": {
58
+ "type": "boolean",
59
+ "description": "Enable speaker diarization."
60
+ },
61
+ "diarizeVersion": {
62
+ "type": "string",
63
+ "description": "Diarization model version."
64
+ },
65
+ "ner": {
66
+ "type": "boolean",
67
+ "description": "Enable named entity recognition."
68
+ },
69
+ "multichannel": {
70
+ "type": "boolean",
71
+ "description": "Enable multichannel processing."
72
+ },
73
+ "alternatives": {
74
+ "type": "number",
75
+ "description": "Number of alternative transcripts to return."
76
+ },
77
+ "numerals": {
78
+ "type": "boolean",
79
+ "description": "Convert spoken numbers to digits."
80
+ },
81
+ "search": {
82
+ "type": "array",
83
+ "items": { "type": "string" },
84
+ "description": "Terms to search for in the transcript."
85
+ },
86
+ "replace": {
87
+ "type": "array",
88
+ "items": { "type": "string" },
89
+ "description": "Terms to replace in the transcript."
90
+ },
91
+ "keywords": {
92
+ "type": "array",
93
+ "items": { "type": "string" },
94
+ "description": "Keywords to boost recognition for."
95
+ },
96
+ "keyterms": {
97
+ "type": "array",
98
+ "items": { "type": "string" },
99
+ "description": "Key terms to boost recognition for."
100
+ },
101
+ "endpointing": {
102
+ "type": ["boolean", "number"],
103
+ "description": "Endpointing control. Pass a boolean to enable or disable endpointing, or a number to set the amount of silence, in milliseconds, that triggers endpointing."
104
+ },
105
+ "utteranceEndMs": {
106
+ "type": "number",
107
+ "description": "Milliseconds of silence to detect end of utterance."
108
+ },
109
+ "shortUtterance": {
110
+ "type": "boolean",
111
+ "description": "Optimize for short utterances."
112
+ },
113
+ "vadTurnoff": {
114
+ "type": "number",
115
+ "description": "Milliseconds of silence before VAD turns off."
116
+ },
117
+ "tag": {
118
+ "type": "string",
119
+ "description": "Tag to associate with the request for tracking."
120
+ },
121
+ "fillerWords": {
122
+ "type": "boolean",
123
+ "description": "Include filler words (um, uh) in transcript."
124
+ },
125
+ "eotThreshold": {
126
+ "type": "number",
127
+ "description": "End-of-turn confidence threshold (0-1)."
128
+ },
129
+ "eotTimeoutMs": {
130
+ "type": "number",
131
+ "description": "End-of-turn timeout in milliseconds."
132
+ },
133
+ "mipOptOut": {
134
+ "type": "boolean",
135
+ "description": "Opt out of Deepgram's model improvement program."
136
+ },
137
+ "entityPrompt": {
138
+ "type": "string",
139
+ "description": "Prompt to guide entity detection."
140
+ },
141
+ "eagerEotThreshold": {
142
+ "type": "number",
143
+ "description": "Eager end-of-turn threshold for faster response."
144
+ }
145
+ },
146
+ "additionalProperties": true
147
+ }
@@ -0,0 +1,39 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-elevenlabsOptions",
4
+ "title": "ElevenLabs Recognizer Options",
5
+ "description": "ElevenLabs-specific STT options. Only applies when recognizer vendor is 'elevenlabs'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "includeTimestamps": {
9
+ "type": "boolean",
10
+ "description": "Include word-level timestamps in results."
11
+ },
12
+ "commitStrategy": {
13
+ "type": "string",
14
+ "enum": ["manual", "vad"],
15
+ "description": "How audio chunks are committed. 'manual' for explicit commits, 'vad' for voice activity detection."
16
+ },
17
+ "vadSilenceThresholdSecs": {
18
+ "type": "number",
19
+ "description": "Silence duration in seconds to trigger VAD commit."
20
+ },
21
+ "vadThreshold": {
22
+ "type": "number",
23
+ "description": "VAD activation threshold."
24
+ },
25
+ "minSpeechDurationMs": {
26
+ "type": "number",
27
+ "description": "Minimum speech duration in milliseconds to accept."
28
+ },
29
+ "minSilenceDurationMs": {
30
+ "type": "number",
31
+ "description": "Minimum silence duration in milliseconds to trigger end of speech."
32
+ },
33
+ "enableLogging": {
34
+ "type": "boolean",
35
+ "description": "Enable server-side logging."
36
+ }
37
+ },
38
+ "additionalProperties": true
39
+ }
@@ -0,0 +1,8 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-gladiaOptions",
4
+ "title": "Gladia Recognizer Options",
5
+ "description": "Gladia-specific STT options. Only applies when recognizer vendor is 'gladia'. See Gladia API documentation for available options.",
6
+ "type": "object",
7
+ "additionalProperties": true
8
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-googleOptions",
4
+ "title": "Google Recognizer Options",
5
+ "description": "Options specific to Google Speech-to-Text. Only applies when recognizer vendor is 'google'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "serviceVersion": {
9
+ "type": "string",
10
+ "enum": ["v1", "v2"],
11
+ "description": "Google Speech-to-Text API version."
12
+ },
13
+ "recognizerId": {
14
+ "type": "string",
15
+ "description": "ID of a Google Speech recognizer resource (v2 only)."
16
+ },
17
+ "speechStartTimeoutMs": {
18
+ "type": "number",
19
+ "description": "Timeout in milliseconds to wait for speech to start."
20
+ },
21
+ "speechEndTimeoutMs": {
22
+ "type": "number",
23
+ "description": "Timeout in milliseconds to detect end of speech."
24
+ },
25
+ "enableVoiceActivityEvents": {
26
+ "type": "boolean",
27
+ "description": "Enable voice activity detection events."
28
+ },
29
+ "transcriptNormalization": {
30
+ "type": "array",
31
+ "description": "Array of transcript normalization rules."
32
+ }
33
+ },
34
+ "additionalProperties": true
35
+ }
@@ -0,0 +1,53 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://jambonz.org/schema/components/recognizer-houndifyOptions",
4
+ "title": "Houndify Recognizer Options",
5
+ "description": "Houndify-specific STT options. Only applies when recognizer vendor is 'houndify'.",
6
+ "type": "object",
7
+ "properties": {
8
+ "requestInfo": {
9
+ "type": "object",
10
+ "description": "Houndify RequestInfo object with context data.",
11
+ "additionalProperties": true
12
+ },
13
+ "sampleRate": { "type": "number", "description": "Audio sample rate in Hz." },
14
+ "latitude": { "type": "number", "description": "User latitude for location-aware queries." },
15
+ "longitude": { "type": "number", "description": "User longitude for location-aware queries." },
16
+ "city": { "type": "string", "description": "User city." },
17
+ "state": { "type": "string", "description": "User state." },
18
+ "country": { "type": "string", "description": "User country." },
19
+ "timeZone": { "type": "string", "description": "User timezone." },
20
+ "domain": { "type": "string", "description": "Houndify domain." },
21
+ "audioEndpoint": { "type": "string", "description": "Custom audio endpoint URL." },
22
+ "maxSilenceSeconds": { "type": "number", "description": "Maximum silence, in seconds, before stopping." },
23
+ "maxSilenceAfterFullQuerySeconds": { "type": "number", "description": "Silence timeout, in seconds, after a complete query." },
24
+ "maxSilenceAfterPartialQuerySeconds": { "type": "number", "description": "Silence timeout, in seconds, after a partial query." },
25
+ "vadSensitivity": { "type": "number", "description": "VAD sensitivity level." },
26
+ "vadTimeout": { "type": "number", "description": "VAD timeout in milliseconds." },
27
+ "vadMode": { "type": "string", "description": "VAD mode." },
28
+ "vadVoiceMs": { "type": "number", "description": "Milliseconds of voice to trigger VAD." },
29
+ "vadSilenceMs": { "type": "number", "description": "Milliseconds of silence to trigger VAD." },
30
+ "vadDebug": { "type": "boolean", "description": "Enable VAD debug logging." },
31
+ "audioFormat": { "type": "string", "description": "Audio format." },
32
+ "enableNoiseReduction": { "type": "boolean", "description": "Enable noise reduction." },
33
+ "enableProfanityFilter": { "type": "boolean", "description": "Filter profanity." },
34
+ "enablePunctuation": { "type": "boolean", "description": "Enable punctuation." },
35
+ "enableCapitalization": { "type": "boolean", "description": "Enable capitalization." },
36
+ "confidenceThreshold": { "type": "number", "description": "Minimum confidence threshold." },
37
+ "enableDisfluencyFilter": { "type": "boolean", "description": "Filter disfluencies (um, uh)." },
38
+ "maxResults": { "type": "number", "description": "Maximum number of results." },
39
+ "enableWordTimestamps": { "type": "boolean", "description": "Include word timestamps." },
40
+ "maxAlternatives": { "type": "number", "description": "Maximum alternative transcripts." },
41
+ "partialTranscriptInterval": { "type": "number", "description": "Interval for partial transcript delivery." },
42
+ "sessionTimeout": { "type": "number", "description": "Session timeout." },
43
+ "connectionTimeout": { "type": "number", "description": "Connection timeout." },
44
+ "customVocabulary": {
45
+ "type": "array",
46
+ "items": { "type": "string" },
47
+ "description": "Custom vocabulary terms."
48
+ },
49
+ "languageModel": { "type": "string", "description": "Language model to use." },
50
+ "audioQueryAbsoluteTimeout": { "type": "number", "description": "Absolute timeout for audio queries." }
51
+ },
52
+ "additionalProperties": true
53
+ }