@jambonz/schema 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,19 +11,91 @@
11
11
  },
12
12
  "model": {
13
13
  "type": "string",
14
- "description": "Soniox recognition model."
14
+ "description": "Soniox recognition model (e.g. 'stt-rt-v5')."
15
+ },
16
+ "languageHints": {
17
+ "type": "array",
18
+ "items": { "type": "string" },
19
+ "description": "Language hints for the v5 multilingual model, as ISO language codes (e.g. ['en','es']). Improves accuracy and speeds up language detection."
20
+ },
21
+ "enableLanguageIdentification": {
22
+ "type": "boolean",
23
+ "description": "Enable language identification (v5); each recognized token is tagged with its detected language."
24
+ },
25
+ "enableSpeakerDiarization": {
26
+ "type": "boolean",
27
+ "description": "Enable speaker diarization (v5); each recognized token is tagged with a speaker."
15
28
  },
16
29
  "endpointDetection": {
17
30
  "type": "boolean",
18
31
  "description": "Enable endpoint detection."
19
32
  },
33
+ "endpointSensitivity": {
34
+ "type": "number",
35
+ "minimum": -1,
36
+ "maximum": 1,
37
+ "description": "Endpoint detection sensitivity (v5), -1.0 to 1.0 (default 0.0). Higher values finalize endpoints faster; lower values are more conservative."
38
+ },
39
+ "maxEndpointDelayMs": {
40
+ "type": "integer",
41
+ "minimum": 500,
42
+ "maximum": 3000,
43
+ "description": "Maximum delay in milliseconds before an endpoint is forced (v5), 500-3000 (default 2000)."
44
+ },
45
+ "maxNonFinalTokensDurationMs": {
46
+ "type": "integer",
47
+ "minimum": 0,
48
+ "description": "Maximum duration in milliseconds that tokens may remain non-final before forced finalization (v5)."
49
+ },
20
50
  "profanityFilter": {
21
51
  "type": "boolean",
22
52
  "description": "Filter profanity from results."
23
53
  },
54
+ "context": {
55
+ "type": "object",
56
+ "additionalProperties": false,
57
+ "description": "Soniox v5 recognition context to improve accuracy (max ~8000 tokens). See https://soniox.com/docs/stt/concepts/context. When supplied, this object takes precedence over the 'speechContext' and 'hints' shortcuts, which are then ignored.",
58
+ "properties": {
59
+ "general": {
60
+ "type": "array",
61
+ "description": "Structured key/value metadata describing the domain, topic, participants, setting, etc.",
62
+ "items": {
63
+ "type": "object",
64
+ "additionalProperties": false,
65
+ "properties": {
66
+ "key": { "type": "string" },
67
+ "value": { "type": "string" }
68
+ },
69
+ "required": ["key", "value"]
70
+ }
71
+ },
72
+ "text": {
73
+ "type": "string",
74
+ "description": "Free-form background text (history of prior interactions, reference documents, meeting notes, summaries)."
75
+ },
76
+ "terms": {
77
+ "type": "array",
78
+ "items": { "type": "string" },
79
+ "description": "Domain-specific vocabulary to boost recognition."
80
+ },
81
+ "translation_terms": {
82
+ "type": "array",
83
+ "description": "Source/target term mappings; only used when translation is enabled.",
84
+ "items": {
85
+ "type": "object",
86
+ "additionalProperties": false,
87
+ "properties": {
88
+ "source": { "type": "string" },
89
+ "target": { "type": "string" }
90
+ },
91
+ "required": ["source", "target"]
92
+ }
93
+ }
94
+ }
95
+ },
24
96
  "speechContext": {
25
97
  "type": "string",
26
- "description": "Speech context for improved recognition."
98
+ "description": "Shortcut for context.text (free-text background). Ignored when the full 'context' object is supplied. The generic recognizer 'hints' are likewise sent as context.terms unless 'context' is supplied."
27
99
  },
28
100
  "clientRequestReference": {
29
101
  "type": "string",
@@ -8,7 +8,7 @@
8
8
  "vendor": {
9
9
  "type": "string",
10
10
  "description": "The STT vendor to use. Must match a vendor configured in the jambonz platform.",
11
- "examples": ["google", "aws", "microsoft", "deepgram", "nuance", "ibm", "nvidia", "soniox", "cobalt", "assemblyai", "speechmatics", "openai", "houndify", "gladia", "elevenlabs", "verbio", "custom"]
11
+ "examples": ["google", "aws", "microsoft", "deepgram", "ibm", "nvidia", "soniox", "assemblyai", "speechmatics", "openai", "houndify", "gladia", "elevenlabs", "custom"]
12
12
  },
13
13
  "label": {
14
14
  "type": "string",
@@ -195,18 +195,15 @@
195
195
  "googleOptions": { "$ref": "recognizer-googleOptions" },
196
196
  "awsOptions": { "$ref": "recognizer-awsOptions" },
197
197
  "azureOptions": { "$ref": "recognizer-azureOptions" },
198
- "nuanceOptions": { "$ref": "recognizer-nuanceOptions" },
199
198
  "ibmOptions": { "$ref": "recognizer-ibmOptions" },
200
199
  "nvidiaOptions": { "$ref": "recognizer-nvidiaOptions" },
201
200
  "sonioxOptions": { "$ref": "recognizer-sonioxOptions" },
202
- "cobaltOptions": { "$ref": "recognizer-cobaltOptions" },
203
201
  "assemblyAiOptions": { "$ref": "recognizer-assemblyAiOptions" },
204
202
  "speechmaticsOptions": { "$ref": "recognizer-speechmaticsOptions" },
205
203
  "openaiOptions": { "$ref": "recognizer-openaiOptions" },
206
204
  "houndifyOptions": { "$ref": "recognizer-houndifyOptions" },
207
205
  "gladiaOptions": { "$ref": "recognizer-gladiaOptions" },
208
206
  "elevenlabsOptions": { "$ref": "recognizer-elevenlabsOptions" },
209
- "verbioOptions": { "$ref": "recognizer-verbioOptions" },
210
207
  "customOptions": { "$ref": "recognizer-customOptions" }
211
208
  },
212
209
  "required": ["vendor"],
@@ -8,7 +8,7 @@
8
8
  "vendor": {
9
9
  "type": "string",
10
10
  "description": "The TTS vendor to use. Must match a vendor configured in the jambonz platform.",
11
- "examples": ["google", "aws", "microsoft", "elevenlabs", "cartesia", "deepgram", "ibm", "nuance", "nvidia", "wellsaid", "whisper", "verbio", "custom"]
11
+ "examples": ["google", "aws", "microsoft", "elevenlabs", "cartesia", "deepgram", "ibm", "nvidia", "wellsaid", "whisper", "custom"]
12
12
  },
13
13
  "label": {
14
14
  "type": "string",
@@ -65,14 +65,6 @@ See [Voice Agent API documentation](https://docs.speechmatics.com/private/voice-
65
65
 
66
66
  - [STT Models](https://soniox.com/docs/stt/models)
67
67
 
68
- ### Verbio
69
-
70
- - [Supported Languages](https://www.verbio.com/supported-languages)
71
-
72
68
  ### Gladia
73
69
 
74
70
  - [Supported Languages](https://docs.gladia.io/chapters/language/supported-languages)
75
-
76
- ### Nuance
77
-
78
- - [ASR gRPC API](https://docs.mix.nuance.com/asr-grpc/v1/) (Nuance is now part of Microsoft; Azure Speech Service is the successor)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jambonz/schema",
3
- "version": "0.3.11",
3
+ "version": "0.3.13",
4
4
  "description": "JSON Schema definitions and validation for jambonz verb applications",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -199,6 +199,11 @@
199
199
  "minimum": 0,
200
200
  "description": "Sampling temperature."
201
201
  },
202
+ "reasoningEffort": {
203
+ "type": "string",
204
+ "enum": ["minimal", "low", "medium", "high"],
205
+ "description": "Vendor-neutral thinking/reasoning effort. Mapped per-vendor by the LLM adapter (Gemini thinkingLevel, OpenAI reasoning_effort, Anthropic extended thinking); ignored by vendors without a native equivalent. 'minimal' minimizes thinking for the lowest time to first token (TTFT) on latency-sensitive voice turns."
206
+ },
202
207
  "tools": {
203
208
  "type": "array",
204
209
  "description": "Tool / function definitions available to the model. The MCP-flat shape `{name, description, parameters}` is canonical; the OpenAI-wrapped form `{type:'function', function:{...}}` is also accepted.",
@@ -1,34 +0,0 @@
1
- {
2
- "$schema": "https://json-schema.org/draft/2020-12/schema",
3
- "$id": "https://jambonz.org/schema/components/recognizer-cobaltOptions",
4
- "title": "Cobalt Recognizer Options",
5
- "description": "Cobalt-specific STT options. Only applies when recognizer vendor is 'cobalt'.",
6
- "type": "object",
7
- "properties": {
8
- "serverUri": {
9
- "type": "string",
10
- "description": "Cobalt server URI."
11
- },
12
- "enableConfusionNetwork": {
13
- "type": "boolean",
14
- "description": "Enable confusion network output."
15
- },
16
- "metadata": {
17
- "type": "string",
18
- "description": "Metadata string to pass to the server."
19
- },
20
- "compiledContextData": {
21
- "type": "string",
22
- "description": "Compiled context data for biasing recognition."
23
- },
24
- "wordTimeOffsets": {
25
- "type": "boolean",
26
- "description": "Include word-level timestamps."
27
- },
28
- "contextToken": {
29
- "type": "string",
30
- "description": "Context token for server-side context."
31
- }
32
- },
33
- "additionalProperties": false
34
- }
@@ -1,150 +0,0 @@
1
- {
2
- "$schema": "https://json-schema.org/draft/2020-12/schema",
3
- "$id": "https://jambonz.org/schema/components/recognizer-nuanceOptions",
4
- "title": "Nuance Recognizer Options",
5
- "description": "Nuance Mix specific options. Only applies when recognizer vendor is 'nuance'.",
6
- "type": "object",
7
- "properties": {
8
- "clientId": {
9
- "type": "string",
10
- "description": "Nuance Mix client ID."
11
- },
12
- "secret": {
13
- "type": "string",
14
- "description": "Nuance Mix client secret."
15
- },
16
- "kryptonEndpoint": {
17
- "type": "string",
18
- "description": "Custom Nuance Krypton endpoint URL."
19
- },
20
- "topic": {
21
- "type": "string",
22
- "description": "Recognition topic (domain)."
23
- },
24
- "utteranceDetectionMode": {
25
- "type": "string",
26
- "enum": ["single", "multiple", "disabled"],
27
- "description": "How utterance boundaries are detected."
28
- },
29
- "punctuation": {
30
- "type": "boolean",
31
- "description": "Enable automatic punctuation."
32
- },
33
- "profanityFilter": {
34
- "type": "boolean",
35
- "description": "Filter profanity from results."
36
- },
37
- "includeTokenization": {
38
- "type": "boolean",
39
- "description": "Include tokenization data in results."
40
- },
41
- "discardSpeakerAdaptation": {
42
- "type": "boolean",
43
- "description": "Discard speaker adaptation data."
44
- },
45
- "suppressCallRecording": {
46
- "type": "boolean",
47
- "description": "Suppress call recording on the Nuance side."
48
- },
49
- "maskLoadFailures": {
50
- "type": "boolean",
51
- "description": "Mask resource load failures."
52
- },
53
- "suppressInitialCapitalization": {
54
- "type": "boolean",
55
- "description": "Suppress initial capitalization of results."
56
- },
57
- "allowZeroBaseLmWeight": {
58
- "type": "boolean",
59
- "description": "Allow zero base language model weight."
60
- },
61
- "filterWakeupWord": {
62
- "type": "boolean",
63
- "description": "Filter wakeup words from results."
64
- },
65
- "resultType": {
66
- "type": "string",
67
- "enum": ["final", "partial", "immutable_partial"],
68
- "description": "Type of results to return."
69
- },
70
- "noInputTimeoutMs": {
71
- "type": "number",
72
- "description": "Timeout in milliseconds before no-input event."
73
- },
74
- "recognitionTimeoutMs": {
75
- "type": "number",
76
- "description": "Maximum recognition duration in milliseconds."
77
- },
78
- "utteranceEndSilenceMs": {
79
- "type": "number",
80
- "description": "Silence duration in milliseconds to detect end of utterance."
81
- },
82
- "maxHypotheses": {
83
- "type": "number",
84
- "description": "Maximum number of recognition hypotheses to return."
85
- },
86
- "speechDomain": {
87
- "type": "string",
88
- "description": "Speech domain for optimized recognition."
89
- },
90
- "formatting": {
91
- "type": "object",
92
- "description": "Formatting options for recognition results.",
93
- "properties": {
94
- "scheme": { "type": "string", "description": "Formatting scheme name." },
95
- "options": { "type": "object", "description": "Scheme-specific formatting options." }
96
- },
97
- "required": ["scheme", "options"]
98
- },
99
- "clientData": {
100
- "type": "object",
101
- "description": "Custom client data to pass to Nuance.",
102
- "additionalProperties": true
103
- },
104
- "userId": {
105
- "type": "string",
106
- "description": "User ID for speaker adaptation."
107
- },
108
- "speechDetectionSensitivity": {
109
- "type": "number",
110
- "description": "Speech detection sensitivity (0-1)."
111
- },
112
- "resources": {
113
- "type": "array",
114
- "description": "Array of Nuance recognition resources (grammars, wordsets, etc.).",
115
- "items": {
116
- "type": "object",
117
- "properties": {
118
- "externalReference": {
119
- "type": "object",
120
- "description": "External resource reference.",
121
- "properties": {
122
- "type": {
123
- "type": "string",
124
- "enum": ["undefined_resource_type", "wordset", "compiled_wordset", "domain_lm", "speaker_profile", "grammar", "settings"]
125
- },
126
- "uri": { "type": "string" },
127
- "maxLoadFailures": { "type": "boolean" },
128
- "requestTimeoutMs": { "type": "number" },
129
- "headers": { "type": "object" }
130
- }
131
- },
132
- "inlineWordset": { "type": "string", "description": "Inline wordset JSON string." },
133
- "builtin": { "type": "string", "description": "Built-in grammar name." },
134
- "inlineGrammar": { "type": "string", "description": "Inline SRGS grammar." },
135
- "wakeupWord": { "type": "array", "items": { "type": "string" }, "description": "Wakeup words." },
136
- "weightName": {
137
- "type": "string",
138
- "enum": ["defaultWeight", "lowest", "low", "medium", "high", "highest"]
139
- },
140
- "weightValue": { "type": "number" },
141
- "reuse": {
142
- "type": "string",
143
- "enum": ["undefined_reuse", "low_reuse", "high_reuse"]
144
- }
145
- }
146
- }
147
- }
148
- },
149
- "additionalProperties": false
150
- }
@@ -1,46 +0,0 @@
1
- {
2
- "$schema": "https://json-schema.org/draft/2020-12/schema",
3
- "$id": "https://jambonz.org/schema/components/recognizer-verbioOptions",
4
- "title": "Verbio Recognizer Options",
5
- "description": "Verbio-specific STT options. Only applies when recognizer vendor is 'verbio'.",
6
- "type": "object",
7
- "properties": {
8
- "enable_formatting": {
9
- "type": "boolean",
10
- "description": "Enable text formatting of results."
11
- },
12
- "enable_diarization": {
13
- "type": "boolean",
14
- "description": "Enable speaker diarization."
15
- },
16
- "topic": {
17
- "type": "number",
18
- "description": "Topic ID for domain-specific recognition."
19
- },
20
- "inline_grammar": {
21
- "type": "string",
22
- "description": "Inline SRGS grammar for constrained recognition."
23
- },
24
- "grammar_uri": {
25
- "type": "string",
26
- "description": "URI of an external grammar resource."
27
- },
28
- "label": {
29
- "type": "string",
30
- "description": "Label for the recognition session."
31
- },
32
- "recognition_timeout": {
33
- "type": "number",
34
- "description": "Maximum recognition duration in seconds."
35
- },
36
- "speech_complete_timeout": {
37
- "type": "number",
38
- "description": "Silence duration in seconds after complete speech."
39
- },
40
- "speech_incomplete_timeout": {
41
- "type": "number",
42
- "description": "Silence duration in seconds after incomplete speech."
43
- }
44
- },
45
- "additionalProperties": false
46
- }