npm - @jambonz/schema - Versions diffs - 0.3.11 → 0.3.13 - Mend

@jambonz/schema 0.3.11 → 0.3.13

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9)

package/components/recognizer-sonioxOptions.schema.json +74 -2
package/components/recognizer.schema.json +1 -4
package/components/synthesizer.schema.json +1 -1
package/docs/components/recognizer.md +0 -8
package/package.json +1 -1
package/verbs/agent.schema.json +5 -0
package/components/recognizer-cobaltOptions.schema.json +0 -34
package/components/recognizer-nuanceOptions.schema.json +0 -150
package/components/recognizer-verbioOptions.schema.json +0 -46

package/components/recognizer-sonioxOptions.schema.json CHANGED Viewed

@@ -11,19 +11,91 @@
     },
     "model": {
       "type": "string",
-      "description": "Soniox recognition model."
+      "description": "Soniox recognition model (e.g. 'stt-rt-v5')."
+    },
+    "languageHints": {
+      "type": "array",
+      "items": { "type": "string" },
+      "description": "Language hints for the v5 multilingual model, as ISO language codes (e.g. ['en','es']). Improves accuracy and speeds up language detection."
+    },
+    "enableLanguageIdentification": {
+      "type": "boolean",
+      "description": "Enable language identification (v5); each recognized token is tagged with its detected language."
+    },
+    "enableSpeakerDiarization": {
+      "type": "boolean",
+      "description": "Enable speaker diarization (v5); each recognized token is tagged with a speaker."
     },
     "endpointDetection": {
       "type": "boolean",
       "description": "Enable endpoint detection."
     },
+    "endpointSensitivity": {
+      "type": "number",
+      "minimum": -1,
+      "maximum": 1,
+      "description": "Endpoint detection sensitivity (v5), -1.0 to 1.0 (default 0.0). Higher values finalize endpoints faster; lower values are more conservative."
+    },
+    "maxEndpointDelayMs": {
+      "type": "integer",
+      "minimum": 500,
+      "maximum": 3000,
+      "description": "Maximum delay in milliseconds before an endpoint is forced (v5), 500-3000 (default 2000)."
+    },
+    "maxNonFinalTokensDurationMs": {
+      "type": "integer",
+      "minimum": 0,
+      "description": "Maximum duration in milliseconds that tokens may remain non-final before forced finalization (v5)."
+    },
     "profanityFilter": {
       "type": "boolean",
       "description": "Filter profanity from results."
     },
+    "context": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Soniox v5 recognition context to improve accuracy (max ~8000 tokens). See https://soniox.com/docs/stt/concepts/context. When supplied this takes precedence outright over the 'speechContext' and 'hints' shortcuts.",
+      "properties": {
+        "general": {
+          "type": "array",
+          "description": "Structured key/value metadata describing the domain, topic, participants, setting, etc.",
+          "items": {
+            "type": "object",
+            "additionalProperties": false,
+            "properties": {
+              "key": { "type": "string" },
+              "value": { "type": "string" }
+            },
+            "required": ["key", "value"]
+          }
+        },
+        "text": {
+          "type": "string",
+          "description": "Free-form background text (history of prior interactions, reference documents, meeting notes, summaries)."
+        },
+        "terms": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "Domain-specific vocabulary to boost recognition."
+        },
+        "translation_terms": {
+          "type": "array",
+          "description": "Source/target term mappings; only used when translation is enabled.",
+          "items": {
+            "type": "object",
+            "additionalProperties": false,
+            "properties": {
+              "source": { "type": "string" },
+              "target": { "type": "string" }
+            },
+            "required": ["source", "target"]
+          }
+        }
+      }
+    },
     "speechContext": {
       "type": "string",
-      "description": "Speech context for improved recognition."
+      "description": "Shortcut for context.text (free-text background). Ignored when the full 'context' object is supplied. The generic recognizer 'hints' are likewise sent as context.terms unless 'context' is supplied."
     },
     "clientRequestReference": {
       "type": "string",

package/components/recognizer.schema.json CHANGED Viewed

@@ -8,7 +8,7 @@
     "vendor": {
       "type": "string",
       "description": "The STT vendor to use. Must match a vendor configured in the jambonz platform.",
-      "examples": ["google", "aws", "microsoft", "deepgram", "nuance", "ibm", "nvidia", "soniox", "cobalt", "assemblyai", "speechmatics", "openai", "houndify", "gladia", "elevenlabs", "verbio", "custom"]
+      "examples": ["google", "aws", "microsoft", "deepgram", "ibm", "nvidia", "soniox", "assemblyai", "speechmatics", "openai", "houndify", "gladia", "elevenlabs", "custom"]
     },
     "label": {
       "type": "string",
@@ -195,18 +195,15 @@
     "googleOptions": { "$ref": "recognizer-googleOptions" },
     "awsOptions": { "$ref": "recognizer-awsOptions" },
     "azureOptions": { "$ref": "recognizer-azureOptions" },
-    "nuanceOptions": { "$ref": "recognizer-nuanceOptions" },
     "ibmOptions": { "$ref": "recognizer-ibmOptions" },
     "nvidiaOptions": { "$ref": "recognizer-nvidiaOptions" },
     "sonioxOptions": { "$ref": "recognizer-sonioxOptions" },
-    "cobaltOptions": { "$ref": "recognizer-cobaltOptions" },
     "assemblyAiOptions": { "$ref": "recognizer-assemblyAiOptions" },
     "speechmaticsOptions": { "$ref": "recognizer-speechmaticsOptions" },
     "openaiOptions": { "$ref": "recognizer-openaiOptions" },
     "houndifyOptions": { "$ref": "recognizer-houndifyOptions" },
     "gladiaOptions": { "$ref": "recognizer-gladiaOptions" },
     "elevenlabsOptions": { "$ref": "recognizer-elevenlabsOptions" },
-    "verbioOptions": { "$ref": "recognizer-verbioOptions" },
     "customOptions": { "$ref": "recognizer-customOptions" }
   },
   "required": ["vendor"],

package/components/synthesizer.schema.json CHANGED Viewed

@@ -8,7 +8,7 @@
     "vendor": {
       "type": "string",
       "description": "The TTS vendor to use. Must match a vendor configured in the jambonz platform.",
-      "examples": ["google", "aws", "microsoft", "elevenlabs", "cartesia", "deepgram", "ibm", "nuance", "nvidia", "wellsaid", "whisper", "verbio", "custom"]
+      "examples": ["google", "aws", "microsoft", "elevenlabs", "cartesia", "deepgram", "ibm", "nvidia", "wellsaid", "whisper", "custom"]
     },
     "label": {
       "type": "string",

package/docs/components/recognizer.md CHANGED Viewed

@@ -65,14 +65,6 @@ See [Voice Agent API documentation](https://docs.speechmatics.com/private/voice-
 - [STT Models](https://soniox.com/docs/stt/models)
-### Verbio
-- [Supported Languages](https://www.verbio.com/supported-languages)
 ### Gladia
 - [Supported Languages](https://docs.gladia.io/chapters/language/supported-languages)
-### Nuance
-- [ASR gRPC API](https://docs.mix.nuance.com/asr-grpc/v1/) (Nuance is now part of Microsoft; Azure Speech Service is the successor)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jambonz/schema",
-  "version": "0.3.11",
+  "version": "0.3.13",
   "description": "JSON Schema definitions and validation for jambonz verb applications",
   "main": "index.js",
   "scripts": {

package/verbs/agent.schema.json CHANGED Viewed

@@ -199,6 +199,11 @@
               "minimum": 0,
               "description": "Sampling temperature."
             },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": ["minimal", "low", "medium", "high"],
+              "description": "Vendor-neutral thinking/reasoning effort. Mapped per-vendor by the LLM adapter (Gemini thinkingLevel, OpenAI reasoning_effort, Anthropic extended thinking); ignored by vendors without a native equivalent. 'minimal' minimizes thinking for lowest TTFT on latency-sensitive voice turns."
+            },
             "tools": {
               "type": "array",
               "description": "Tool / function definitions available to the model. The MCP-flat shape `{name, description, parameters}` is canonical; the OpenAI-wrapped form `{type:'function', function:{...}}` is also accepted.",

package/components/recognizer-cobaltOptions.schema.json DELETED Viewed

@@ -1,34 +0,0 @@
-{
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "$id": "https://jambonz.org/schema/components/recognizer-cobaltOptions",
-  "title": "Cobalt Recognizer Options",
-  "description": "Cobalt-specific STT options. Only applies when recognizer vendor is 'cobalt'.",
-  "type": "object",
-  "properties": {
-    "serverUri": {
-      "type": "string",
-      "description": "Cobalt server URI."
-    },
-    "enableConfusionNetwork": {
-      "type": "boolean",
-      "description": "Enable confusion network output."
-    },
-    "metadata": {
-      "type": "string",
-      "description": "Metadata string to pass to the server."
-    },
-    "compiledContextData": {
-      "type": "string",
-      "description": "Compiled context data for biasing recognition."
-    },
-    "wordTimeOffsets": {
-      "type": "boolean",
-      "description": "Include word-level timestamps."
-    },
-    "contextToken": {
-      "type": "string",
-      "description": "Context token for server-side context."
-    }
-  },
-  "additionalProperties": false
-}

package/components/recognizer-nuanceOptions.schema.json DELETED Viewed

@@ -1,150 +0,0 @@
-{
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "$id": "https://jambonz.org/schema/components/recognizer-nuanceOptions",
-  "title": "Nuance Recognizer Options",
-  "description": "Nuance Mix specific options. Only applies when recognizer vendor is 'nuance'.",
-  "type": "object",
-  "properties": {
-    "clientId": {
-      "type": "string",
-      "description": "Nuance Mix client ID."
-    },
-    "secret": {
-      "type": "string",
-      "description": "Nuance Mix client secret."
-    },
-    "kryptonEndpoint": {
-      "type": "string",
-      "description": "Custom Nuance Krypton endpoint URL."
-    },
-    "topic": {
-      "type": "string",
-      "description": "Recognition topic (domain)."
-    },
-    "utteranceDetectionMode": {
-      "type": "string",
-      "enum": ["single", "multiple", "disabled"],
-      "description": "How utterance boundaries are detected."
-    },
-    "punctuation": {
-      "type": "boolean",
-      "description": "Enable automatic punctuation."
-    },
-    "profanityFilter": {
-      "type": "boolean",
-      "description": "Filter profanity from results."
-    },
-    "includeTokenization": {
-      "type": "boolean",
-      "description": "Include tokenization data in results."
-    },
-    "discardSpeakerAdaptation": {
-      "type": "boolean",
-      "description": "Discard speaker adaptation data."
-    },
-    "suppressCallRecording": {
-      "type": "boolean",
-      "description": "Suppress call recording on the Nuance side."
-    },
-    "maskLoadFailures": {
-      "type": "boolean",
-      "description": "Mask resource load failures."
-    },
-    "suppressInitialCapitalization": {
-      "type": "boolean",
-      "description": "Suppress initial capitalization of results."
-    },
-    "allowZeroBaseLmWeight": {
-      "type": "boolean",
-      "description": "Allow zero base language model weight."
-    },
-    "filterWakeupWord": {
-      "type": "boolean",
-      "description": "Filter wakeup words from results."
-    },
-    "resultType": {
-      "type": "string",
-      "enum": ["final", "partial", "immutable_partial"],
-      "description": "Type of results to return."
-    },
-    "noInputTimeoutMs": {
-      "type": "number",
-      "description": "Timeout in milliseconds before no-input event."
-    },
-    "recognitionTimeoutMs": {
-      "type": "number",
-      "description": "Maximum recognition duration in milliseconds."
-    },
-    "utteranceEndSilenceMs": {
-      "type": "number",
-      "description": "Silence duration in milliseconds to detect end of utterance."
-    },
-    "maxHypotheses": {
-      "type": "number",
-      "description": "Maximum number of recognition hypotheses to return."
-    },
-    "speechDomain": {
-      "type": "string",
-      "description": "Speech domain for optimized recognition."
-    },
-    "formatting": {
-      "type": "object",
-      "description": "Formatting options for recognition results.",
-      "properties": {
-        "scheme": { "type": "string", "description": "Formatting scheme name." },
-        "options": { "type": "object", "description": "Scheme-specific formatting options." }
-      },
-      "required": ["scheme", "options"]
-    },
-    "clientData": {
-      "type": "object",
-      "description": "Custom client data to pass to Nuance.",
-      "additionalProperties": true
-    },
-    "userId": {
-      "type": "string",
-      "description": "User ID for speaker adaptation."
-    },
-    "speechDetectionSensitivity": {
-      "type": "number",
-      "description": "Speech detection sensitivity (0-1)."
-    },
-    "resources": {
-      "type": "array",
-      "description": "Array of Nuance recognition resources (grammars, wordsets, etc.).",
-      "items": {
-        "type": "object",
-        "properties": {
-          "externalReference": {
-            "type": "object",
-            "description": "External resource reference.",
-            "properties": {
-              "type": {
-                "type": "string",
-                "enum": ["undefined_resource_type", "wordset", "compiled_wordset", "domain_lm", "speaker_profile", "grammar", "settings"]
-              },
-              "uri": { "type": "string" },
-              "maxLoadFailures": { "type": "boolean" },
-              "requestTimeoutMs": { "type": "number" },
-              "headers": { "type": "object" }
-            }
-          },
-          "inlineWordset": { "type": "string", "description": "Inline wordset JSON string." },
-          "builtin": { "type": "string", "description": "Built-in grammar name." },
-          "inlineGrammar": { "type": "string", "description": "Inline SRGS grammar." },
-          "wakeupWord": { "type": "array", "items": { "type": "string" }, "description": "Wakeup words." },
-          "weightName": {
-            "type": "string",
-            "enum": ["defaultWeight", "lowest", "low", "medium", "high", "highest"]
-          },
-          "weightValue": { "type": "number" },
-          "reuse": {
-            "type": "string",
-            "enum": ["undefined_reuse", "low_reuse", "high_reuse"]
-          }
-        }
-      }
-    }
-  },
-  "additionalProperties": false
-}

package/components/recognizer-verbioOptions.schema.json DELETED Viewed

@@ -1,46 +0,0 @@
-{
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "$id": "https://jambonz.org/schema/components/recognizer-verbioOptions",
-  "title": "Verbio Recognizer Options",
-  "description": "Verbio-specific STT options. Only applies when recognizer vendor is 'verbio'.",
-  "type": "object",
-  "properties": {
-    "enable_formatting": {
-      "type": "boolean",
-      "description": "Enable text formatting of results."
-    },
-    "enable_diarization": {
-      "type": "boolean",
-      "description": "Enable speaker diarization."
-    },
-    "topic": {
-      "type": "number",
-      "description": "Topic ID for domain-specific recognition."
-    },
-    "inline_grammar": {
-      "type": "string",
-      "description": "Inline SRGS grammar for constrained recognition."
-    },
-    "grammar_uri": {
-      "type": "string",
-      "description": "URI of an external grammar resource."
-    },
-    "label": {
-      "type": "string",
-      "description": "Label for the recognition session."
-    },
-    "recognition_timeout": {
-      "type": "number",
-      "description": "Maximum recognition duration in seconds."
-    },
-    "speech_complete_timeout": {
-      "type": "number",
-      "description": "Silence duration in seconds after complete speech."
-    },
-    "speech_incomplete_timeout": {
-      "type": "number",
-      "description": "Silence duration in seconds after incomplete speech."
-    }
-  },
-  "additionalProperties": false
-}