@jambonz/schema 0.3.11 → 0.3.12
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
|
@@ -11,19 +11,91 @@
|
|
|
11
11
|
},
|
|
12
12
|
"model": {
|
|
13
13
|
"type": "string",
|
|
14
|
-
"description": "Soniox recognition model."
|
|
14
|
+
"description": "Soniox recognition model (e.g. 'stt-rt-v5')."
|
|
15
|
+
},
|
|
16
|
+
"languageHints": {
|
|
17
|
+
"type": "array",
|
|
18
|
+
"items": { "type": "string" },
|
|
19
|
+
"description": "Language hints for the v5 multilingual model, as ISO language codes (e.g. ['en','es']). Improves accuracy and speeds up language detection."
|
|
20
|
+
},
|
|
21
|
+
"enableLanguageIdentification": {
|
|
22
|
+
"type": "boolean",
|
|
23
|
+
"description": "Enable language identification (v5); each recognized token is tagged with its detected language."
|
|
24
|
+
},
|
|
25
|
+
"enableSpeakerDiarization": {
|
|
26
|
+
"type": "boolean",
|
|
27
|
+
"description": "Enable speaker diarization (v5); each recognized token is tagged with a speaker."
|
|
15
28
|
},
|
|
16
29
|
"endpointDetection": {
|
|
17
30
|
"type": "boolean",
|
|
18
31
|
"description": "Enable endpoint detection."
|
|
19
32
|
},
|
|
33
|
+
"endpointSensitivity": {
|
|
34
|
+
"type": "number",
|
|
35
|
+
"minimum": -1,
|
|
36
|
+
"maximum": 1,
|
|
37
|
+
"description": "Endpoint detection sensitivity (v5), -1.0 to 1.0 (default 0.0). Higher values finalize endpoints faster; lower values are more conservative."
|
|
38
|
+
},
|
|
39
|
+
"maxEndpointDelayMs": {
|
|
40
|
+
"type": "integer",
|
|
41
|
+
"minimum": 500,
|
|
42
|
+
"maximum": 3000,
|
|
43
|
+
"description": "Maximum delay in milliseconds before an endpoint is forced (v5), 500-3000 (default 2000)."
|
|
44
|
+
},
|
|
45
|
+
"maxNonFinalTokensDurationMs": {
|
|
46
|
+
"type": "integer",
|
|
47
|
+
"minimum": 0,
|
|
48
|
+
"description": "Maximum duration in milliseconds that tokens may remain non-final before forced finalization (v5)."
|
|
49
|
+
},
|
|
20
50
|
"profanityFilter": {
|
|
21
51
|
"type": "boolean",
|
|
22
52
|
"description": "Filter profanity from results."
|
|
23
53
|
},
|
|
54
|
+
"context": {
|
|
55
|
+
"type": "object",
|
|
56
|
+
"additionalProperties": false,
|
|
57
|
+
"description": "Soniox v5 recognition context to improve accuracy (max ~8000 tokens). See https://soniox.com/docs/stt/concepts/context. When supplied this takes precedence outright over the 'speechContext' and 'hints' shortcuts.",
|
|
58
|
+
"properties": {
|
|
59
|
+
"general": {
|
|
60
|
+
"type": "array",
|
|
61
|
+
"description": "Structured key/value metadata describing the domain, topic, participants, setting, etc.",
|
|
62
|
+
"items": {
|
|
63
|
+
"type": "object",
|
|
64
|
+
"additionalProperties": false,
|
|
65
|
+
"properties": {
|
|
66
|
+
"key": { "type": "string" },
|
|
67
|
+
"value": { "type": "string" }
|
|
68
|
+
},
|
|
69
|
+
"required": ["key", "value"]
|
|
70
|
+
}
|
|
71
|
+
},
|
|
72
|
+
"text": {
|
|
73
|
+
"type": "string",
|
|
74
|
+
"description": "Free-form background text (history of prior interactions, reference documents, meeting notes, summaries)."
|
|
75
|
+
},
|
|
76
|
+
"terms": {
|
|
77
|
+
"type": "array",
|
|
78
|
+
"items": { "type": "string" },
|
|
79
|
+
"description": "Domain-specific vocabulary to boost recognition."
|
|
80
|
+
},
|
|
81
|
+
"translation_terms": {
|
|
82
|
+
"type": "array",
|
|
83
|
+
"description": "Source/target term mappings; only used when translation is enabled.",
|
|
84
|
+
"items": {
|
|
85
|
+
"type": "object",
|
|
86
|
+
"additionalProperties": false,
|
|
87
|
+
"properties": {
|
|
88
|
+
"source": { "type": "string" },
|
|
89
|
+
"target": { "type": "string" }
|
|
90
|
+
},
|
|
91
|
+
"required": ["source", "target"]
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
},
|
|
24
96
|
"speechContext": {
|
|
25
97
|
"type": "string",
|
|
26
|
-
"description": "
|
|
98
|
+
"description": "Shortcut for context.text (free-text background). Ignored when the full 'context' object is supplied. The generic recognizer 'hints' are likewise sent as context.terms unless 'context' is supplied."
|
|
27
99
|
},
|
|
28
100
|
"clientRequestReference": {
|
|
29
101
|
"type": "string",
|
package/package.json
CHANGED
package/verbs/agent.schema.json
CHANGED
|
@@ -199,6 +199,11 @@
|
|
|
199
199
|
"minimum": 0,
|
|
200
200
|
"description": "Sampling temperature."
|
|
201
201
|
},
|
|
202
|
+
"reasoningEffort": {
|
|
203
|
+
"type": "string",
|
|
204
|
+
"enum": ["minimal", "low", "medium", "high"],
|
|
205
|
+
"description": "Vendor-neutral thinking/reasoning effort. Mapped per-vendor by the LLM adapter (Gemini thinkingLevel, OpenAI reasoning_effort, Anthropic extended thinking); ignored by vendors without a native equivalent. 'minimal' minimizes thinking for lowest TTFT on latency-sensitive voice turns."
|
|
206
|
+
},
|
|
202
207
|
"tools": {
|
|
203
208
|
"type": "array",
|
|
204
209
|
"description": "Tool / function definitions available to the model. The MCP-flat shape `{name, description, parameters}` is canonical; the OpenAI-wrapped form `{type:'function', function:{...}}` is also accepted.",
|