@ailib-official/ai-protocol 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +6 -0
- package/README.md +415 -0
- package/dist/index.json +8 -0
- package/dist/v1/models/ai21.json +140 -0
- package/dist/v1/models/baichuan.json +138 -0
- package/dist/v1/models/cerebras.json +147 -0
- package/dist/v1/models/claude.json +114 -0
- package/dist/v1/models/cohere.json +235 -0
- package/dist/v1/models/deepinfra.json +43 -0
- package/dist/v1/models/deepseek-chat.json +55 -0
- package/dist/v1/models/doubao.json +197 -0
- package/dist/v1/models/ernie.json +223 -0
- package/dist/v1/models/fireworks.json +222 -0
- package/dist/v1/models/gemini.json +58 -0
- package/dist/v1/models/gpt.json +166 -0
- package/dist/v1/models/grok.json +138 -0
- package/dist/v1/models/huggingface.json +183 -0
- package/dist/v1/models/hunyuan.json +255 -0
- package/dist/v1/models/jina.json +139 -0
- package/dist/v1/models/lepton.json +188 -0
- package/dist/v1/models/llama.json +143 -0
- package/dist/v1/models/minimax.json +194 -0
- package/dist/v1/models/mistral.json +177 -0
- package/dist/v1/models/moonshot.json +144 -0
- package/dist/v1/models/nvidia.json +212 -0
- package/dist/v1/models/palmyra.json +103 -0
- package/dist/v1/models/perplexity.json +143 -0
- package/dist/v1/models/qwen.json +49 -0
- package/dist/v1/models/replicate.json +206 -0
- package/dist/v1/models/sensenova.json +144 -0
- package/dist/v1/models/siliconflow.json +44 -0
- package/dist/v1/models/spark.json +173 -0
- package/dist/v1/models/stable-diffusion.json +161 -0
- package/dist/v1/models/tiangong.json +169 -0
- package/dist/v1/models/together.json +43 -0
- package/dist/v1/models/yi.json +199 -0
- package/dist/v1/models/zhipu.json +250 -0
- package/dist/v1/providers/ai21.json +215 -0
- package/dist/v1/providers/anthropic.json +253 -0
- package/dist/v1/providers/anyscale.json +115 -0
- package/dist/v1/providers/azure.json +188 -0
- package/dist/v1/providers/baichuan.json +205 -0
- package/dist/v1/providers/baidu.json +205 -0
- package/dist/v1/providers/cerebras.json +216 -0
- package/dist/v1/providers/cohere.json +209 -0
- package/dist/v1/providers/deepinfra.json +196 -0
- package/dist/v1/providers/deepseek.json +221 -0
- package/dist/v1/providers/doubao.json +209 -0
- package/dist/v1/providers/fireworks.json +227 -0
- package/dist/v1/providers/gemini.json +279 -0
- package/dist/v1/providers/groq.json +218 -0
- package/dist/v1/providers/huggingface.json +181 -0
- package/dist/v1/providers/hunyuan.json +198 -0
- package/dist/v1/providers/jina.json +202 -0
- package/dist/v1/providers/lepton.json +221 -0
- package/dist/v1/providers/minimax.json +209 -0
- package/dist/v1/providers/mistral.json +214 -0
- package/dist/v1/providers/moonshot.json +214 -0
- package/dist/v1/providers/nvidia.json +213 -0
- package/dist/v1/providers/openai.json +267 -0
- package/dist/v1/providers/openrouter.json +220 -0
- package/dist/v1/providers/perplexity.json +172 -0
- package/dist/v1/providers/qwen.json +231 -0
- package/dist/v1/providers/replicate.json +192 -0
- package/dist/v1/providers/sensenova.json +185 -0
- package/dist/v1/providers/siliconflow.json +197 -0
- package/dist/v1/providers/spark.json +204 -0
- package/dist/v1/providers/stability.json +221 -0
- package/dist/v1/providers/tiangong.json +207 -0
- package/dist/v1/providers/together.json +196 -0
- package/dist/v1/providers/writer.json +253 -0
- package/dist/v1/providers/xai.json +238 -0
- package/dist/v1/providers/yi.json +205 -0
- package/dist/v1/providers/zhipu.json +208 -0
- package/dist/v1/spec.json +783 -0
- package/dist/v2/providers/anthropic.json +372 -0
- package/dist/v2/providers/cohere.json +109 -0
- package/dist/v2/providers/deepseek.json +272 -0
- package/dist/v2/providers/doubao.json +260 -0
- package/dist/v2/providers/google.json +388 -0
- package/dist/v2/providers/jina.json +71 -0
- package/dist/v2/providers/moonshot.json +284 -0
- package/dist/v2/providers/openai.json +419 -0
- package/dist/v2/providers/qwen.json +274 -0
- package/dist/v2/providers/zhipu.json +257 -0
- package/dist/v2-alpha/providers/anthropic.json +182 -0
- package/dist/v2-alpha/providers/gemini.json +187 -0
- package/dist/v2-alpha/providers/openai.json +215 -0
- package/dist/v2-alpha/spec.json +644 -0
- package/package.json +61 -0
- package/schemas/spec.json +186 -0
- package/schemas/v1.json +1116 -0
- package/schemas/v2/availability.json +66 -0
- package/schemas/v2/capabilities.json +182 -0
- package/schemas/v2/capability-profile.json +174 -0
- package/schemas/v2/computer-use.json +222 -0
- package/schemas/v2/context-policy.json +148 -0
- package/schemas/v2/endpoint.json +2 -0
- package/schemas/v2/error-codes.yaml +225 -0
- package/schemas/v2/errors.json +250 -0
- package/schemas/v2/execution-metadata.json +59 -0
- package/schemas/v2/mcp.json +225 -0
- package/schemas/v2/message-roles.json +10 -0
- package/schemas/v2/multimodal.json +297 -0
- package/schemas/v2/pricing.json +14 -0
- package/schemas/v2/provider-contract.json +317 -0
- package/schemas/v2/provider.json +203 -0
- package/schemas/v2/regions.json +14 -0
- package/v1/models/ai21.yaml +97 -0
- package/v1/models/baichuan.yaml +95 -0
- package/v1/models/cerebras.yaml +99 -0
- package/v1/models/claude.yaml +59 -0
- package/v1/models/cohere.yaml +165 -0
- package/v1/models/deepinfra.yaml +29 -0
- package/v1/models/deepseek-chat.yaml +32 -0
- package/v1/models/doubao.yaml +129 -0
- package/v1/models/ernie.yaml +146 -0
- package/v1/models/fireworks.yaml +145 -0
- package/v1/models/gemini.yaml +32 -0
- package/v1/models/gpt.yaml +90 -0
- package/v1/models/grok.yaml +74 -0
- package/v1/models/huggingface.yaml +110 -0
- package/v1/models/hunyuan.yaml +164 -0
- package/v1/models/jina.yaml +98 -0
- package/v1/models/lepton.yaml +130 -0
- package/v1/models/llama.yaml +91 -0
- package/v1/models/minimax.yaml +132 -0
- package/v1/models/mistral.yaml +100 -0
- package/v1/models/moonshot.yaml +97 -0
- package/v1/models/nvidia.yaml +118 -0
- package/v1/models/palmyra.yaml +59 -0
- package/v1/models/perplexity.yaml +97 -0
- package/v1/models/qwen.yaml +27 -0
- package/v1/models/replicate.yaml +136 -0
- package/v1/models/sensenova.yaml +97 -0
- package/v1/models/siliconflow.yaml +29 -0
- package/v1/models/spark.yaml +114 -0
- package/v1/models/stable-diffusion.yaml +113 -0
- package/v1/models/tiangong.yaml +114 -0
- package/v1/models/together.yaml +29 -0
- package/v1/models/yi.yaml +132 -0
- package/v1/models/zhipu.yaml +163 -0
- package/v1/providers/ai21.yaml +176 -0
- package/v1/providers/anthropic.yaml +209 -0
- package/v1/providers/anyscale.yaml +106 -0
- package/v1/providers/azure.yaml +155 -0
- package/v1/providers/baichuan.yaml +168 -0
- package/v1/providers/baidu.yaml +173 -0
- package/v1/providers/cerebras.yaml +178 -0
- package/v1/providers/cohere.yaml +175 -0
- package/v1/providers/deepinfra.yaml +156 -0
- package/v1/providers/deepseek.yaml +189 -0
- package/v1/providers/doubao.yaml +172 -0
- package/v1/providers/fireworks.yaml +187 -0
- package/v1/providers/gemini.yaml +229 -0
- package/v1/providers/groq.yaml +181 -0
- package/v1/providers/huggingface.yaml +157 -0
- package/v1/providers/hunyuan.yaml +162 -0
- package/v1/providers/jina.yaml +171 -0
- package/v1/providers/lepton.yaml +183 -0
- package/v1/providers/minimax.yaml +172 -0
- package/v1/providers/mistral.yaml +186 -0
- package/v1/providers/moonshot.yaml +176 -0
- package/v1/providers/nvidia.yaml +172 -0
- package/v1/providers/openai.yaml +224 -0
- package/v1/providers/openrouter.yaml +181 -0
- package/v1/providers/perplexity.yaml +144 -0
- package/v1/providers/qwen.yaml +192 -0
- package/v1/providers/replicate.yaml +159 -0
- package/v1/providers/sensenova.yaml +153 -0
- package/v1/providers/siliconflow.yaml +156 -0
- package/v1/providers/spark.yaml +167 -0
- package/v1/providers/stability.yaml +185 -0
- package/v1/providers/tiangong.yaml +170 -0
- package/v1/providers/together.yaml +156 -0
- package/v1/providers/writer.yaml +212 -0
- package/v1/providers/xai.yaml +206 -0
- package/v1/providers/yi.yaml +168 -0
- package/v1/providers/zhipu.yaml +171 -0
- package/v1/spec.yaml +637 -0
- package/v2/providers/anthropic.yaml +244 -0
- package/v2/providers/cohere.yaml +75 -0
- package/v2/providers/deepseek.yaml +176 -0
- package/v2/providers/doubao.yaml +169 -0
- package/v2/providers/google.yaml +245 -0
- package/v2/providers/jina.yaml +55 -0
- package/v2/providers/moonshot.yaml +186 -0
- package/v2/providers/openai.yaml +266 -0
- package/v2/providers/qwen.yaml +174 -0
- package/v2/providers/zhipu.yaml +176 -0
- package/v2-alpha/providers/anthropic.yaml +134 -0
- package/v2-alpha/providers/gemini.yaml +144 -0
- package/v2-alpha/providers/openai.yaml +154 -0
- package/v2-alpha/spec.yaml +512 -0
package/v1/spec.yaml
ADDED
|
@@ -0,0 +1,637 @@
|
|
|
1
|
+
version: "1.1"
|
|
2
|
+
metadata:
|
|
3
|
+
description: "AI-Protocol v1.1 - Standard Specification for AI Model Integration"
|
|
4
|
+
last_updated: "2026-01-31"
|
|
5
|
+
release_version: "1.1.1"
|
|
6
|
+
authors: ["AI-Protocol Team"]
|
|
7
|
+
license: "MIT OR Apache-2.0"
|
|
8
|
+
|
|
9
|
+
# =========================================================
|
|
10
|
+
# Version Semantics (Important for Runtime Compatibility)
|
|
11
|
+
# =========================================================
|
|
12
|
+
# This specification uses a layered versioning model:
|
|
13
|
+
#
|
|
14
|
+
# 1. spec_version (this file's `version` field):
|
|
15
|
+
# - Defines the schema structure and field definitions
|
|
16
|
+
# - Runtimes MUST validate manifests against the matching schema version
|
|
17
|
+
# - Format: MAJOR.MINOR (e.g., "1.1")
|
|
18
|
+
#
|
|
19
|
+
# 2. protocol_version (in provider manifests):
|
|
20
|
+
# - Indicates which protocol features the provider config uses
|
|
21
|
+
# - Runtimes SHOULD select appropriate adapters based on this
|
|
22
|
+
# - Format: MAJOR.MINOR (e.g., "1.5")
|
|
23
|
+
#
|
|
24
|
+
# 3. release_version (in metadata):
|
|
25
|
+
# - Full semantic version for this specification release
|
|
26
|
+
# - Format: MAJOR.MINOR.PATCH (e.g., "1.1.1")
|
|
27
|
+
#
|
|
28
|
+
# Runtime Alignment Guidelines:
|
|
29
|
+
# - Runtimes MUST check `protocol_version` before processing provider configs
|
|
30
|
+
# - Runtimes SHOULD support multiple protocol versions for backward compatibility
|
|
31
|
+
# - Runtimes SHOULD NOT hard-code branch names; use version tags or schema URLs
|
|
32
|
+
# - Schema URL example: https://raw.githubusercontent.com/ailib-official/ai-protocol/v0.2.1/schemas/v1.json
|
|
33
|
+
|
|
34
|
+
# =========================================================
|
|
35
|
+
# Standard Schema (2025 Extensions)
|
|
36
|
+
# =========================================================
|
|
37
|
+
standard_schema:
|
|
38
|
+
# Standard parameter definitions - unified developer interface
|
|
39
|
+
parameters:
|
|
40
|
+
temperature:
|
|
41
|
+
type: float
|
|
42
|
+
range: [0.0, 2.0]
|
|
43
|
+
default: 1.0
|
|
44
|
+
description: "Controls randomness in the output"
|
|
45
|
+
max_tokens:
|
|
46
|
+
type: integer
|
|
47
|
+
min: 1
|
|
48
|
+
max: 32768
|
|
49
|
+
description: "Maximum number of tokens to generate"
|
|
50
|
+
stream:
|
|
51
|
+
type: boolean
|
|
52
|
+
default: false
|
|
53
|
+
description: "Enable streaming responses"
|
|
54
|
+
top_p:
|
|
55
|
+
type: float
|
|
56
|
+
range: [0.0, 1.0]
|
|
57
|
+
default: 1.0
|
|
58
|
+
description: "Nucleus sampling parameter"
|
|
59
|
+
|
|
60
|
+
frequency_penalty:
|
|
61
|
+
type: float
|
|
62
|
+
range: [-2.0, 2.0]
|
|
63
|
+
default: 0.0
|
|
64
|
+
description: "Penalize frequent tokens (OpenAI-compatible)"
|
|
65
|
+
|
|
66
|
+
presence_penalty:
|
|
67
|
+
type: float
|
|
68
|
+
range: [-2.0, 2.0]
|
|
69
|
+
default: 0.0
|
|
70
|
+
description: "Penalize repeated presence (OpenAI-compatible)"
|
|
71
|
+
|
|
72
|
+
top_k:
|
|
73
|
+
type: integer
|
|
74
|
+
min: 1
|
|
75
|
+
max: 500
|
|
76
|
+
description: "Top-K filtering (Gemini/open-source compatible)"
|
|
77
|
+
|
|
78
|
+
stop_sequences:
|
|
79
|
+
type: array
|
|
80
|
+
description: "Stop sequences to truncate generation"
|
|
81
|
+
|
|
82
|
+
logprobs:
|
|
83
|
+
type: boolean
|
|
84
|
+
default: false
|
|
85
|
+
description: "Return token logprobs when provider supports"
|
|
86
|
+
|
|
87
|
+
top_logprobs:
|
|
88
|
+
type: integer
|
|
89
|
+
min: 1
|
|
90
|
+
max: 5
|
|
91
|
+
description: "Number of top logprobs to return"
|
|
92
|
+
|
|
93
|
+
seed:
|
|
94
|
+
type: integer
|
|
95
|
+
description: "Deterministic seed when supported"
|
|
96
|
+
|
|
97
|
+
tool_choice:
|
|
98
|
+
type: string_or_object
|
|
99
|
+
values: ["auto", "none", "required", "specific"]
|
|
100
|
+
default: "auto"
|
|
101
|
+
description: "Tool choice policy per request. Runtimes SHOULD accept either a simple string policy (auto/none/required) or a provider-specific object (e.g., OpenAI tool_choice object for selecting a specific tool)."
|
|
102
|
+
object_shape:
|
|
103
|
+
description: "Minimum object contract when tool selection is explicit."
|
|
104
|
+
fields:
|
|
105
|
+
type:
|
|
106
|
+
type: string
|
|
107
|
+
values: ["function", "tool"]
|
|
108
|
+
required: true
|
|
109
|
+
name:
|
|
110
|
+
type: string
|
|
111
|
+
required: true
|
|
112
|
+
description: "Function/tool name to invoke when type indicates a specific tool."
|
|
113
|
+
id:
|
|
114
|
+
type: string
|
|
115
|
+
required: false
|
|
116
|
+
description: "Provider-specific tool identifier when available."
|
|
117
|
+
|
|
118
|
+
response_format_mode:
|
|
119
|
+
type: string
|
|
120
|
+
values: ["auto", "text", "json_object", "json_schema"]
|
|
121
|
+
default: "auto"
|
|
122
|
+
description: "Structured output selection (OpenAI Responses / JSON mode)"
|
|
123
|
+
|
|
124
|
+
# 2025: Agentic reasoning control
|
|
125
|
+
reasoning_effort:
|
|
126
|
+
type: string
|
|
127
|
+
values: ["low", "medium", "high", "auto"]
|
|
128
|
+
default: "auto"
|
|
129
|
+
description: "Reasoning effort for agentic tasks"
|
|
130
|
+
|
|
131
|
+
# Tool definition standards
|
|
132
|
+
tools:
|
|
133
|
+
schema: "standard_tool_definition"
|
|
134
|
+
choice_policy: ["auto", "none", "required", "specific"]
|
|
135
|
+
strict_mode: true
|
|
136
|
+
parallel_calls: true
|
|
137
|
+
|
|
138
|
+
# Response format standards
|
|
139
|
+
response_format:
|
|
140
|
+
types: ["text", "json", "structured"]
|
|
141
|
+
schema_validation: true
|
|
142
|
+
|
|
143
|
+
# Multimodal content standards
|
|
144
|
+
multimodal:
|
|
145
|
+
image:
|
|
146
|
+
formats: ["png", "jpeg", "gif", "webp"]
|
|
147
|
+
max_size: "10MB"
|
|
148
|
+
|
|
149
|
+
# 2025: Agentic Loop configuration
|
|
150
|
+
agentic_loop:
|
|
151
|
+
max_iterations: 10
|
|
152
|
+
stop_conditions: ["tool_result", "final_answer"]
|
|
153
|
+
reasoning_effort: "auto"
|
|
154
|
+
|
|
155
|
+
# 2025: Streaming events model
|
|
156
|
+
streaming_events:
|
|
157
|
+
supported_events: ["PartialContentDelta", "ThinkingDelta", "PartialToolCall", "ToolCallStarted", "ToolCallEnded", "StreamError"]
|
|
158
|
+
thinking_blocks: true
|
|
159
|
+
citations_enabled: true
|
|
160
|
+
|
|
161
|
+
transport_definitions:
|
|
162
|
+
sse:
|
|
163
|
+
description: "Server-Sent Events (SSE) framing contract used by multiple providers."
|
|
164
|
+
framing:
|
|
165
|
+
delimiter: "\\n\\n"
|
|
166
|
+
data_prefix: "data: "
|
|
167
|
+
done_signal: "[DONE]"
|
|
168
|
+
notes:
|
|
169
|
+
- "Runtimes should split frames by delimiter and parse each data payload as JSON when applicable."
|
|
170
|
+
- "Providers MAY emit non-data lines; runtimes should ignore unknown lines unless a provider-specific decoder.strategy says otherwise."
|
|
171
|
+
|
|
172
|
+
# Termination / stop reasons (cross-provider normalization)
|
|
173
|
+
#
|
|
174
|
+
# Goal: normalize provider-specific termination signals (e.g. OpenAI finish_reason,
|
|
175
|
+
# Anthropic stop_reason) into a stable enum that runtimes and UIs can rely on.
|
|
176
|
+
termination_reasons:
|
|
177
|
+
standard_reasons:
|
|
178
|
+
- id: "end_turn"
|
|
179
|
+
description: "Model reached a natural stopping point."
|
|
180
|
+
- id: "max_tokens"
|
|
181
|
+
description: "Generation stopped because max tokens or context constraints were hit."
|
|
182
|
+
- id: "stop_sequence"
|
|
183
|
+
description: "Generation stopped because a user-provided stop sequence was encountered."
|
|
184
|
+
- id: "tool_use"
|
|
185
|
+
description: "Model decided to invoke one or more tools / function calls."
|
|
186
|
+
- id: "refusal"
|
|
187
|
+
description: "Model refused or safety classifiers intervened (policy)."
|
|
188
|
+
- id: "pause_turn"
|
|
189
|
+
description: "Turn was paused and may be continued in a subsequent request."
|
|
190
|
+
- id: "other"
|
|
191
|
+
description: "Provider-specific or unknown termination."
|
|
192
|
+
|
|
193
|
+
# =========================================================
|
|
194
|
+
# Telemetry & Feedback (optional, application-controlled)
|
|
195
|
+
# =========================================================
|
|
196
|
+
telemetry:
|
|
197
|
+
principles:
|
|
198
|
+
- "Telemetry MUST be opt-in. Runtimes MUST NOT force collection or transmission."
|
|
199
|
+
- "Generation and feedback reporting MUST be decoupled; feedback is asynchronous and may be dropped."
|
|
200
|
+
- "Data minimization is required. Prefer ids, indices, and hashes over raw prompt/output text."
|
|
201
|
+
- "A stable request_id (client-generated if necessary) is REQUIRED for closed-loop linkage."
|
|
202
|
+
feedback_events:
|
|
203
|
+
ChoiceSelection:
|
|
204
|
+
description: "User selection among multiple candidates (n > 1)."
|
|
205
|
+
fields:
|
|
206
|
+
request_id:
|
|
207
|
+
type: string
|
|
208
|
+
required: true
|
|
209
|
+
description: "Runtime-generated request identifier used for linkage (not necessarily provider's request id)."
|
|
210
|
+
chosen_index:
|
|
211
|
+
type: integer
|
|
212
|
+
required: true
|
|
213
|
+
description: "0-based index of the chosen candidate."
|
|
214
|
+
rejected_indices:
|
|
215
|
+
type: array
|
|
216
|
+
items: integer
|
|
217
|
+
required: false
|
|
218
|
+
description: "Optional list of 0-based indices that were shown but not chosen."
|
|
219
|
+
latency_to_select_ms:
|
|
220
|
+
type: integer
|
|
221
|
+
required: false
|
|
222
|
+
description: "Time from rendering candidates to the user making a selection (milliseconds)."
|
|
223
|
+
ui_context:
|
|
224
|
+
type: object
|
|
225
|
+
required: false
|
|
226
|
+
description: "Optional UI / experiment context (A/B bucket, component, etc.)."
|
|
227
|
+
candidate_hashes:
|
|
228
|
+
type: array
|
|
229
|
+
items: string
|
|
230
|
+
required: false
|
|
231
|
+
description: "Optional hashes for the rendered candidates; enables audit/linkage without uploading full text."
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# Informative mapping guidance (NOT exhaustive; providers may evolve)
|
|
235
|
+
provider_mappings:
|
|
236
|
+
anthropic:
|
|
237
|
+
source_field: "stop_reason"
|
|
238
|
+
notes:
|
|
239
|
+
- "Non-streaming: stop_reason is always non-null."
|
|
240
|
+
- "Streaming: stop_reason is null in message_start, then non-null otherwise."
|
|
241
|
+
map:
|
|
242
|
+
end_turn: "end_turn"
|
|
243
|
+
max_tokens: "max_tokens"
|
|
244
|
+
stop_sequence: "stop_sequence"
|
|
245
|
+
tool_use: "tool_use"
|
|
246
|
+
pause_turn: "pause_turn"
|
|
247
|
+
refusal: "refusal"
|
|
248
|
+
openai:
|
|
249
|
+
source_field: "finish_reason"
|
|
250
|
+
notes:
|
|
251
|
+
- "finish_reason is reported per candidate."
|
|
252
|
+
map:
|
|
253
|
+
stop: "end_turn"
|
|
254
|
+
length: "max_tokens"
|
|
255
|
+
tool_calls: "tool_use"
|
|
256
|
+
content_filter: "refusal"
|
|
257
|
+
null: "other"
|
|
258
|
+
|
|
259
|
+
runtime_behavior:
|
|
260
|
+
- "Runtimes SHOULD surface standardized termination reasons to users and logs."
|
|
261
|
+
- "Runtimes SHOULD preserve the original provider-specific reason in extra metadata for debugging."
|
|
262
|
+
|
|
263
|
+
# Standard event payload definitions (used by runtimes for normalization)
|
|
264
|
+
event_definitions:
|
|
265
|
+
StreamError:
|
|
266
|
+
description: "A stream-level error event emitted after a streaming/realtime session has been established. Most errors are recoverable and the session may stay open; runtimes SHOULD monitor and log error messages by default."
|
|
267
|
+
payload:
|
|
268
|
+
type: object
|
|
269
|
+
# Providers differ: some emit a rich error object with event ids (e.g. OpenAI realtime),
|
|
270
|
+
# others may emit a simple string (e.g. SSE frames with an `error` message).
|
|
271
|
+
required: ["error"]
|
|
272
|
+
properties:
|
|
273
|
+
type:
|
|
274
|
+
type: string
|
|
275
|
+
description: "Optional event type when provided by the stream (e.g., 'error')."
|
|
276
|
+
event_id:
|
|
277
|
+
type: string
|
|
278
|
+
description: "Optional unique ID of the server event when provided."
|
|
279
|
+
error:
|
|
280
|
+
type: any
|
|
281
|
+
description: "Error payload. May be an object (rich error) or a string (human-readable message), depending on provider."
|
|
282
|
+
model_version:
|
|
283
|
+
type: string
|
|
284
|
+
description: "Optional model version string when provided by the stream."
|
|
285
|
+
|
|
286
|
+
# Standard content block model (cross-provider normalization)
|
|
287
|
+
#
|
|
288
|
+
# Goal: normalize tool invocation and tool result payloads across providers that represent
|
|
289
|
+
# tools as content blocks (e.g. Anthropic tool_use/tool_result) or as tool_calls arrays (e.g. OpenAI).
|
|
290
|
+
content_blocks:
|
|
291
|
+
types: ["text", "tool_use", "tool_result", "thinking"]
|
|
292
|
+
|
|
293
|
+
tool_use:
|
|
294
|
+
description: "A request from the model to invoke a tool."
|
|
295
|
+
required_fields: ["type", "id", "name", "input"]
|
|
296
|
+
fields:
|
|
297
|
+
type:
|
|
298
|
+
const: "tool_use"
|
|
299
|
+
id:
|
|
300
|
+
type: string
|
|
301
|
+
description: "Unique identifier for this tool invocation (tool_use id / tool_call id)."
|
|
302
|
+
name:
|
|
303
|
+
type: string
|
|
304
|
+
description: "Tool name to invoke."
|
|
305
|
+
input:
|
|
306
|
+
type: object
|
|
307
|
+
description: "Tool input parameters (structured object)."
|
|
308
|
+
|
|
309
|
+
tool_result:
|
|
310
|
+
description: "A tool execution result provided back to the model."
|
|
311
|
+
required_fields: ["type", "tool_use_id", "content"]
|
|
312
|
+
fields:
|
|
313
|
+
type:
|
|
314
|
+
const: "tool_result"
|
|
315
|
+
tool_use_id:
|
|
316
|
+
type: string
|
|
317
|
+
description: "The id of the corresponding tool_use/tool_call."
|
|
318
|
+
content:
|
|
319
|
+
type: any
|
|
320
|
+
description: "Result content (provider/runtime-defined). Prefer a structured object or an array of text blocks."
|
|
321
|
+
is_error:
|
|
322
|
+
type: boolean
|
|
323
|
+
description: "Optional: whether the tool execution failed."
|
|
324
|
+
|
|
325
|
+
# Retry policy model (cross-provider normalization)
|
|
326
|
+
#
|
|
327
|
+
# Goal: allow providers to declare retry guidance so runtimes can apply consistent behavior.
|
|
328
|
+
retry_policy:
|
|
329
|
+
fields:
|
|
330
|
+
strategy:
|
|
331
|
+
values: ["none", "exponential_backoff"]
|
|
332
|
+
max_retries:
|
|
333
|
+
type: integer
|
|
334
|
+
min: 0
|
|
335
|
+
min_delay_ms:
|
|
336
|
+
type: integer
|
|
337
|
+
min: 0
|
|
338
|
+
max_delay_ms:
|
|
339
|
+
type: integer
|
|
340
|
+
min: 0
|
|
341
|
+
jitter:
|
|
342
|
+
values: ["none", "full", "equal"]
|
|
343
|
+
retry_on_http_status:
|
|
344
|
+
type: array
|
|
345
|
+
items: integer
|
|
346
|
+
retry_on_error_status:
|
|
347
|
+
type: array
|
|
348
|
+
items: string
|
|
349
|
+
runtime_notes:
|
|
350
|
+
- "Runtimes SHOULD treat retry_policy as guidance; user overrides may apply."
|
|
351
|
+
- "Runtimes SHOULD apply exponential backoff with an upper bound and optional jitter when enabled."
|
|
352
|
+
|
|
353
|
+
# Rate limit headers (cross-provider normalization)
|
|
354
|
+
#
|
|
355
|
+
# Goal: allow providers to declare which HTTP response headers expose rate limits.
|
|
356
|
+
rate_limit_headers:
|
|
357
|
+
fields:
|
|
358
|
+
requests_limit:
|
|
359
|
+
type: string
|
|
360
|
+
description: "Header name that reports request limit."
|
|
361
|
+
requests_remaining:
|
|
362
|
+
type: string
|
|
363
|
+
description: "Header name that reports remaining requests."
|
|
364
|
+
requests_reset:
|
|
365
|
+
type: string
|
|
366
|
+
description: "Header name that reports time until request limit resets (format is provider-specific)."
|
|
367
|
+
tokens_limit:
|
|
368
|
+
type: string
|
|
369
|
+
description: "Header name that reports token limit."
|
|
370
|
+
tokens_remaining:
|
|
371
|
+
type: string
|
|
372
|
+
description: "Header name that reports remaining tokens."
|
|
373
|
+
tokens_reset:
|
|
374
|
+
type: string
|
|
375
|
+
description: "Header name that reports time until token limit resets (format is provider-specific)."
|
|
376
|
+
retry_after:
|
|
377
|
+
type: string
|
|
378
|
+
description: "Header name that reports seconds until retry allowed (Retry-After style), if provided."
|
|
379
|
+
runtime_notes:
|
|
380
|
+
- "Runtimes SHOULD treat rate_limit_headers as optional; not all providers return all headers."
|
|
381
|
+
- "When retry_after is present on 429, runtimes SHOULD respect it."
|
|
382
|
+
|
|
383
|
+
# Error handling (cross-provider normalization)
|
|
384
|
+
#
|
|
385
|
+
# Goal: define a stable, cross-provider error taxonomy and runtime behavior guidelines.
|
|
386
|
+
#
|
|
387
|
+
# Provider configs should:
|
|
388
|
+
# - Declare how to extract errors via `features.response_mapping.error.*`
|
|
389
|
+
# - Declare retry guidance via `retry_policy`
|
|
390
|
+
# - Declare rate limit headers via `rate_limit_headers`
|
|
391
|
+
error_handling:
|
|
392
|
+
# Stable classification labels that runtimes can surface consistently.
|
|
393
|
+
# Providers MAY offer richer or more specific codes; runtimes SHOULD preserve raw provider fields.
|
|
394
|
+
error_classes:
|
|
395
|
+
- id: "invalid_request"
|
|
396
|
+
description: "Malformed request body, invalid parameters, or unsupported operation."
|
|
397
|
+
default_retryable: false
|
|
398
|
+
- id: "authentication"
|
|
399
|
+
description: "Missing/invalid credentials (API key/token)."
|
|
400
|
+
default_retryable: false
|
|
401
|
+
- id: "permission_denied"
|
|
402
|
+
description: "Caller is authenticated but not permitted to access the resource."
|
|
403
|
+
default_retryable: false
|
|
404
|
+
- id: "not_found"
|
|
405
|
+
description: "Requested resource not found."
|
|
406
|
+
default_retryable: false
|
|
407
|
+
- id: "quota_exhausted"
|
|
408
|
+
description: "Account quota/billing/spend limit exceeded."
|
|
409
|
+
default_retryable: false
|
|
410
|
+
- id: "rate_limited"
|
|
411
|
+
description: "Throttled due to request/token limits; typically retryable with backoff."
|
|
412
|
+
default_retryable: true
|
|
413
|
+
- id: "request_too_large"
|
|
414
|
+
description: "Payload too large (e.g., context too long, request too big)."
|
|
415
|
+
default_retryable: false
|
|
416
|
+
- id: "timeout"
|
|
417
|
+
description: "Request timed out or deadline exceeded."
|
|
418
|
+
default_retryable: true
|
|
419
|
+
- id: "conflict"
|
|
420
|
+
description: "Request conflict (often safe to retry depending on semantics)."
|
|
421
|
+
default_retryable: true
|
|
422
|
+
- id: "cancelled"
|
|
423
|
+
description: "Request was cancelled by client or upstream."
|
|
424
|
+
default_retryable: false
|
|
425
|
+
- id: "server_error"
|
|
426
|
+
description: "Transient server-side failure (5xx)."
|
|
427
|
+
default_retryable: true
|
|
428
|
+
- id: "overloaded"
|
|
429
|
+
description: "Service overloaded / temporarily unavailable."
|
|
430
|
+
default_retryable: true
|
|
431
|
+
- id: "other"
|
|
432
|
+
description: "Unknown or provider-specific classification."
|
|
433
|
+
default_retryable: false
|
|
434
|
+
|
|
435
|
+
# Common envelopes observed across providers (informative, not prescriptive).
|
|
436
|
+
# Runtimes SHOULD use provider mappings to extract fields.
|
|
437
|
+
envelope_patterns:
|
|
438
|
+
openai_style:
|
|
439
|
+
shape: '{ "error": { "message": string, "type": string, "code"?: string|null, "param"?: string|null } }'
|
|
440
|
+
extraction:
|
|
441
|
+
message: "error.message"
|
|
442
|
+
type: "error.type"
|
|
443
|
+
code: "error.code"
|
|
444
|
+
param: "error.param"
|
|
445
|
+
anthropic_style:
|
|
446
|
+
shape: '{ "type": "error", "error": { "type": string, "message": string }, "request_id": string }'
|
|
447
|
+
extraction:
|
|
448
|
+
message: "error.message"
|
|
449
|
+
type: "error.type"
|
|
450
|
+
request_id: "request_id"
|
|
451
|
+
google_cloud_style:
|
|
452
|
+
shape: '{ "error": { "code": integer, "message": string, "status": string, "errors"?: [], "details"?: [] } }'
|
|
453
|
+
extraction:
|
|
454
|
+
message: "error.message"
|
|
455
|
+
http_code: "error.code"
|
|
456
|
+
status: "error.status"
|
|
457
|
+
errors: "error.errors"
|
|
458
|
+
details: "error.details"
|
|
459
|
+
|
|
460
|
+
# Canonical guidance for classification and retries.
|
|
461
|
+
runtime_behavior:
|
|
462
|
+
- "Runtimes SHOULD map provider-specific errors into error_classes for logs/UI, while preserving raw fields."
|
|
463
|
+
- "Runtimes SHOULD treat HTTP 401/403 as authentication/permission_denied and avoid retries by default."
|
|
464
|
+
- "Runtimes SHOULD treat HTTP 400/422 as invalid_request and avoid retries by default."
|
|
465
|
+
- "On HTTP 429, runtimes SHOULD distinguish rate-limited vs quota-exhausted when possible (often via message/type/code)."
|
|
466
|
+
- "On HTTP 429, if a Retry-After header (or provider-declared retry_after) is present, runtimes SHOULD respect it."
|
|
467
|
+
- "On transient 5xx/overloaded errors, runtimes SHOULD retry with exponential backoff and a cap (see retry_policy)."
|
|
468
|
+
- "Runtimes SHOULD cap retries and add jitter to avoid retry storms."
|
|
469
|
+
|
|
470
|
+
# Provider field contract: how provider configs combine to express error handling
|
|
471
|
+
#
|
|
472
|
+
# Providers SHOULD declare:
|
|
473
|
+
# 1. `features.response_mapping.error.*`: JSON paths to extract error fields from responses.
|
|
474
|
+
# 2. `error_classification`: How HTTP status codes and provider error status strings map to error_classes.
|
|
475
|
+
# 3. `retry_policy`: Which errors are retryable and with what strategy (backoff, delays, caps).
|
|
476
|
+
# 4. `rate_limit_headers`: Which HTTP headers expose rate limit state (for monitoring/throttling).
|
|
477
|
+
#
|
|
478
|
+
# Runtime interpretation:
|
|
479
|
+
# - Extract error fields using `response_mapping.error.*` paths.
|
|
480
|
+
# - Classify errors using `error_classification` (HTTP status → error_class, or provider status → error_class).
|
|
481
|
+
# - Determine retry eligibility from `error_classification` output + `retry_policy.retry_on_http_status` + `retry_policy.retry_on_error_status`.
|
|
482
|
+
# - Apply `retry_policy` strategy (exponential_backoff) with declared delays/jitter/caps.
|
|
483
|
+
# - Monitor `rate_limit_headers` for rate limit state (remaining/reset times).
|
|
484
|
+
# - On 429, check `rate_limit_headers.retry_after` (if present) before applying backoff.
|
|
485
|
+
#
|
|
486
|
+
# Example flow:
|
|
487
|
+
# 1. HTTP 429 response received.
|
|
488
|
+
# 2. Extract `error.message` using `features.response_mapping.error.message_path`.
|
|
489
|
+
# 3. Classify via `error_classification.by_http_status["429"]` → "rate_limited" (or "quota_exhausted" if detectable).
|
|
490
|
+
# 4. Check `retry_policy.retry_on_http_status` includes 429 → retryable.
|
|
491
|
+
# 5. Check `rate_limit_headers.retry_after` header value (if present) → use that delay; otherwise use `retry_policy.min_delay_ms` + exponential backoff.
|
|
492
|
+
# 6. Apply retry with `retry_policy.strategy`, `retry_policy.jitter`, up to `retry_policy.max_retries`.
|
|
493
|
+
provider_field_contract:
|
|
494
|
+
purpose: "Clarify how provider configs work together to enable consistent error handling and retry behavior."
|
|
495
|
+
fields_relationship:
|
|
496
|
+
- "response_mapping.error: Extracts raw error fields (message, code, type, etc.) from provider responses."
|
|
497
|
+
- "error_classification: Maps HTTP status / provider status strings to standard error_classes."
|
|
498
|
+
- "retry_policy: Declares which errors are retryable and how to retry (strategy, delays, caps)."
|
|
499
|
+
- "rate_limit_headers: Declares which HTTP headers expose rate limit state (for throttling/monitoring)."
|
|
500
|
+
runtime_integration:
|
|
501
|
+
- "Runtimes SHOULD use error_classification output to determine retry eligibility (default_retryable from error_class + explicit retry_policy overrides)."
|
|
502
|
+
- "Runtimes SHOULD combine error_classification with retry_policy.retry_on_http_status and retry_policy.retry_on_error_status to finalize retry decisions."
|
|
503
|
+
- "Runtimes SHOULD respect rate_limit_headers.retry_after when present on 429 responses."
|
|
504
|
+
- "Runtimes SHOULD preserve raw provider error fields (via response_mapping) alongside normalized error_class for debugging."
|
|
505
|
+
|
|
506
|
+
# =========================================================
|
|
507
|
+
# Provider Manifest Schema (v1.1+ Extensions)
|
|
508
|
+
# =========================================================
|
|
509
|
+
provider_manifest:
|
|
510
|
+
# Provider Identity & Core Metadata
|
|
511
|
+
identity:
|
|
512
|
+
id:
|
|
513
|
+
type: string
|
|
514
|
+
pattern: "^[a-z0-9][a-z0-9-_]{1,63}$"
|
|
515
|
+
required: true
|
|
516
|
+
description: "Stable provider identifier (lowercase, alphanumeric, hyphens, underscores)"
|
|
517
|
+
name:
|
|
518
|
+
type: string
|
|
519
|
+
required: false
|
|
520
|
+
description: "Human-readable provider name"
|
|
521
|
+
status:
|
|
522
|
+
type: string
|
|
523
|
+
values: ["stable", "beta", "deprecated"]
|
|
524
|
+
default: "stable"
|
|
525
|
+
description: "Provider status"
|
|
526
|
+
category:
|
|
527
|
+
type: string
|
|
528
|
+
values: ["ai_provider", "model_provider", "third_party_aggregator"]
|
|
529
|
+
required: false
|
|
530
|
+
description: "Provider category"
|
|
531
|
+
official_url:
|
|
532
|
+
type: string
|
|
533
|
+
format: uri
|
|
534
|
+
required: false
|
|
535
|
+
description: "Official documentation website"
|
|
536
|
+
support_contact:
|
|
537
|
+
type: string
|
|
538
|
+
format: uri
|
|
539
|
+
required: false
|
|
540
|
+
description: "Support or feedback link"
|
|
541
|
+
|
|
542
|
+
# Endpoint Definition (Required)
|
|
543
|
+
endpoint:
|
|
544
|
+
base_url:
|
|
545
|
+
type: string
|
|
546
|
+
format: uri
|
|
547
|
+
required: true
|
|
548
|
+
description: "Absolute base URL of the provider API"
|
|
549
|
+
protocol:
|
|
550
|
+
type: string
|
|
551
|
+
values: ["https", "http", "ws", "wss"]
|
|
552
|
+
default: "https"
|
|
553
|
+
description: "Protocol used for communication"
|
|
554
|
+
timeout_ms:
|
|
555
|
+
type: integer
|
|
556
|
+
min: 100
|
|
557
|
+
default: 10000
|
|
558
|
+
description: "Default timeout for requests in milliseconds"
|
|
559
|
+
|
|
560
|
+
# Availability and Health Checking (Required)
|
|
561
|
+
availability:
|
|
562
|
+
required:
|
|
563
|
+
type: boolean
|
|
564
|
+
required: true
|
|
565
|
+
description: "Whether the provider must be reachable at runtime startup"
|
|
566
|
+
regions:
|
|
567
|
+
type: array
|
|
568
|
+
min_items: 1
|
|
569
|
+
items:
|
|
570
|
+
type: string
|
|
571
|
+
values: ["cn", "global", "us", "eu"]
|
|
572
|
+
unique_items: true
|
|
573
|
+
required: true
|
|
574
|
+
description: "Geographic regions where the provider is available"
|
|
575
|
+
semantics:
|
|
576
|
+
cn: "Reachable from mainland China without special routing"
|
|
577
|
+
global: "Reachable from the general international Internet. This does NOT imply reachability from mainland China."
|
|
578
|
+
us: "Explicitly deployed in US region only"
|
|
579
|
+
eu: "Explicitly deployed in EU region only"
|
|
580
|
+
notes:
|
|
581
|
+
- "Multiple regions MAY be listed (e.g., ['cn', 'global']) to explicitly declare dual availability"
|
|
582
|
+
- "Runtimes MUST NOT assume implicit region coverage"
|
|
583
|
+
check:
|
|
584
|
+
method:
|
|
585
|
+
type: string
|
|
586
|
+
values: ["HEAD", "GET"]
|
|
587
|
+
required: true
|
|
588
|
+
description: "HTTP method for health check"
|
|
589
|
+
path:
|
|
590
|
+
type: string
|
|
591
|
+
pattern: "^/"
|
|
592
|
+
required: true
|
|
593
|
+
description: "Path relative to base_url for health check"
|
|
594
|
+
expected_status:
|
|
595
|
+
type: array
|
|
596
|
+
min_items: 1
|
|
597
|
+
items:
|
|
598
|
+
type: integer
|
|
599
|
+
min: 100
|
|
600
|
+
max: 599
|
|
601
|
+
required: true
|
|
602
|
+
description: "HTTP status codes that indicate healthy state"
|
|
603
|
+
timeout_ms:
|
|
604
|
+
type: integer
|
|
605
|
+
min: 100
|
|
606
|
+
default: 3000
|
|
607
|
+
description: "Timeout for health check in milliseconds"
|
|
608
|
+
notes:
|
|
609
|
+
- "Health checks SHOULD NOT require authentication"
|
|
610
|
+
- "Unauthorized responses (401) MAY be considered healthy if they indicate the service is reachable"
|
|
611
|
+
|
|
612
|
+
# Capabilities Declaration (Required)
|
|
613
|
+
capabilities:
|
|
614
|
+
streaming:
|
|
615
|
+
type: boolean
|
|
616
|
+
required: true
|
|
617
|
+
description: "Supports streaming responses"
|
|
618
|
+
tools:
|
|
619
|
+
type: boolean
|
|
620
|
+
required: true
|
|
621
|
+
description: "Supports tool / function calling"
|
|
622
|
+
vision:
|
|
623
|
+
type: boolean
|
|
624
|
+
required: true
|
|
625
|
+
description: "Supports image or multimodal inputs"
|
|
626
|
+
agentic:
|
|
627
|
+
type: boolean
|
|
628
|
+
default: false
|
|
629
|
+
description: "Supports agentic reasoning and multi-turn tool sequences"
|
|
630
|
+
reasoning:
|
|
631
|
+
type: boolean
|
|
632
|
+
default: false
|
|
633
|
+
description: "Supports extended reasoning / thinking blocks"
|
|
634
|
+
parallel_tools:
|
|
635
|
+
type: boolean
|
|
636
|
+
default: false
|
|
637
|
+
description: "Supports parallel tool invocations"
|