@ailib-official/ai-protocol 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/LICENSE +6 -0
  2. package/README.md +415 -0
  3. package/dist/index.json +8 -0
  4. package/dist/v1/models/ai21.json +140 -0
  5. package/dist/v1/models/baichuan.json +138 -0
  6. package/dist/v1/models/cerebras.json +147 -0
  7. package/dist/v1/models/claude.json +114 -0
  8. package/dist/v1/models/cohere.json +235 -0
  9. package/dist/v1/models/deepinfra.json +43 -0
  10. package/dist/v1/models/deepseek-chat.json +55 -0
  11. package/dist/v1/models/doubao.json +197 -0
  12. package/dist/v1/models/ernie.json +223 -0
  13. package/dist/v1/models/fireworks.json +222 -0
  14. package/dist/v1/models/gemini.json +58 -0
  15. package/dist/v1/models/gpt.json +166 -0
  16. package/dist/v1/models/grok.json +138 -0
  17. package/dist/v1/models/huggingface.json +183 -0
  18. package/dist/v1/models/hunyuan.json +255 -0
  19. package/dist/v1/models/jina.json +139 -0
  20. package/dist/v1/models/lepton.json +188 -0
  21. package/dist/v1/models/llama.json +143 -0
  22. package/dist/v1/models/minimax.json +194 -0
  23. package/dist/v1/models/mistral.json +177 -0
  24. package/dist/v1/models/moonshot.json +144 -0
  25. package/dist/v1/models/nvidia.json +212 -0
  26. package/dist/v1/models/palmyra.json +103 -0
  27. package/dist/v1/models/perplexity.json +143 -0
  28. package/dist/v1/models/qwen.json +49 -0
  29. package/dist/v1/models/replicate.json +206 -0
  30. package/dist/v1/models/sensenova.json +144 -0
  31. package/dist/v1/models/siliconflow.json +44 -0
  32. package/dist/v1/models/spark.json +173 -0
  33. package/dist/v1/models/stable-diffusion.json +161 -0
  34. package/dist/v1/models/tiangong.json +169 -0
  35. package/dist/v1/models/together.json +43 -0
  36. package/dist/v1/models/yi.json +199 -0
  37. package/dist/v1/models/zhipu.json +250 -0
  38. package/dist/v1/providers/ai21.json +215 -0
  39. package/dist/v1/providers/anthropic.json +253 -0
  40. package/dist/v1/providers/anyscale.json +115 -0
  41. package/dist/v1/providers/azure.json +188 -0
  42. package/dist/v1/providers/baichuan.json +205 -0
  43. package/dist/v1/providers/baidu.json +205 -0
  44. package/dist/v1/providers/cerebras.json +216 -0
  45. package/dist/v1/providers/cohere.json +209 -0
  46. package/dist/v1/providers/deepinfra.json +196 -0
  47. package/dist/v1/providers/deepseek.json +221 -0
  48. package/dist/v1/providers/doubao.json +209 -0
  49. package/dist/v1/providers/fireworks.json +227 -0
  50. package/dist/v1/providers/gemini.json +279 -0
  51. package/dist/v1/providers/groq.json +218 -0
  52. package/dist/v1/providers/huggingface.json +181 -0
  53. package/dist/v1/providers/hunyuan.json +198 -0
  54. package/dist/v1/providers/jina.json +202 -0
  55. package/dist/v1/providers/lepton.json +221 -0
  56. package/dist/v1/providers/minimax.json +209 -0
  57. package/dist/v1/providers/mistral.json +214 -0
  58. package/dist/v1/providers/moonshot.json +214 -0
  59. package/dist/v1/providers/nvidia.json +213 -0
  60. package/dist/v1/providers/openai.json +267 -0
  61. package/dist/v1/providers/openrouter.json +220 -0
  62. package/dist/v1/providers/perplexity.json +172 -0
  63. package/dist/v1/providers/qwen.json +231 -0
  64. package/dist/v1/providers/replicate.json +192 -0
  65. package/dist/v1/providers/sensenova.json +185 -0
  66. package/dist/v1/providers/siliconflow.json +197 -0
  67. package/dist/v1/providers/spark.json +204 -0
  68. package/dist/v1/providers/stability.json +221 -0
  69. package/dist/v1/providers/tiangong.json +207 -0
  70. package/dist/v1/providers/together.json +196 -0
  71. package/dist/v1/providers/writer.json +253 -0
  72. package/dist/v1/providers/xai.json +238 -0
  73. package/dist/v1/providers/yi.json +205 -0
  74. package/dist/v1/providers/zhipu.json +208 -0
  75. package/dist/v1/spec.json +783 -0
  76. package/dist/v2/providers/anthropic.json +372 -0
  77. package/dist/v2/providers/cohere.json +109 -0
  78. package/dist/v2/providers/deepseek.json +272 -0
  79. package/dist/v2/providers/doubao.json +260 -0
  80. package/dist/v2/providers/google.json +388 -0
  81. package/dist/v2/providers/jina.json +71 -0
  82. package/dist/v2/providers/moonshot.json +284 -0
  83. package/dist/v2/providers/openai.json +419 -0
  84. package/dist/v2/providers/qwen.json +274 -0
  85. package/dist/v2/providers/zhipu.json +257 -0
  86. package/dist/v2-alpha/providers/anthropic.json +182 -0
  87. package/dist/v2-alpha/providers/gemini.json +187 -0
  88. package/dist/v2-alpha/providers/openai.json +215 -0
  89. package/dist/v2-alpha/spec.json +644 -0
  90. package/package.json +61 -0
  91. package/schemas/spec.json +186 -0
  92. package/schemas/v1.json +1116 -0
  93. package/schemas/v2/availability.json +66 -0
  94. package/schemas/v2/capabilities.json +182 -0
  95. package/schemas/v2/capability-profile.json +174 -0
  96. package/schemas/v2/computer-use.json +222 -0
  97. package/schemas/v2/context-policy.json +148 -0
  98. package/schemas/v2/endpoint.json +2 -0
  99. package/schemas/v2/error-codes.yaml +225 -0
  100. package/schemas/v2/errors.json +250 -0
  101. package/schemas/v2/execution-metadata.json +59 -0
  102. package/schemas/v2/mcp.json +225 -0
  103. package/schemas/v2/message-roles.json +10 -0
  104. package/schemas/v2/multimodal.json +297 -0
  105. package/schemas/v2/pricing.json +14 -0
  106. package/schemas/v2/provider-contract.json +317 -0
  107. package/schemas/v2/provider.json +203 -0
  108. package/schemas/v2/regions.json +14 -0
  109. package/v1/models/ai21.yaml +97 -0
  110. package/v1/models/baichuan.yaml +95 -0
  111. package/v1/models/cerebras.yaml +99 -0
  112. package/v1/models/claude.yaml +59 -0
  113. package/v1/models/cohere.yaml +165 -0
  114. package/v1/models/deepinfra.yaml +29 -0
  115. package/v1/models/deepseek-chat.yaml +32 -0
  116. package/v1/models/doubao.yaml +129 -0
  117. package/v1/models/ernie.yaml +146 -0
  118. package/v1/models/fireworks.yaml +145 -0
  119. package/v1/models/gemini.yaml +32 -0
  120. package/v1/models/gpt.yaml +90 -0
  121. package/v1/models/grok.yaml +74 -0
  122. package/v1/models/huggingface.yaml +110 -0
  123. package/v1/models/hunyuan.yaml +164 -0
  124. package/v1/models/jina.yaml +98 -0
  125. package/v1/models/lepton.yaml +130 -0
  126. package/v1/models/llama.yaml +91 -0
  127. package/v1/models/minimax.yaml +132 -0
  128. package/v1/models/mistral.yaml +100 -0
  129. package/v1/models/moonshot.yaml +97 -0
  130. package/v1/models/nvidia.yaml +118 -0
  131. package/v1/models/palmyra.yaml +59 -0
  132. package/v1/models/perplexity.yaml +97 -0
  133. package/v1/models/qwen.yaml +27 -0
  134. package/v1/models/replicate.yaml +136 -0
  135. package/v1/models/sensenova.yaml +97 -0
  136. package/v1/models/siliconflow.yaml +29 -0
  137. package/v1/models/spark.yaml +114 -0
  138. package/v1/models/stable-diffusion.yaml +113 -0
  139. package/v1/models/tiangong.yaml +114 -0
  140. package/v1/models/together.yaml +29 -0
  141. package/v1/models/yi.yaml +132 -0
  142. package/v1/models/zhipu.yaml +163 -0
  143. package/v1/providers/ai21.yaml +176 -0
  144. package/v1/providers/anthropic.yaml +209 -0
  145. package/v1/providers/anyscale.yaml +106 -0
  146. package/v1/providers/azure.yaml +155 -0
  147. package/v1/providers/baichuan.yaml +168 -0
  148. package/v1/providers/baidu.yaml +173 -0
  149. package/v1/providers/cerebras.yaml +178 -0
  150. package/v1/providers/cohere.yaml +175 -0
  151. package/v1/providers/deepinfra.yaml +156 -0
  152. package/v1/providers/deepseek.yaml +189 -0
  153. package/v1/providers/doubao.yaml +172 -0
  154. package/v1/providers/fireworks.yaml +187 -0
  155. package/v1/providers/gemini.yaml +229 -0
  156. package/v1/providers/groq.yaml +181 -0
  157. package/v1/providers/huggingface.yaml +157 -0
  158. package/v1/providers/hunyuan.yaml +162 -0
  159. package/v1/providers/jina.yaml +171 -0
  160. package/v1/providers/lepton.yaml +183 -0
  161. package/v1/providers/minimax.yaml +172 -0
  162. package/v1/providers/mistral.yaml +186 -0
  163. package/v1/providers/moonshot.yaml +176 -0
  164. package/v1/providers/nvidia.yaml +172 -0
  165. package/v1/providers/openai.yaml +224 -0
  166. package/v1/providers/openrouter.yaml +181 -0
  167. package/v1/providers/perplexity.yaml +144 -0
  168. package/v1/providers/qwen.yaml +192 -0
  169. package/v1/providers/replicate.yaml +159 -0
  170. package/v1/providers/sensenova.yaml +153 -0
  171. package/v1/providers/siliconflow.yaml +156 -0
  172. package/v1/providers/spark.yaml +167 -0
  173. package/v1/providers/stability.yaml +185 -0
  174. package/v1/providers/tiangong.yaml +170 -0
  175. package/v1/providers/together.yaml +156 -0
  176. package/v1/providers/writer.yaml +212 -0
  177. package/v1/providers/xai.yaml +206 -0
  178. package/v1/providers/yi.yaml +168 -0
  179. package/v1/providers/zhipu.yaml +171 -0
  180. package/v1/spec.yaml +637 -0
  181. package/v2/providers/anthropic.yaml +244 -0
  182. package/v2/providers/cohere.yaml +75 -0
  183. package/v2/providers/deepseek.yaml +176 -0
  184. package/v2/providers/doubao.yaml +169 -0
  185. package/v2/providers/google.yaml +245 -0
  186. package/v2/providers/jina.yaml +55 -0
  187. package/v2/providers/moonshot.yaml +186 -0
  188. package/v2/providers/openai.yaml +266 -0
  189. package/v2/providers/qwen.yaml +174 -0
  190. package/v2/providers/zhipu.yaml +176 -0
  191. package/v2-alpha/providers/anthropic.yaml +134 -0
  192. package/v2-alpha/providers/gemini.yaml +144 -0
  193. package/v2-alpha/providers/openai.yaml +154 -0
  194. package/v2-alpha/spec.yaml +512 -0
package/v1/spec.yaml ADDED
@@ -0,0 +1,637 @@
1
+ version: "1.1"
2
+ metadata:
3
+ description: "AI-Protocol v1.1 - Standard Specification for AI Model Integration"
4
+ last_updated: "2026-01-31"
5
+ release_version: "1.1.1"
6
+ authors: ["AI-Protocol Team"]
7
+ license: "MIT OR Apache-2.0"
8
+
9
+ # =========================================================
10
+ # Version Semantics (Important for Runtime Compatibility)
11
+ # =========================================================
12
+ # This specification uses a layered versioning model:
13
+ #
14
+ # 1. spec_version (this file's `version` field):
15
+ # - Defines the schema structure and field definitions
16
+ # - Runtimes MUST validate manifests against the matching schema version
17
+ # - Format: MAJOR.MINOR (e.g., "1.1")
18
+ #
19
+ # 2. protocol_version (in provider manifests):
20
+ # - Indicates which protocol features the provider config uses
21
+ # - Runtimes SHOULD select appropriate adapters based on this
22
+ # - Format: MAJOR.MINOR (e.g., "1.5")
23
+ #
24
+ # 3. release_version (in metadata):
25
+ # - Full semantic version for this specification release
26
+ # - Format: MAJOR.MINOR.PATCH (e.g., "1.1.1")
27
+ #
28
+ # Runtime Alignment Guidelines:
29
+ # - Runtimes MUST check `protocol_version` before processing provider configs
30
+ # - Runtimes SHOULD support multiple protocol versions for backward compatibility
31
+ # - Runtimes SHOULD NOT hard-code branch names; use version tags or schema URLs
32
+ # - Schema URL example: https://raw.githubusercontent.com/ailib-official/ai-protocol/v0.2.1/schemas/v1.json
33
+
34
+ # =========================================================
35
+ # Standard Schema (2025 Extensions)
36
+ # =========================================================
37
+ standard_schema:
38
+ # Standard parameter definitions - unified developer interface
39
+ parameters:
40
+ temperature:
41
+ type: float
42
+ range: [0.0, 2.0]
43
+ default: 1.0
44
+ description: "Controls randomness in the output"
45
+ max_tokens:
46
+ type: integer
47
+ min: 1
48
+ max: 32768
49
+ description: "Maximum number of tokens to generate"
50
+ stream:
51
+ type: boolean
52
+ default: false
53
+ description: "Enable streaming responses"
54
+ top_p:
55
+ type: float
56
+ range: [0.0, 1.0]
57
+ default: 1.0
58
+ description: "Nucleus sampling parameter"
59
+
60
+ frequency_penalty:
61
+ type: float
62
+ range: [-2.0, 2.0]
63
+ default: 0.0
64
+ description: "Penalize frequent tokens (OpenAI-compatible)"
65
+
66
+ presence_penalty:
67
+ type: float
68
+ range: [-2.0, 2.0]
69
+ default: 0.0
70
+ description: "Penalize repeated presence (OpenAI-compatible)"
71
+
72
+ top_k:
73
+ type: integer
74
+ min: 1
75
+ max: 500
76
+ description: "Top-K filtering (Gemini/open-source compatible)"
77
+
78
+ stop_sequences:
79
+ type: array
80
+ description: "Stop sequences to truncate generation"
81
+
82
+ logprobs:
83
+ type: boolean
84
+ default: false
85
+ description: "Return token logprobs when provider supports"
86
+
87
+ top_logprobs:
88
+ type: integer
89
+ min: 1
90
+ max: 5
91
+ description: "Number of top logprobs to return"
92
+
93
+ seed:
94
+ type: integer
95
+ description: "Deterministic seed when supported"
96
+
97
+ tool_choice:
98
+ type: string_or_object
99
+ values: ["auto", "none", "required", "specific"]
100
+ default: "auto"
101
+ description: "Tool choice policy per request. Runtimes SHOULD accept either a simple string policy (auto/none/required) or a provider-specific object (e.g., OpenAI tool_choice object for selecting a specific tool)."
102
+ object_shape:
103
+ description: "Minimum object contract when tool selection is explicit."
104
+ fields:
105
+ type:
106
+ type: string
107
+ values: ["function", "tool"]
108
+ required: true
109
+ name:
110
+ type: string
111
+ required: true
112
+ description: "Function/tool name to invoke when type indicates a specific tool."
113
+ id:
114
+ type: string
115
+ required: false
116
+ description: "Provider-specific tool identifier when available."
117
+
118
+ response_format_mode:
119
+ type: string
120
+ values: ["auto", "text", "json_object", "json_schema"]
121
+ default: "auto"
122
+ description: "Structured output selection (OpenAI Responses / JSON mode)"
123
+
124
+ # 2025: Agentic reasoning control
125
+ reasoning_effort:
126
+ type: string
127
+ values: ["low", "medium", "high", "auto"]
128
+ default: "auto"
129
+ description: "Reasoning effort for agentic tasks"
130
+
131
+ # Tool definition standards
132
+ tools:
133
+ schema: "standard_tool_definition"
134
+ choice_policy: ["auto", "none", "required", "specific"]
135
+ strict_mode: true
136
+ parallel_calls: true
137
+
138
+ # Response format standards
139
+ response_format:
140
+ types: ["text", "json", "structured"]
141
+ schema_validation: true
142
+
143
+ # Multimodal content standards
144
+ multimodal:
145
+ image:
146
+ formats: ["png", "jpeg", "gif", "webp"]
147
+ max_size: "10MB"
148
+
149
+ # 2025: Agentic Loop configuration
150
+ agentic_loop:
151
+ max_iterations: 10
152
+ stop_conditions: ["tool_result", "final_answer"]
153
+ reasoning_effort: "auto"
154
+
155
+ # 2025: Streaming events model
156
+ streaming_events:
157
+ supported_events: ["PartialContentDelta", "ThinkingDelta", "PartialToolCall", "ToolCallStarted", "ToolCallEnded", "StreamError"]
158
+ thinking_blocks: true
159
+ citations_enabled: true
160
+
161
+ transport_definitions:
162
+ sse:
163
+ description: "Server-Sent Events (SSE) framing contract used by multiple providers."
164
+ framing:
165
+ delimiter: "\\n\\n"
166
+ data_prefix: "data: "
167
+ done_signal: "[DONE]"
168
+ notes:
169
+ - "Runtimes should split frames by delimiter and parse each data payload as JSON when applicable."
170
+ - "Providers MAY emit non-data lines; runtimes should ignore unknown lines unless a provider-specific decoder.strategy says otherwise."
171
+
172
+ # Termination / stop reasons (cross-provider normalization)
173
+ #
174
+ # Goal: normalize provider-specific termination signals (e.g. OpenAI finish_reason,
175
+ # Anthropic stop_reason) into a stable enum that runtimes and UIs can rely on.
176
+ termination_reasons:
177
+ standard_reasons:
178
+ - id: "end_turn"
179
+ description: "Model reached a natural stopping point."
180
+ - id: "max_tokens"
181
+ description: "Generation stopped because max tokens or context constraints were hit."
182
+ - id: "stop_sequence"
183
+ description: "Generation stopped because a user-provided stop sequence was encountered."
184
+ - id: "tool_use"
185
+ description: "Model decided to invoke one or more tools / function calls."
186
+ - id: "refusal"
187
+ description: "Model refused or safety classifiers intervened (policy)."
188
+ - id: "pause_turn"
189
+ description: "Turn was paused and may be continued in a subsequent request."
190
+ - id: "other"
191
+ description: "Provider-specific or unknown termination."
192
+
193
+ # =========================================================
194
+ # Telemetry & Feedback (optional, application-controlled)
195
+ # =========================================================
196
+ telemetry:
197
+ principles:
198
+ - "Telemetry MUST be opt-in. Runtimes MUST NOT force collection or transmission."
199
+ - "Generation and feedback reporting MUST be decoupled; feedback is asynchronous and may be dropped."
200
+ - "Data minimization is required. Prefer ids, indices, and hashes over raw prompt/output text."
201
+ - "A stable request_id (client-generated if necessary) is REQUIRED for closed-loop linkage."
202
+ feedback_events:
203
+ ChoiceSelection:
204
+ description: "User selection among multiple candidates (n > 1)."
205
+ fields:
206
+ request_id:
207
+ type: string
208
+ required: true
209
+ description: "Runtime-generated request identifier used for linkage (not necessarily provider's request id)."
210
+ chosen_index:
211
+ type: integer
212
+ required: true
213
+ description: "0-based index of the chosen candidate."
214
+ rejected_indices:
215
+ type: array
216
+ items: integer
217
+ required: false
218
+ description: "Optional list of 0-based indices that were shown but not chosen."
219
+ latency_to_select_ms:
220
+ type: integer
221
+ required: false
222
+ description: "Time from rendering candidates to the user making a selection (milliseconds)."
223
+ ui_context:
224
+ type: object
225
+ required: false
226
+ description: "Optional UI / experiment context (A/B bucket, component, etc.)."
227
+ candidate_hashes:
228
+ type: array
229
+ items: string
230
+ required: false
231
+ description: "Optional hashes for the rendered candidates; enables audit/linkage without uploading full text."
232
+
233
+
234
+ # Informative mapping guidance (NOT exhaustive; providers may evolve)
235
+ provider_mappings:
236
+ anthropic:
237
+ source_field: "stop_reason"
238
+ notes:
239
+ - "Non-streaming: stop_reason is always non-null."
240
+ - "Streaming: stop_reason is null in message_start, then non-null otherwise."
241
+ map:
242
+ end_turn: "end_turn"
243
+ max_tokens: "max_tokens"
244
+ stop_sequence: "stop_sequence"
245
+ tool_use: "tool_use"
246
+ pause_turn: "pause_turn"
247
+ refusal: "refusal"
248
+ openai:
249
+ source_field: "finish_reason"
250
+ notes:
251
+ - "finish_reason is reported per candidate."
252
+ map:
253
+ stop: "end_turn"
254
+ length: "max_tokens"
255
+ tool_calls: "tool_use"
256
+ content_filter: "refusal"
257
+ null: "other"
258
+
259
+ runtime_behavior:
260
+ - "Runtimes SHOULD surface standardized termination reasons to users and logs."
261
+ - "Runtimes SHOULD preserve the original provider-specific reason in extra metadata for debugging."
262
+
263
+ # Standard event payload definitions (used by runtimes for normalization)
264
+ event_definitions:
265
+ StreamError:
266
+ description: "A stream-level error event emitted after a streaming/realtime session has been established. Most errors are recoverable and the session may stay open; runtimes SHOULD monitor and log error messages by default."
267
+ payload:
268
+ type: object
269
+ # Providers differ: some emit a rich error object with event ids (e.g. OpenAI realtime),
270
+ # others may emit a simple string (e.g. SSE frames with an `error` message).
271
+ required: ["error"]
272
+ properties:
273
+ type:
274
+ type: string
275
+ description: "Optional event type when provided by the stream (e.g., 'error')."
276
+ event_id:
277
+ type: string
278
+ description: "Optional unique ID of the server event when provided."
279
+ error:
280
+ type: any
281
+ description: "Error payload. May be an object (rich error) or a string (human-readable message), depending on provider."
282
+ model_version:
283
+ type: string
284
+ description: "Optional model version string when provided by the stream."
285
+
286
+ # Standard content block model (cross-provider normalization)
287
+ #
288
+ # Goal: normalize tool invocation and tool result payloads across providers that represent
289
+ # tools as content blocks (e.g. Anthropic tool_use/tool_result) or as tool_calls arrays (e.g. OpenAI).
290
+ content_blocks:
291
+ types: ["text", "tool_use", "tool_result", "thinking"]
292
+
293
+ tool_use:
294
+ description: "A request from the model to invoke a tool."
295
+ required_fields: ["type", "id", "name", "input"]
296
+ fields:
297
+ type:
298
+ const: "tool_use"
299
+ id:
300
+ type: string
301
+ description: "Unique identifier for this tool invocation (tool_use id / tool_call id)."
302
+ name:
303
+ type: string
304
+ description: "Tool name to invoke."
305
+ input:
306
+ type: object
307
+ description: "Tool input parameters (structured object)."
308
+
309
+ tool_result:
310
+ description: "A tool execution result provided back to the model."
311
+ required_fields: ["type", "tool_use_id", "content"]
312
+ fields:
313
+ type:
314
+ const: "tool_result"
315
+ tool_use_id:
316
+ type: string
317
+ description: "The id of the corresponding tool_use/tool_call."
318
+ content:
319
+ type: any
320
+ description: "Result content (provider/runtime-defined). Prefer a structured object or an array of text blocks."
321
+ is_error:
322
+ type: boolean
323
+ description: "Optional: whether the tool execution failed."
324
+
325
+ # Retry policy model (cross-provider normalization)
326
+ #
327
+ # Goal: allow providers to declare retry guidance so runtimes can apply consistent behavior.
328
+ retry_policy:
329
+ fields:
330
+ strategy:
331
+ values: ["none", "exponential_backoff"]
332
+ max_retries:
333
+ type: integer
334
+ min: 0
335
+ min_delay_ms:
336
+ type: integer
337
+ min: 0
338
+ max_delay_ms:
339
+ type: integer
340
+ min: 0
341
+ jitter:
342
+ values: ["none", "full", "equal"]
343
+ retry_on_http_status:
344
+ type: array
345
+ items: integer
346
+ retry_on_error_status:
347
+ type: array
348
+ items: string
349
+ runtime_notes:
350
+ - "Runtimes SHOULD treat retry_policy as guidance; user overrides may apply."
351
+ - "Runtimes SHOULD apply exponential backoff with an upper bound and optional jitter when enabled."
352
+
353
+ # Rate limit headers (cross-provider normalization)
354
+ #
355
+ # Goal: allow providers to declare which HTTP response headers expose rate limits.
356
+ rate_limit_headers:
357
+ fields:
358
+ requests_limit:
359
+ type: string
360
+ description: "Header name that reports request limit."
361
+ requests_remaining:
362
+ type: string
363
+ description: "Header name that reports remaining requests."
364
+ requests_reset:
365
+ type: string
366
+ description: "Header name that reports time until request limit resets (format is provider-specific)."
367
+ tokens_limit:
368
+ type: string
369
+ description: "Header name that reports token limit."
370
+ tokens_remaining:
371
+ type: string
372
+ description: "Header name that reports remaining tokens."
373
+ tokens_reset:
374
+ type: string
375
+ description: "Header name that reports time until token limit resets (format is provider-specific)."
376
+ retry_after:
377
+ type: string
378
+ description: "Header name that reports seconds until retry allowed (Retry-After style), if provided."
379
+ runtime_notes:
380
+ - "Runtimes SHOULD treat rate_limit_headers as optional; not all providers return all headers."
381
+ - "When retry_after is present on 429, runtimes SHOULD respect it."
382
+
383
+ # Error handling (cross-provider normalization)
384
+ #
385
+ # Goal: define a stable, cross-provider error taxonomy and runtime behavior guidelines.
386
+ #
387
+ # Provider configs should:
388
+ # - Declare how to extract errors via `features.response_mapping.error.*`
389
+ # - Declare retry guidance via `retry_policy`
390
+ # - Declare rate limit headers via `rate_limit_headers`
391
+ error_handling:
392
+ # Stable classification labels that runtimes can surface consistently.
393
+ # Providers MAY offer richer or more specific codes; runtimes SHOULD preserve raw provider fields.
394
+ error_classes:
395
+ - id: "invalid_request"
396
+ description: "Malformed request body, invalid parameters, or unsupported operation."
397
+ default_retryable: false
398
+ - id: "authentication"
399
+ description: "Missing/invalid credentials (API key/token)."
400
+ default_retryable: false
401
+ - id: "permission_denied"
402
+ description: "Caller is authenticated but not permitted to access the resource."
403
+ default_retryable: false
404
+ - id: "not_found"
405
+ description: "Requested resource not found."
406
+ default_retryable: false
407
+ - id: "quota_exhausted"
408
+ description: "Account quota/billing/spend limit exceeded."
409
+ default_retryable: false
410
+ - id: "rate_limited"
411
+ description: "Throttled due to request/token limits; typically retryable with backoff."
412
+ default_retryable: true
413
+ - id: "request_too_large"
414
+ description: "Payload too large (e.g., context too long, request too big)."
415
+ default_retryable: false
416
+ - id: "timeout"
417
+ description: "Request timed out or deadline exceeded."
418
+ default_retryable: true
419
+ - id: "conflict"
420
+ description: "Request conflict (often safe to retry depending on semantics)."
421
+ default_retryable: true
422
+ - id: "cancelled"
423
+ description: "Request was cancelled by client or upstream."
424
+ default_retryable: false
425
+ - id: "server_error"
426
+ description: "Transient server-side failure (5xx)."
427
+ default_retryable: true
428
+ - id: "overloaded"
429
+ description: "Service overloaded / temporarily unavailable."
430
+ default_retryable: true
431
+ - id: "other"
432
+ description: "Unknown or provider-specific classification."
433
+ default_retryable: false
434
+
435
+ # Common envelopes observed across providers (informative, not prescriptive).
436
+ # Runtimes SHOULD use provider mappings to extract fields.
437
+ envelope_patterns:
438
+ openai_style:
439
+ shape: '{ "error": { "message": string, "type": string, "code"?: string|null, "param"?: string|null } }'
440
+ extraction:
441
+ message: "error.message"
442
+ type: "error.type"
443
+ code: "error.code"
444
+ param: "error.param"
445
+ anthropic_style:
446
+ shape: '{ "type": "error", "error": { "type": string, "message": string }, "request_id": string }'
447
+ extraction:
448
+ message: "error.message"
449
+ type: "error.type"
450
+ request_id: "request_id"
451
+ google_cloud_style:
452
+ shape: '{ "error": { "code": integer, "message": string, "status": string, "errors"?: [], "details"?: [] } }'
453
+ extraction:
454
+ message: "error.message"
455
+ http_code: "error.code"
456
+ status: "error.status"
457
+ errors: "error.errors"
458
+ details: "error.details"
459
+
460
+ # Canonical guidance for classification and retries.
461
+ runtime_behavior:
462
+ - "Runtimes SHOULD map provider-specific errors into error_classes for logs/UI, while preserving raw fields."
463
+ - "Runtimes SHOULD treat HTTP 401/403 as authentication/permission_denied and avoid retries by default."
464
+ - "Runtimes SHOULD treat HTTP 400/422 as invalid_request and avoid retries by default."
465
+ - "On HTTP 429, runtimes SHOULD distinguish rate-limited vs quota-exhausted when possible (often via message/type/code)."
466
+ - "On HTTP 429, if a Retry-After header (or provider-declared retry_after) is present, runtimes SHOULD respect it."
467
+ - "On transient 5xx/overloaded errors, runtimes SHOULD retry with exponential backoff and a cap (see retry_policy)."
468
+ - "Runtimes SHOULD cap retries and add jitter to avoid retry storms."
469
+
470
+ # Provider field contract: how provider configs combine to express error handling
471
+ #
472
+ # Providers SHOULD declare:
473
+ # 1. `features.response_mapping.error.*`: JSON paths to extract error fields from responses.
474
+ # 2. `error_classification`: How HTTP status codes and provider error status strings map to error_classes.
475
+ # 3. `retry_policy`: Which errors are retryable and with what strategy (backoff, delays, caps).
476
+ # 4. `rate_limit_headers`: Which HTTP headers expose rate limit state (for monitoring/throttling).
477
+ #
478
+ # Runtime interpretation:
479
+ # - Extract error fields using `response_mapping.error.*` paths.
480
+ # - Classify errors using `error_classification` (HTTP status → error_class, or provider status → error_class).
481
+ # - Determine retry eligibility from `error_classification` output + `retry_policy.retry_on_http_status` + `retry_policy.retry_on_error_status`.
482
+ # - Apply `retry_policy` strategy (exponential_backoff) with declared delays/jitter/caps.
483
+ # - Monitor `rate_limit_headers` for rate limit state (remaining/reset times).
484
+ # - On 429, check `rate_limit_headers.retry_after` (if present) before applying backoff.
485
+ #
486
+ # Example flow:
487
+ # 1. HTTP 429 response received.
488
+ # 2. Extract `error.message` using `features.response_mapping.error.message_path`.
489
+ # 3. Classify via `error_classification.by_http_status["429"]` → "rate_limited" (or "quota_exhausted" if detectable).
490
+ # 4. Check `retry_policy.retry_on_http_status` includes 429 → retryable.
491
+ # 5. Check `rate_limit_headers.retry_after` header value (if present) → use that delay; otherwise use `retry_policy.min_delay_ms` + exponential backoff.
492
+ # 6. Apply retry with `retry_policy.strategy`, `retry_policy.jitter`, up to `retry_policy.max_retries`.
493
+ provider_field_contract:
494
+ purpose: "Clarify how provider configs work together to enable consistent error handling and retry behavior."
495
+ fields_relationship:
496
+ - "response_mapping.error: Extracts raw error fields (message, code, type, etc.) from provider responses."
497
+ - "error_classification: Maps HTTP status / provider status strings to standard error_classes."
498
+ - "retry_policy: Declares which errors are retryable and how to retry (strategy, delays, caps)."
499
+ - "rate_limit_headers: Declares which HTTP headers expose rate limit state (for throttling/monitoring)."
500
+ runtime_integration:
501
+ - "Runtimes SHOULD use error_classification output to determine retry eligibility (default_retryable from error_class + explicit retry_policy overrides)."
502
+ - "Runtimes SHOULD combine error_classification with retry_policy.retry_on_http_status and retry_policy.retry_on_error_status to finalize retry decisions."
503
+ - "Runtimes SHOULD respect rate_limit_headers.retry_after when present on 429 responses."
504
+ - "Runtimes SHOULD preserve raw provider error fields (via response_mapping) alongside normalized error_class for debugging."
505
+
506
+ # =========================================================
507
+ # Provider Manifest Schema (v1.1+ Extensions)
508
+ # =========================================================
509
+ provider_manifest:
510
+ # Provider Identity & Core Metadata
511
+ identity:
512
+ id:
513
+ type: string
514
+ pattern: "^[a-z0-9][a-z0-9-_]{1,63}$"
515
+ required: true
516
+ description: "Stable provider identifier (lowercase, alphanumeric, hyphens, underscores)"
517
+ name:
518
+ type: string
519
+ required: false
520
+ description: "Human-readable provider name"
521
+ status:
522
+ type: string
523
+ values: ["stable", "beta", "deprecated"]
524
+ default: "stable"
525
+ description: "Provider status"
526
+ category:
527
+ type: string
528
+ values: ["ai_provider", "model_provider", "third_party_aggregator"]
529
+ required: false
530
+ description: "Provider category"
531
+ official_url:
532
+ type: string
533
+ format: uri
534
+ required: false
535
+ description: "Official documentation website"
536
+ support_contact:
537
+ type: string
538
+ format: uri
539
+ required: false
540
+ description: "Support or feedback link"
541
+
542
+ # Endpoint Definition (Required)
543
+ endpoint:
544
+ base_url:
545
+ type: string
546
+ format: uri
547
+ required: true
548
+ description: "Absolute base URL of the provider API"
549
+ protocol:
550
+ type: string
551
+ values: ["https", "http", "ws", "wss"]
552
+ default: "https"
553
+ description: "Protocol used for communication"
554
+ timeout_ms:
555
+ type: integer
556
+ min: 100
557
+ default: 10000
558
+ description: "Default timeout for requests in milliseconds"
559
+
560
+ # Availability and Health Checking (Required)
561
+ availability:
562
+ required:
563
+ type: boolean
564
+ required: true
565
+ description: "Whether the provider must be reachable at runtime startup"
566
+ regions:
567
+ type: array
568
+ min_items: 1
569
+ items:
570
+ type: string
571
+ values: ["cn", "global", "us", "eu"]
572
+ unique_items: true
573
+ required: true
574
+ description: "Geographic regions where the provider is available"
575
+ semantics:
576
+ cn: "Reachable from mainland China without special routing"
577
+ global: "Reachable from the general international Internet. This does NOT imply reachability from mainland China."
578
+ us: "Explicitly deployed in US region only"
579
+ eu: "Explicitly deployed in EU region only"
580
+ notes:
581
+ - "Multiple regions MAY be listed (e.g., ['cn', 'global']) to explicitly declare dual availability"
582
+ - "Runtimes MUST NOT assume implicit region coverage"
583
+ check:
584
+ method:
585
+ type: string
586
+ values: ["HEAD", "GET"]
587
+ required: true
588
+ description: "HTTP method for health check"
589
+ path:
590
+ type: string
591
+ pattern: "^/"
592
+ required: true
593
+ description: "Path relative to base_url for health check"
594
+ expected_status:
595
+ type: array
596
+ min_items: 1
597
+ items:
598
+ type: integer
599
+ min: 100
600
+ max: 599
601
+ required: true
602
+ description: "HTTP status codes that indicate healthy state"
603
+ timeout_ms:
604
+ type: integer
605
+ min: 100
606
+ default: 3000
607
+ description: "Timeout for health check in milliseconds"
608
+ notes:
609
+ - "Health checks SHOULD NOT require authentication"
610
+ - "Unauthorized responses (401) MAY be considered healthy if they indicate the service is reachable"
611
+
612
+ # Capabilities Declaration (Required)
613
+ capabilities:
614
+ streaming:
615
+ type: boolean
616
+ required: true
617
+ description: "Supports streaming responses"
618
+ tools:
619
+ type: boolean
620
+ required: true
621
+ description: "Supports tool / function calling"
622
+ vision:
623
+ type: boolean
624
+ required: true
625
+ description: "Supports image or multimodal inputs"
626
+ agentic:
627
+ type: boolean
628
+ default: false
629
+ description: "Supports agentic reasoning and multi-turn tool sequences"
630
+ reasoning:
631
+ type: boolean
632
+ default: false
633
+ description: "Supports extended reasoning / thinking blocks"
634
+ parallel_tools:
635
+ type: boolean
636
+ default: false
637
+ description: "Supports parallel tool invocations"