@ailib-official/ai-protocol 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. package/LICENSE +6 -0
  2. package/README.md +415 -0
  3. package/dist/index.json +8 -0
  4. package/dist/v1/models/ai21.json +140 -0
  5. package/dist/v1/models/baichuan.json +138 -0
  6. package/dist/v1/models/cerebras.json +147 -0
  7. package/dist/v1/models/claude.json +114 -0
  8. package/dist/v1/models/cohere.json +235 -0
  9. package/dist/v1/models/deepinfra.json +43 -0
  10. package/dist/v1/models/deepseek-chat.json +55 -0
  11. package/dist/v1/models/doubao.json +197 -0
  12. package/dist/v1/models/ernie.json +223 -0
  13. package/dist/v1/models/fireworks.json +222 -0
  14. package/dist/v1/models/gemini.json +58 -0
  15. package/dist/v1/models/gpt.json +166 -0
  16. package/dist/v1/models/grok.json +138 -0
  17. package/dist/v1/models/huggingface.json +183 -0
  18. package/dist/v1/models/hunyuan.json +255 -0
  19. package/dist/v1/models/jina.json +139 -0
  20. package/dist/v1/models/lepton.json +188 -0
  21. package/dist/v1/models/llama.json +143 -0
  22. package/dist/v1/models/minimax.json +194 -0
  23. package/dist/v1/models/mistral.json +177 -0
  24. package/dist/v1/models/moonshot.json +144 -0
  25. package/dist/v1/models/nvidia.json +212 -0
  26. package/dist/v1/models/palmyra.json +103 -0
  27. package/dist/v1/models/perplexity.json +143 -0
  28. package/dist/v1/models/qwen.json +49 -0
  29. package/dist/v1/models/replicate.json +206 -0
  30. package/dist/v1/models/sensenova.json +144 -0
  31. package/dist/v1/models/siliconflow.json +44 -0
  32. package/dist/v1/models/spark.json +173 -0
  33. package/dist/v1/models/stable-diffusion.json +161 -0
  34. package/dist/v1/models/tiangong.json +169 -0
  35. package/dist/v1/models/together.json +43 -0
  36. package/dist/v1/models/yi.json +199 -0
  37. package/dist/v1/models/zhipu.json +250 -0
  38. package/dist/v1/providers/ai21.json +215 -0
  39. package/dist/v1/providers/anthropic.json +253 -0
  40. package/dist/v1/providers/anyscale.json +115 -0
  41. package/dist/v1/providers/azure.json +188 -0
  42. package/dist/v1/providers/baichuan.json +205 -0
  43. package/dist/v1/providers/baidu.json +205 -0
  44. package/dist/v1/providers/cerebras.json +216 -0
  45. package/dist/v1/providers/cohere.json +209 -0
  46. package/dist/v1/providers/deepinfra.json +196 -0
  47. package/dist/v1/providers/deepseek.json +221 -0
  48. package/dist/v1/providers/doubao.json +209 -0
  49. package/dist/v1/providers/fireworks.json +227 -0
  50. package/dist/v1/providers/gemini.json +279 -0
  51. package/dist/v1/providers/groq.json +218 -0
  52. package/dist/v1/providers/huggingface.json +181 -0
  53. package/dist/v1/providers/hunyuan.json +198 -0
  54. package/dist/v1/providers/jina.json +202 -0
  55. package/dist/v1/providers/lepton.json +221 -0
  56. package/dist/v1/providers/minimax.json +209 -0
  57. package/dist/v1/providers/mistral.json +214 -0
  58. package/dist/v1/providers/moonshot.json +214 -0
  59. package/dist/v1/providers/nvidia.json +213 -0
  60. package/dist/v1/providers/openai.json +267 -0
  61. package/dist/v1/providers/openrouter.json +220 -0
  62. package/dist/v1/providers/perplexity.json +172 -0
  63. package/dist/v1/providers/qwen.json +231 -0
  64. package/dist/v1/providers/replicate.json +192 -0
  65. package/dist/v1/providers/sensenova.json +185 -0
  66. package/dist/v1/providers/siliconflow.json +197 -0
  67. package/dist/v1/providers/spark.json +204 -0
  68. package/dist/v1/providers/stability.json +221 -0
  69. package/dist/v1/providers/tiangong.json +207 -0
  70. package/dist/v1/providers/together.json +196 -0
  71. package/dist/v1/providers/writer.json +253 -0
  72. package/dist/v1/providers/xai.json +238 -0
  73. package/dist/v1/providers/yi.json +205 -0
  74. package/dist/v1/providers/zhipu.json +208 -0
  75. package/dist/v1/spec.json +783 -0
  76. package/dist/v2/providers/anthropic.json +372 -0
  77. package/dist/v2/providers/cohere.json +109 -0
  78. package/dist/v2/providers/deepseek.json +272 -0
  79. package/dist/v2/providers/doubao.json +260 -0
  80. package/dist/v2/providers/google.json +388 -0
  81. package/dist/v2/providers/jina.json +71 -0
  82. package/dist/v2/providers/moonshot.json +284 -0
  83. package/dist/v2/providers/openai.json +419 -0
  84. package/dist/v2/providers/qwen.json +274 -0
  85. package/dist/v2/providers/zhipu.json +257 -0
  86. package/dist/v2-alpha/providers/anthropic.json +182 -0
  87. package/dist/v2-alpha/providers/gemini.json +187 -0
  88. package/dist/v2-alpha/providers/openai.json +215 -0
  89. package/dist/v2-alpha/spec.json +644 -0
  90. package/package.json +61 -0
  91. package/schemas/spec.json +186 -0
  92. package/schemas/v1.json +1116 -0
  93. package/schemas/v2/availability.json +66 -0
  94. package/schemas/v2/capabilities.json +182 -0
  95. package/schemas/v2/capability-profile.json +174 -0
  96. package/schemas/v2/computer-use.json +222 -0
  97. package/schemas/v2/context-policy.json +148 -0
  98. package/schemas/v2/endpoint.json +2 -0
  99. package/schemas/v2/error-codes.yaml +225 -0
  100. package/schemas/v2/errors.json +250 -0
  101. package/schemas/v2/execution-metadata.json +59 -0
  102. package/schemas/v2/mcp.json +225 -0
  103. package/schemas/v2/message-roles.json +10 -0
  104. package/schemas/v2/multimodal.json +297 -0
  105. package/schemas/v2/pricing.json +14 -0
  106. package/schemas/v2/provider-contract.json +317 -0
  107. package/schemas/v2/provider.json +203 -0
  108. package/schemas/v2/regions.json +14 -0
  109. package/v1/models/ai21.yaml +97 -0
  110. package/v1/models/baichuan.yaml +95 -0
  111. package/v1/models/cerebras.yaml +99 -0
  112. package/v1/models/claude.yaml +59 -0
  113. package/v1/models/cohere.yaml +165 -0
  114. package/v1/models/deepinfra.yaml +29 -0
  115. package/v1/models/deepseek-chat.yaml +32 -0
  116. package/v1/models/doubao.yaml +129 -0
  117. package/v1/models/ernie.yaml +146 -0
  118. package/v1/models/fireworks.yaml +145 -0
  119. package/v1/models/gemini.yaml +32 -0
  120. package/v1/models/gpt.yaml +90 -0
  121. package/v1/models/grok.yaml +74 -0
  122. package/v1/models/huggingface.yaml +110 -0
  123. package/v1/models/hunyuan.yaml +164 -0
  124. package/v1/models/jina.yaml +98 -0
  125. package/v1/models/lepton.yaml +130 -0
  126. package/v1/models/llama.yaml +91 -0
  127. package/v1/models/minimax.yaml +132 -0
  128. package/v1/models/mistral.yaml +100 -0
  129. package/v1/models/moonshot.yaml +97 -0
  130. package/v1/models/nvidia.yaml +118 -0
  131. package/v1/models/palmyra.yaml +59 -0
  132. package/v1/models/perplexity.yaml +97 -0
  133. package/v1/models/qwen.yaml +27 -0
  134. package/v1/models/replicate.yaml +136 -0
  135. package/v1/models/sensenova.yaml +97 -0
  136. package/v1/models/siliconflow.yaml +29 -0
  137. package/v1/models/spark.yaml +114 -0
  138. package/v1/models/stable-diffusion.yaml +113 -0
  139. package/v1/models/tiangong.yaml +114 -0
  140. package/v1/models/together.yaml +29 -0
  141. package/v1/models/yi.yaml +132 -0
  142. package/v1/models/zhipu.yaml +163 -0
  143. package/v1/providers/ai21.yaml +176 -0
  144. package/v1/providers/anthropic.yaml +209 -0
  145. package/v1/providers/anyscale.yaml +106 -0
  146. package/v1/providers/azure.yaml +155 -0
  147. package/v1/providers/baichuan.yaml +168 -0
  148. package/v1/providers/baidu.yaml +173 -0
  149. package/v1/providers/cerebras.yaml +178 -0
  150. package/v1/providers/cohere.yaml +175 -0
  151. package/v1/providers/deepinfra.yaml +156 -0
  152. package/v1/providers/deepseek.yaml +189 -0
  153. package/v1/providers/doubao.yaml +172 -0
  154. package/v1/providers/fireworks.yaml +187 -0
  155. package/v1/providers/gemini.yaml +229 -0
  156. package/v1/providers/groq.yaml +181 -0
  157. package/v1/providers/huggingface.yaml +157 -0
  158. package/v1/providers/hunyuan.yaml +162 -0
  159. package/v1/providers/jina.yaml +171 -0
  160. package/v1/providers/lepton.yaml +183 -0
  161. package/v1/providers/minimax.yaml +172 -0
  162. package/v1/providers/mistral.yaml +186 -0
  163. package/v1/providers/moonshot.yaml +176 -0
  164. package/v1/providers/nvidia.yaml +172 -0
  165. package/v1/providers/openai.yaml +224 -0
  166. package/v1/providers/openrouter.yaml +181 -0
  167. package/v1/providers/perplexity.yaml +144 -0
  168. package/v1/providers/qwen.yaml +192 -0
  169. package/v1/providers/replicate.yaml +159 -0
  170. package/v1/providers/sensenova.yaml +153 -0
  171. package/v1/providers/siliconflow.yaml +156 -0
  172. package/v1/providers/spark.yaml +167 -0
  173. package/v1/providers/stability.yaml +185 -0
  174. package/v1/providers/tiangong.yaml +170 -0
  175. package/v1/providers/together.yaml +156 -0
  176. package/v1/providers/writer.yaml +212 -0
  177. package/v1/providers/xai.yaml +206 -0
  178. package/v1/providers/yi.yaml +168 -0
  179. package/v1/providers/zhipu.yaml +171 -0
  180. package/v1/spec.yaml +637 -0
  181. package/v2/providers/anthropic.yaml +244 -0
  182. package/v2/providers/cohere.yaml +75 -0
  183. package/v2/providers/deepseek.yaml +176 -0
  184. package/v2/providers/doubao.yaml +169 -0
  185. package/v2/providers/google.yaml +245 -0
  186. package/v2/providers/jina.yaml +55 -0
  187. package/v2/providers/moonshot.yaml +186 -0
  188. package/v2/providers/openai.yaml +266 -0
  189. package/v2/providers/qwen.yaml +174 -0
  190. package/v2/providers/zhipu.yaml +176 -0
  191. package/v2-alpha/providers/anthropic.yaml +134 -0
  192. package/v2-alpha/providers/gemini.yaml +144 -0
  193. package/v2-alpha/providers/openai.yaml +154 -0
  194. package/v2-alpha/spec.yaml +512 -0
@@ -0,0 +1,157 @@
1
+ $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v1.json"
2
+
3
+ id: huggingface
4
+ protocol_version: "1.5"
5
+
6
+ name: Hugging Face Inference
7
+ version: "v1"
8
+ status: stable
9
+ category: model_provider
10
+ official_url: "https://huggingface.co/docs/api-inference"
11
+ support_contact: "https://huggingface.co/support"
12
+
13
+ # Structured endpoint definition (v1.1+ extension)
14
+ endpoint:
15
+ base_url: "https://api-inference.huggingface.co/models"
16
+ protocol: https
17
+ timeout_ms: 60000
18
+
19
+ auth:
20
+ type: bearer
21
+ token_env: "HUGGINGFACE_API_KEY"
22
+ payload_format: "inference_api"
23
+
24
+ # Hugging Face supports multiple inference modes
25
+ api_families: ["text_generation", "image_generation", "embeddings", "audio"]
26
+ default_api_family: "text_generation"
27
+ endpoints:
28
+ chat:
29
+ path: "/{model_id}"
30
+ method: "POST"
31
+ adapter: "huggingface"
32
+ embeddings:
33
+ path: "/{model_id}"
34
+ method: "POST"
35
+ adapter: "huggingface"
36
+ image:
37
+ path: "/{model_id}"
38
+ method: "POST"
39
+ adapter: "huggingface"
40
+
41
+ # V1.5 Service Endpoints
42
+ services:
43
+ list_models:
44
+ path: "https://huggingface.co/api/models"
45
+ method: "GET"
46
+ response_binding: "null"
47
+
48
+ # Termination reason normalization
49
+ termination:
50
+ source_field: "finish_reason"
51
+ mapping:
52
+ stop: "end_turn"
53
+ length: "max_tokens"
54
+ notes:
55
+ - "finish_reason varies by model"
56
+ - "not all models support explicit finish_reason"
57
+
58
+ # Tool invocation normalization
59
+ tooling:
60
+ source_model: "unknown"
61
+ notes:
62
+ - "Hugging Face serverless inference does not natively support tool calling"
63
+ - "Deployed custom models may implement tool calling via custom logic"
64
+
65
+ # Retry policy
66
+ retry_policy:
67
+ strategy: "exponential_backoff"
68
+ max_retries: 3
69
+ min_delay_ms: 1000
70
+ max_delay_ms: 10000
71
+ jitter: "full"
72
+ retry_on_http_status: [429, 500, 502, 503]
73
+ notes:
74
+ - "Hugging Face free tier has cold starts; retry recommended"
75
+ - "503 is common during model loading"
76
+
77
+ # Error classification hints
78
+ error_classification:
79
+ by_http_status:
80
+ "400": "invalid_request"
81
+ "401": "authentication"
82
+ "403": "permission_denied"
83
+ "404": "not_found"
84
+ "429": "rate_limited"
85
+ "500": "server_error"
86
+ "503": "overloaded"
87
+ notes:
88
+ - "503 often indicates model is loading (cold start)"
89
+ - "404 may indicate model not found or removed"
90
+
91
+ parameter_mappings:
92
+ temperature: "temperature"
93
+ max_tokens: "max_new_tokens"
94
+ stream: "stream"
95
+ top_p: "top_p"
96
+ stop_sequences: "stop"
97
+ top_k: "top_k"
98
+ repetition_penalty: "repetition_penalty"
99
+
100
+ response_format: "huggingface_style"
101
+ response_paths:
102
+ content: "output_text"
103
+ generated_text: "generated_text"
104
+ usage: "details"
105
+
106
+ streaming:
107
+ event_format: "inference_api"
108
+ decoder:
109
+ format: "sse"
110
+ delimiter: "\n"
111
+ prefix: ""
112
+ done_signal: ""
113
+ frame_selector: "exists($.token) || exists($.generated_text)"
114
+ event_map:
115
+ - match: "exists($.token)"
116
+ emit: "PartialContentDelta"
117
+ fields:
118
+ content: "$.token.text"
119
+ token_id: "$.token.id"
120
+ - match: "exists($.generated_text)"
121
+ emit: "FinalCandidate"
122
+ fields:
123
+ content: "$.generated_text"
124
+ stop_condition: "exists($.finish_reason) || length > 0 && !exists($.token)"
125
+ extra_metadata_path: "$.details"
126
+ content_path: "$.output_text"
127
+
128
+ capabilities:
129
+ streaming: true
130
+ tools: false
131
+ vision: true
132
+ agentic: false
133
+ parallel_tools: false
134
+ reasoning: false
135
+
136
+ experimental_features:
137
+ - "model_selection"
138
+ - "serverless_inference"
139
+ - "custom_deployment"
140
+
141
+ # Connection vars for dynamic model selection
142
+ connection_vars:
143
+ model_id: "model"
144
+
145
+ # Availability and health checking (v1.1+ extension)
146
+ availability:
147
+ required: false
148
+ regions:
149
+ - global
150
+ check:
151
+ method: GET
152
+ path: "/meta-llama/Llama-2-7b-chat-hf"
153
+ expected_status: [200]
154
+ timeout_ms: 5000
155
+ notes:
156
+ - "Check against a popular model (Llama-2) to verify service availability"
157
+ - "Model-specific endpoint: /models/{model_id}"
@@ -0,0 +1,162 @@
1
+ $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v1.json"
2
+
3
+ id: hunyuan
4
+ protocol_version: "1.5"
5
+
6
+ name: 腾讯混元 (Tencent Hunyuan)
7
+ version: "v1"
8
+ status: stable
9
+ category: ai_provider
10
+ official_url: "https://cloud.tencent.com/document/product/1729"
11
+ support_contact: "https://cloud.tencent.com/online-service"
12
+
13
+ endpoint:  # Base connection settings for the Hunyuan provider
14
+ base_url: "https://api.hunyuan.cloud.tencent.com"  # OpenAI-compatible gateway host; matches the bearer auth and /v1/chat/completions path declared in this file
15
+ protocol: https
16
+ timeout_ms: 60000  # request timeout in milliseconds (60 s)
17
+
18
+ auth:
19
+ type: bearer
20
+ token_env: "HUNYUAN_API_KEY"
21
+
22
+ payload_format: "openai_style"
23
+
24
+ # Hunyuan supports OpenAI-compatible API
25
+ api_families: ["chat_completions"]
26
+ default_api_family: "chat_completions"
27
+
28
+ endpoints:
29
+ chat:
30
+ path: "/v1/chat/completions"
31
+ method: "POST"
32
+ adapter: "openai"
33
+
34
+ termination:
35
+ source_field: "finish_reason"
36
+ mapping:
37
+ stop: "end_turn"
38
+ length: "max_tokens"
39
+ tool_calls: "tool_use"
40
+ sensitive: "refusal"
41
+ notes:
42
+ - "OpenAI-compatible finish_reason."
43
+ - "混元大模型 (Hunyuan) is Tencent's flagship LLM."
44
+
45
+ tooling:
46
+ source_model: "openai_tool_calls"
47
+ tool_use:
48
+ id_path: "id"
49
+ name_path: "function.name"
50
+ input_path: "function.arguments"
51
+ input_format: "json_string"
52
+ notes:
53
+ - "Supports OpenAI-compatible function calling."
54
+
55
+ retry_policy:
56
+ strategy: "exponential_backoff"
57
+ max_retries: 3
58
+ min_delay_ms: 1000
59
+ max_delay_ms: 30000
60
+ jitter: "full"
61
+ retry_on_http_status: [429, 500, 502, 503]
62
+
63
+ error_classification:
64
+ by_http_status:
65
+ "400": "invalid_request"
66
+ "401": "authentication"
67
+ "403": "permission_denied"
68
+ "404": "not_found"
69
+ "429": "rate_limited"
70
+ "500": "server_error"
71
+ "503": "overloaded"
72
+
73
+ parameter_mappings:
74
+ temperature: "temperature"
75
+ max_tokens: "max_tokens"
76
+ stream: "stream"
77
+ top_p: "top_p"
78
+ tools: "tools"
79
+ tool_choice: "tool_choice"
80
+
81
+ response_format: "openai_style"
82
+
83
+ response_paths:
84
+ content: "choices[0].message.content"
85
+ tool_calls: "choices[0].message.tool_calls"
86
+ usage: "usage"
87
+ finish_reason: "choices[0].finish_reason"
88
+
89
+ streaming:
90
+ event_format: "data_lines"
91
+ decoder:
92
+ format: "sse"
93
+ delimiter: "\n\n"
94
+ prefix: "data: "
95
+ done_signal: "[DONE]"
96
+ content_path: "choices[0].delta.content"
97
+ tool_call_path: "choices[0].delta.tool_calls"
98
+ usage_path: "usage"
99
+ frame_selector: "exists($.choices) || exists($.error)"
100
+ event_map:
101
+ - match: "exists($.choices[*].delta.content)"
102
+ emit: "PartialContentDelta"
103
+ fields:
104
+ content: "$.choices[*].delta.content"
105
+ - match: "exists($.choices[*].delta.tool_calls[*].function.name)"
106
+ emit: "ToolCallStarted"
107
+ fields:
108
+ tool_call_id: "$.choices[*].delta.tool_calls[*].id"
109
+ tool_name: "$.choices[*].delta.tool_calls[*].function.name"
110
+ index: "$.choices[*].delta.tool_calls[*].index"
111
+ - match: "exists($.choices[*].delta.tool_calls[*].function.arguments)"
112
+ emit: "PartialToolCall"
113
+ fields:
114
+ arguments: "$.choices[*].delta.tool_calls[*].function.arguments"
115
+ index: "$.choices[*].delta.tool_calls[*].index"
116
+ - match: "exists($.usage)"
117
+ emit: "Metadata"
118
+ fields:
119
+ usage: "$.usage"
120
+ - match: "exists($.choices[*].finish_reason)"
121
+ emit: "FinalCandidate"
122
+ fields:
123
+ finish_reason: "$.choices[*].finish_reason"
124
+ stop_condition: "$.choices[0].finish_reason != null"
125
+
126
+ features:
127
+ multi_candidate:
128
+ support_type: "native"
129
+ param_name: "n"
130
+ response_mapping:
131
+ tool_calls:
132
+ path: "choices[0].message.tool_calls"
133
+ fields:
134
+ id: "id"
135
+ name: "function.name"
136
+ args: "function.arguments"
137
+ error:
138
+ message_path: "error.message"
139
+ code_path: "error.code"
140
+
141
+ capabilities:
142
+ streaming: true
143
+ tools: true
144
+ vision: true
145
+ agentic: true
146
+ parallel_tools: true
147
+ reasoning: true
148
+
149
+ availability:
150
+ required: false
151
+ regions:
152
+ - cn
153
+ check:
154
+ method: GET
155
+ path: "/v1/models"
156
+ expected_status: [200, 401]
157
+ timeout_ms: 5000
158
+
159
+ experimental_features:
160
+ - "search_enhancement"
161
+ - "image_understanding"
162
+ - "role_play"
@@ -0,0 +1,171 @@
1
+ $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v1.json"
2
+
3
+ id: jina
4
+ protocol_version: "1.5"
5
+
6
+ name: Jina AI
7
+ version: "v1"
8
+ status: stable
9
+ category: ai_provider
10
+ official_url: "https://jina.ai/docs"
11
+ support_contact: "https://jina.ai/contact"
12
+
13
+ # Structured endpoint definition (v1.1+ extension)
14
+ endpoint:
15
+ base_url: "https://api.jina.ai"
16
+ protocol: https
17
+ timeout_ms: 30000
18
+
19
+ auth:
20
+ type: bearer
21
+ token_env: "JINA_API_KEY"
22
+ payload_format: "jina_api"
23
+
24
+ # Jina AI supports embeddings and search APIs
25
+ api_families: ["embeddings", "rerank", "reader", "search"]
26
+ default_api_family: "embeddings"
27
+ endpoints:
28
+ embeddings:
29
+ path: "/v1/embeddings"
30
+ method: "POST"
31
+ adapter: "openai"
32
+ rerank:
33
+ path: "/v1/rerank"
34
+ method: "POST"
35
+ adapter: "jina"
36
+ reader:
37
+ path: "/v1/reader"
38
+ method: "POST"
39
+ adapter: "jina"
40
+
41
+ # V1.5 Service Endpoints
42
+ services:
43
+ list_models:
44
+ path: "/v1/models"
45
+ method: "GET"
46
+ response_binding: "data"
47
+
48
+ # Termination reason normalization
49
+ termination:
50
+ source_field: "finish_reason"
51
+ mapping:
52
+ stop: "end_turn"
53
+ length: "max_tokens"
54
+ notes:
55
+ - "Jina Reader uses finish_reason for completion"
56
+ - "Embeddings API does not use finish_reason"
57
+
58
+ # Tool invocation normalization
59
+ tooling:
60
+ source_model: "unknown"
61
+ notes:
62
+ - "Jina AI is primarily embeddings and search"
63
+ - "Does not support traditional tool calling"
64
+
65
+ # Retry policy
66
+ retry_policy:
67
+ strategy: "exponential_backoff"
68
+ max_retries: 3
69
+ max_delay_ms: 30000
70
+ min_delay_ms: 1000
71
+ jitter: "full"
72
+ retry_on_http_status: [429, 500, 502, 503]
73
+ notes:
74
+ - "Jina API is generally fast but may experience spikes"
75
+ - "Exponential backoff recommended for rate limits"
76
+
77
+ # Error classification hints
78
+ error_classification:
79
+ by_http_status:
80
+ "400": "invalid_request"
81
+ "401": "authentication"
82
+ "403": "permission_denied"
83
+ "404": "not_found"
84
+ "429": "rate_limited"
85
+ "500": "server_error"
86
+ "502": "overloaded"
87
+ "503": "overloaded"
88
+ notes:
89
+ - "Jina returns clear error messages in JSON format"
90
+ - "Rate limit info included in error responses"
91
+
92
+ parameter_mappings:
93
+ model: "model"
94
+ input: "input"
95
+ encoding_format: "encoding_format"
96
+ dimensions: "dimensions"
97
+ top_n: "top_n"
98
+ query: "query"
99
+ documents: "documents"
100
+ return_documents: "return_documents"
101
+ url: "url"
102
+ question: "question"
103
+
104
+ response_format: "jina_style"
105
+ response_paths:
106
+ content: "output"
107
+ embedding: "data.embedding"
108
+ data: "data"
109
+ results: "results"
110
+
111
+ streaming:
112
+ event_format: "jina_stream"
113
+ decoder:
114
+ format: "sse"
115
+ delimiter: "\n\n"
116
+ prefix: "data: "
117
+ done_signal: "[DONE]"
118
+ frame_selector: "exists($.delta) || exists($.embedding) || exists($.results)"
119
+ event_map:
120
+ - match: "exists($.delta)"
121
+ emit: "PartialContentDelta"
122
+ fields:
123
+ content: "$.delta.content"
124
+ - match: "exists($.embedding)"
125
+ emit: "Metadata"
126
+ fields:
127
+ embedding: "$.embedding"
128
+ index: "$.index"
129
+ - match: "exists($.results)"
130
+ emit: "FinalCandidate"
131
+ fields:
132
+ results: "$.results"
133
+ stop_condition: "exists($.finish_reason) || exists($.results)"
134
+ extra_metadata_path: "$.usage"
135
+
136
+ features:
137
+ multi_candidate:
138
+ support_type: "native"
139
+ param_name: "top_n"
140
+ response_mapping:
141
+ error:
142
+ message_path: "message"
143
+ code_path: "code"
144
+ type_path: "error"
145
+
146
+ capabilities:
147
+ streaming: false
148
+ tools: false
149
+ vision: false
150
+ agentic: false
151
+ parallel_tools: false
152
+ reasoning: false
153
+
154
+ experimental_features:
155
+ - "rerank_api"
156
+ - "reader_api"
157
+ - "multimodal_embeddings"
158
+ - "code_embeddings"
159
+
160
+ # Availability and health checking (v1.1+ extension)
161
+ availability:
162
+ required: false
163
+ regions:
164
+ - global
165
+ check:
166
+ method: GET
167
+ path: "/v1/models"
168
+ expected_status: [200]
169
+ timeout_ms: 3000
170
+ notes:
171
+ - "Check v1/models endpoint to verify service health"
@@ -0,0 +1,183 @@
1
+ $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v1.json"
2
+
3
+ id: lepton
4
+ protocol_version: "1.5"
5
+
6
+ name: Lepton AI
7
+ version: "v1"
8
+ status: stable
9
+ category: ai_provider
10
+ official_url: "https://www.lepton.ai/docs"
11
+ support_contact: "https://www.lepton.ai/contact"
12
+
13
+ endpoint:
14
+ base_url: "https://llama3-1-8b.lepton.run/api/v1"
15
+ protocol: https
16
+ timeout_ms: 30000
17
+
18
+ # Dynamic base URL template for different models
19
+ base_url_template: "https://{model_id}.lepton.run/api/v1"
20
+
21
+ auth:
22
+ type: bearer
23
+ token_env: "LEPTON_API_TOKEN"
24
+
25
+ payload_format: "openai_style"
26
+
27
+ # OpenAI-compatible API
28
+ api_families: ["chat_completions", "completions"]
29
+ default_api_family: "chat_completions"
30
+
31
+ endpoints:
32
+ chat:
33
+ path: "/chat/completions"
34
+ method: "POST"
35
+ adapter: "openai"
36
+ completions:
37
+ path: "/completions"
38
+ method: "POST"
39
+ adapter: "openai"
40
+
41
+ services:
42
+ list_models:
43
+ path: "/models"
44
+ method: "GET"
45
+ response_binding: "data"
46
+
47
+ termination:
48
+ source_field: "finish_reason"
49
+ mapping:
50
+ stop: "end_turn"
51
+ length: "max_tokens"
52
+ tool_calls: "tool_use"
53
+ notes:
54
+ - "OpenAI-compatible finish_reason."
55
+
56
+ tooling:
57
+ source_model: "openai_tool_calls"
58
+ tool_use:
59
+ id_path: "id"
60
+ name_path: "function.name"
61
+ input_path: "function.arguments"
62
+ input_format: "json_string"
63
+ notes:
64
+ - "OpenAI-compatible tool calling for supported models."
65
+
66
+ retry_policy:
67
+ strategy: "exponential_backoff"
68
+ max_retries: 3
69
+ min_delay_ms: 1000
70
+ max_delay_ms: 30000
71
+ jitter: "full"
72
+ retry_on_http_status: [429, 500, 502, 503]
73
+ notes:
74
+ - "Standard exponential backoff for rate limits and transient errors."
75
+
76
+ error_classification:
77
+ by_http_status:
78
+ "400": "invalid_request"
79
+ "401": "authentication"
80
+ "403": "permission_denied"
81
+ "404": "not_found"
82
+ "429": "rate_limited"
83
+ "500": "server_error"
84
+ "502": "server_error"
85
+ "503": "overloaded"
86
+
87
+ parameter_mappings:
88
+ temperature: "temperature"
89
+ max_tokens: "max_tokens"
90
+ stream: "stream"
91
+ top_p: "top_p"
92
+ stop_sequences: "stop"
93
+ frequency_penalty: "frequency_penalty"
94
+ presence_penalty: "presence_penalty"
95
+ tools: "tools"
96
+ tool_choice: "tool_choice"
97
+ seed: "seed"
98
+ top_k: "top_k"
99
+
100
+ response_format: "openai_style"
101
+
102
+ response_paths:
103
+ content: "choices[0].message.content"
104
+ tool_calls: "choices[0].message.tool_calls"
105
+ usage: "usage"
106
+ finish_reason: "choices[0].finish_reason"
107
+
108
+ streaming:
109
+ event_format: "data_lines"
110
+ decoder:
111
+ format: "sse"
112
+ delimiter: "\n\n"
113
+ prefix: "data: "
114
+ done_signal: "[DONE]"
115
+ content_path: "choices[0].delta.content"
116
+ tool_call_path: "choices[0].delta.tool_calls"
117
+ usage_path: "usage"
118
+ frame_selector: "exists($.choices) || exists($.error)"
119
+ event_map:
120
+ - match: "exists($.choices[*].delta.content)"
121
+ emit: "PartialContentDelta"
122
+ fields:
123
+ content: "$.choices[*].delta.content"
124
+ - match: "exists($.choices[*].delta.tool_calls[*].function.name)"
125
+ emit: "ToolCallStarted"
126
+ fields:
127
+ tool_call_id: "$.choices[*].delta.tool_calls[*].id"
128
+ tool_name: "$.choices[*].delta.tool_calls[*].function.name"
129
+ index: "$.choices[*].delta.tool_calls[*].index"
130
+ - match: "exists($.choices[*].delta.tool_calls[*].function.arguments)"
131
+ emit: "PartialToolCall"
132
+ fields:
133
+ arguments: "$.choices[*].delta.tool_calls[*].function.arguments"
134
+ index: "$.choices[*].delta.tool_calls[*].index"
135
+ - match: "exists($.usage)"
136
+ emit: "Metadata"
137
+ fields:
138
+ usage: "$.usage"
139
+ - match: "exists($.choices[*].finish_reason)"
140
+ emit: "FinalCandidate"
141
+ fields:
142
+ finish_reason: "$.choices[*].finish_reason"
143
+ candidate_index: "$.choices[*].index"
144
+ stop_condition: "$.choices[0].finish_reason != null"
145
+
146
+ features:
147
+ multi_candidate:
148
+ support_type: "native"
149
+ param_name: "n"
150
+ response_mapping:
151
+ tool_calls:
152
+ path: "choices[0].message.tool_calls"
153
+ fields:
154
+ id: "id"
155
+ name: "function.name"
156
+ args: "function.arguments"
157
+ error:
158
+ message_path: "error.message"
159
+ code_path: "error.code"
160
+ type_path: "error.type"
161
+
162
+ capabilities:
163
+ streaming: true
164
+ tools: true
165
+ vision: true
166
+ agentic: true
167
+ parallel_tools: true
168
+ reasoning: false
169
+
170
+ availability:
171
+ required: false
172
+ regions:
173
+ - global
174
+ check:
175
+ method: GET
176
+ path: "/models"
177
+ expected_status: [200, 401]
178
+ timeout_ms: 5000
179
+
180
+ experimental_features:
181
+ - "photon_deployment"
182
+ - "auto_scaling"
183
+ - "custom_models"