@ailib-official/ai-protocol 0.8.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +10 -2
  2. package/dist/v1/models/deepseek-chat.json +4 -4
  3. package/dist/v1/models/gemini.json +59 -1
  4. package/dist/v1/providers/gemini.json +11 -1
  5. package/dist/v2/contracts/anthropic-messages.contract.json +62 -0
  6. package/dist/v2/contracts/gemini-generate.contract.json +59 -0
  7. package/dist/v2/providers/anthropic.json +42 -10
  8. package/dist/v2/providers/cohere.json +21 -0
  9. package/dist/v2/providers/deepseek.json +121 -47
  10. package/dist/v2/providers/doubao.json +23 -8
  11. package/dist/v2/providers/google.json +39 -0
  12. package/dist/v2/providers/groq.json +223 -0
  13. package/dist/v2/providers/jina.json +15 -0
  14. package/dist/v2/providers/moonshot.json +23 -8
  15. package/dist/v2/providers/nvidia.json +520 -0
  16. package/dist/v2/providers/openai.json +39 -11
  17. package/dist/v2/providers/qwen.json +25 -9
  18. package/dist/v2/providers/zhipu.json +114 -22
  19. package/package.json +12 -3
  20. package/schemas/v1.json +1 -1
  21. package/schemas/v2/availability.json +12 -0
  22. package/schemas/v2/capabilities.json +4 -0
  23. package/schemas/v2/error-codes.yaml +5 -0
  24. package/schemas/v2/metadata-model-entry.json +57 -0
  25. package/schemas/v2/pack.json +145 -0
  26. package/schemas/v2/provider-contract.json +45 -0
  27. package/schemas/v2/provider.json +15 -2
  28. package/schemas/v2/tool-calling.json +61 -0
  29. package/v1/models/deepseek-chat.yaml +4 -4
  30. package/v1/models/gemini.yaml +31 -1
  31. package/v1/providers/gemini.yaml +10 -2
  32. package/v2/contracts/anthropic-messages.contract.yaml +55 -0
  33. package/v2/contracts/gemini-generate.contract.yaml +52 -0
  34. package/v2/packs/examples/README.md +9 -0
  35. package/v2/packs/examples/deepseek-economy-pack.json +43 -0
  36. package/v2/providers/anthropic.yaml +34 -13
  37. package/v2/providers/cohere.yaml +16 -3
  38. package/v2/providers/deepseek.yaml +77 -33
  39. package/v2/providers/doubao.yaml +18 -8
  40. package/v2/providers/google.yaml +32 -4
  41. package/v2/providers/groq.yaml +159 -0
  42. package/v2/providers/jina.yaml +10 -0
  43. package/v2/providers/moonshot.yaml +20 -12
  44. package/v2/providers/nvidia.yaml +405 -0
  45. package/v2/providers/openai.yaml +33 -11
  46. package/v2/providers/qwen.yaml +20 -9
  47. package/v2/providers/zhipu.yaml +70 -23
@@ -0,0 +1,61 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v2/tool-calling.json",
4
+ "title": "Tool Calling Capability Declaration",
5
+ "description": "Describes native function calling support and text fallback configuration for a provider.",
6
+ "type": "object",
7
+ "properties": {
8
+ "native": {
9
+ "type": "object",
10
+ "properties": {
11
+ "supported": { "type": "boolean", "default": true },
12
+ "reliability": {
13
+ "type": "string",
14
+ "enum": ["full", "partial", "unreliable"],
15
+ "default": "full"
16
+ },
17
+ "parallel": { "type": "boolean" },
18
+ "streaming": { "type": "boolean" },
19
+ "notes": { "type": "string" }
20
+ },
21
+ "additionalProperties": false
22
+ },
23
+ "text_fallback": {
24
+ "type": ["object", "null"],
25
+ "properties": {
26
+ "format": {
27
+ "type": "string",
28
+ "enum": ["xml_json", "markdown_fence", "custom"]
29
+ },
30
+ "wrapper": { "type": "string" },
31
+ "body": {
32
+ "type": "string",
33
+ "enum": ["json", "yaml", "plain"]
34
+ },
35
+ "name_location": {
36
+ "type": "string",
37
+ "enum": ["attribute", "json_field"]
38
+ },
39
+ "args_key": { "type": "string" },
40
+ "known_dialects": {
41
+ "type": "array",
42
+ "items": {
43
+ "type": "object",
44
+ "required": ["tag", "map_to"],
45
+ "properties": {
46
+ "tag": { "type": "string" },
47
+ "map_to": { "type": "string" }
48
+ },
49
+ "additionalProperties": false
50
+ }
51
+ },
52
+ "prompt_level": {
53
+ "type": "string",
54
+ "enum": ["L1", "L2", "L3"]
55
+ }
56
+ },
57
+ "additionalProperties": false
58
+ }
59
+ },
60
+ "additionalProperties": false
61
+ }
@@ -4,8 +4,8 @@ protocol_version: "1.1"
4
4
  models:
5
5
  deepseek-chat:
6
6
  provider: deepseek
7
- model_id: "deepseek-chat"
8
- display_name: "DeepSeek Chat"
7
+ model_id: "deepseek-v4-pro"
8
+ display_name: "DeepSeek Chat (V4 Pro)"
9
9
  verification:
10
10
  status: verified
11
11
  verified_at: "2026-01-28"
@@ -20,8 +20,8 @@ models:
20
20
 
21
21
  deepseek-reasoner:
22
22
  provider: deepseek
23
- model_id: "deepseek-reasoner"
24
- display_name: "DeepSeek Reasoner"
23
+ model_id: "deepseek-v4-flash"
24
+ display_name: "DeepSeek Reasoner (V4 Flash)"
25
25
  verification:
26
26
  status: verified
27
27
  verified_at: "2026-03-06"
@@ -1,5 +1,5 @@
1
1
  $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v1.json"
2
- protocol_version: "1.1"
2
+ protocol_version: "1.5"
3
3
 
4
4
  # Gemini Model Instances
5
5
  models:
@@ -30,3 +30,33 @@ models:
30
30
  capabilities: ["chat", "vision", "audio", "video", "tools", "streaming"]
31
31
  status: active
32
32
  tags: ["google", "multimodal", "fast", "cost-effective"]
33
+
34
+ # VERIFIED_AT: 2026-05-17 (official model card: gemini-2.5-flash-lite)
35
+ gemini-2.5-flash-lite:
36
+ provider: gemini
37
+ model_id: "gemini-2.5-flash-lite"
38
+ name: "Gemini 2.5 Flash-Lite"
39
+ verification:
40
+ status: verified
41
+ verified_at: "2026-05-17"
42
+ source: "https://ai.google.dev/gemini-api/docs/models/gemini-2.5-flash-lite"
43
+ notes: "Free-tier RPM/RPD vary by account and region; typical AI Studio free band is high concurrency vs Flash/Pro."
44
+ context_window: 1048576
45
+ capabilities: ["chat", "vision", "audio", "video", "tools", "streaming", "reasoning"]
46
+ status: active
47
+ tags: ["google", "multimodal", "flash-lite", "cost-effective", "free-tier"]
48
+
49
+ # VERIFIED_AT: 2026-05-17 (official model card: gemini-3.1-flash-lite-preview)
50
+ gemini-3.1-flash-lite-preview:
51
+ provider: gemini
52
+ model_id: "gemini-3.1-flash-lite-preview"
53
+ name: "Gemini 3.1 Flash-Lite (Preview)"
54
+ verification:
55
+ status: verified
56
+ verified_at: "2026-05-17"
57
+ source: "https://ai.google.dev/gemini-api/docs/models/gemini-3.1-flash-lite-preview"
58
+ notes: "Preview model; availability and quotas may change by region."
59
+ context_window: 1048576
60
+ capabilities: ["chat", "vision", "audio", "video", "tools", "streaming", "reasoning"]
61
+ status: active
62
+ tags: ["google", "multimodal", "flash-lite", "preview", "free-tier"]
@@ -20,14 +20,18 @@ auth:
20
20
  token_env: "GEMINI_API_KEY"
21
21
  payload_format: "gemini_style"
22
22
 
23
- # Gemini native API families
24
- api_families: ["generate_content"]
23
+ # Gemini native API plus OpenAI-compatible chat (same API key; Bearer on /openai/*).
24
+ api_families: ["generate_content", "chat_completions"]
25
25
  default_api_family: "generate_content"
26
26
  endpoints:
27
27
  chat:
28
28
  path: "/models/{model}:generateContent"
29
29
  method: "POST"
30
30
  adapter: "gemini"
31
+ openai_chat:
32
+ path: "/openai/chat/completions"
33
+ method: "POST"
34
+ adapter: "openai"
31
35
 
32
36
  # V1.5 Service Endpoints
33
37
  services:
@@ -227,3 +231,7 @@ availability:
227
231
  experimental_features:
228
232
  - "multimodal_video"
229
233
  - "multimodal_audio"
234
+
235
+ notes:
236
+ - "OpenAI-compatible Chat Completions: POST {base_url}/openai/chat/completions with Authorization: Bearer <GEMINI_API_KEY> and OpenAI-style JSON body. See https://ai.google.dev/gemini-api/docs/openai"
237
+ - "Native GenerateContent continues to use ?key= query authentication as configured under auth.type query_param."
@@ -0,0 +1,55 @@
1
+ # Anthropic Messages API — ProviderContract (content_block_mapping reference)
2
+ # PT-079-R1 | Drives manifest encoder for Claude document blocks
3
+ $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v2/provider-contract.json"
4
+
5
+ contract_version: "1.0"
6
+ provider_id: anthropic
7
+ api_style: anthropic_messages
8
+ api_version: "2023-06-01"
9
+
10
+ request_mapping:
11
+ message_format: anthropic_messages
12
+ role_mapping:
13
+ system: system
14
+ user: user
15
+ assistant: assistant
16
+ tool: user
17
+ system_message_handling: top_level_field
18
+ content_block_mapping:
19
+ text:
20
+ wrapper: content_blocks
21
+ image:
22
+ format: anthropic_source
23
+ base64_field: source.data
24
+ url_field: source.url
25
+ document:
26
+ format: anthropic_document
27
+ type_field: document
28
+ source_wrapper: anthropic_source
29
+ base64_field: source.data
30
+ mime_type_field: source.media_type
31
+ default_mime_type: application/pdf
32
+ ref_resolution: error_before_encode
33
+
34
+ response_mapping:
35
+ content_path: "$.content[?(@.type=='text')].text"
36
+ finish_reason_path: "$.stop_reason"
37
+ usage_path: "$.usage"
38
+ finish_reason_mapping:
39
+ end_turn: stop
40
+ max_tokens: length
41
+ tool_use: tool_calls
42
+ stop_sequence: stop
43
+
44
+ capability_contracts:
45
+ streaming:
46
+ protocol: sse
47
+ supports_usage_in_stream: true
48
+ supports_tool_streaming: true
49
+ done_signal: message_stop
50
+ tools:
51
+ definition_format: anthropic_tools
52
+ supports_parallel: true
53
+
54
+ authentication_contract:
55
+ method: api_key_header
@@ -0,0 +1,52 @@
1
+ # Google Gemini generateContent — ProviderContract (content_block_mapping reference)
2
+ # PT-079-R1 | Drives manifest encoder for Gemini inline document payloads
3
+ $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v2/provider-contract.json"
4
+
5
+ contract_version: "1.0"
6
+ provider_id: google
7
+ api_style: gemini_generate
8
+ api_version: "v1beta"
9
+
10
+ request_mapping:
11
+ message_format: gemini_contents
12
+ role_mapping:
13
+ system: user
14
+ user: user
15
+ assistant: model
16
+ tool: user
17
+ system_message_handling: prepend_to_user
18
+ content_block_mapping:
19
+ text:
20
+ field: text
21
+ wrapper: array_of_parts
22
+ image:
23
+ format: gemini_inline_data
24
+ base64_field: inlineData.data
25
+ mime_type_field: inlineData.mimeType
26
+ document:
27
+ format: gemini_inline_data
28
+ source_wrapper: gemini_inline_data
29
+ base64_field: inlineData.data
30
+ mime_type_field: inlineData.mimeType
31
+ default_mime_type: application/pdf
32
+ ref_resolution: error_before_encode
33
+
34
+ response_mapping:
35
+ content_path: "$.candidates[0].content.parts[?(@.text)].text"
36
+ finish_reason_path: "$.candidates[0].finishReason"
37
+ usage_path: "$.usageMetadata"
38
+ finish_reason_mapping:
39
+ STOP: stop
40
+ MAX_TOKENS: length
41
+ SAFETY: content_filter
42
+ RECITATION: content_filter
43
+
44
+ capability_contracts:
45
+ streaming:
46
+ protocol: sse
47
+ supports_usage_in_stream: true
48
+ tools:
49
+ definition_format: gemini_declarations
50
+
51
+ authentication_contract:
52
+ method: query_param
@@ -0,0 +1,9 @@
1
+ # Pack examples
2
+
3
+ JSON pack manifests validated against `schemas/v2/pack.json`.
4
+
5
+ | File | Description |
6
+ |------|-------------|
7
+ | [deepseek-economy-pack.json](./deepseek-economy-pack.json) | Cost-optimized DeepSeek routes with Groq fallback (PR-PP-001) |
8
+
9
+ Provider and model ids must exist in `v2/providers/*.yaml` manifests.
@@ -0,0 +1,43 @@
1
+ {
2
+ "$schema": "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v2/pack.json",
3
+ "id": "deepseek-economy-pack",
4
+ "name": "DeepSeek Economy Pack",
5
+ "version": "0.1.0",
6
+ "protocol_version": "2.0",
7
+ "description": "Cost-optimized chat routes centered on DeepSeek models. Primary: deepseek-chat; fallback: deepseek-reasoner for reasoning-tagged requests. NOT PRODUCTION SLA — example pack for Prism Pack contract (PR-PP-001).",
8
+ "status": "draft",
9
+ "optimize_default": "cost",
10
+ "provider_routes": [
11
+ {
12
+ "provider": "deepseek",
13
+ "model": "deepseek-chat",
14
+ "priority": 0,
15
+ "cost_weight": 1.0,
16
+ "capability_tags": ["chat", "economy"],
17
+ "notes": "Primary economy route; legacy alias that maps to deepseek-v4-flash (non-thinking) per provider manifest"
18
+ },
19
+ {
20
+ "provider": "deepseek",
21
+ "model": "deepseek-reasoner",
22
+ "priority": 1,
23
+ "cost_weight": 0.85,
24
+ "capability_tags": ["chat", "reasoning"],
25
+ "notes": "Reasoning-capable fallback; higher cost than deepseek-chat"
26
+ },
27
+ {
28
+ "provider": "groq",
29
+ "model": "llama-3.1-8b-instant",
30
+ "priority": 2,
31
+ "cost_weight": 0.7,
32
+ "capability_tags": ["chat", "fallback"],
33
+ "notes": "Cross-provider fallback when DeepSeek is unhealthy (aligns with prism-core cost_router fixtures)"
34
+ }
35
+ ],
36
+ "metadata": {
37
+ "publisher": "ailib-prism",
38
+ "task_ref": "PR-PP-001",
39
+ "docs": "https://github.com/ailib-official/ai-protocol/blob/main/docs/PACK_SPECIFICATION.md",
40
+ "billing_hint": "pay_per_use",
41
+ "sla": "NOT PRODUCTION SLA — contract example only"
42
+ }
43
+ }
@@ -1,14 +1,13 @@
1
- # Anthropic V2 正式提供商清单 — MCP 创建者,合并 v2-alpha 结构与审计修正后数据
2
- # AI-Protocol V2 Provider Manifest
3
- # Provider: Anthropic | Models: Claude Opus 4.6 / Sonnet 4.5
4
- # Last Updated: 2026-02-16
1
+ # Anthropic V2 Provider Manifest — creator of MCP (Model Context Protocol)
2
+ # Provider: Anthropic | Models: Claude Opus 4.8 / Sonnet 4.6 / Haiku 4.5
3
+ # Last Updated: 2026-06-26 (4.6→4.8, add tool_calling)
5
4
  $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v2/provider.json"
6
5
 
7
6
  # === Ring 1: Core Skeleton ===
8
7
  id: anthropic
9
8
  protocol_version: "2.0"
10
9
  name: "Anthropic"
11
- version: "4.6.0"
10
+ version: "4.8.0"
12
11
  status: stable
13
12
  category: ai_provider
14
13
  official_url: "https://docs.anthropic.com"
@@ -45,6 +44,16 @@ error_classification:
45
44
  "api_error": "server_error"
46
45
  "overloaded_error": "overloaded"
47
46
 
47
+ availability:
48
+ required: false
49
+ regions:
50
+ - global
51
+ check:
52
+ method: GET
53
+ path: "/models"
54
+ expected_status: [200, 401]
55
+ timeout_ms: 3000
56
+
48
57
  # === Ring 2: Capability Mapping ===
49
58
  capabilities:
50
59
  required:
@@ -67,6 +76,13 @@ capabilities:
67
76
  streaming_usage: true
68
77
  system_messages: true
69
78
 
79
+ tool_calling:
80
+ native:
81
+ supported: true
82
+ reliability: "full"
83
+ parallel: true
84
+ streaming: true
85
+
70
86
  capability_profile:
71
87
  phase: "ios_v1"
72
88
  inputs:
@@ -218,16 +234,21 @@ termination:
218
234
 
219
235
  metadata:
220
236
  models:
221
- claude-opus-4-6:
237
+ claude-opus-4-8:
222
238
  context_window: 1000000
223
- max_output_tokens: 8192
224
- release_date: "2026-02-05"
225
- pricing: { input_per_1m: 5.00, output_per_1m: 25.00 }
226
- claude-sonnet-4-5:
227
- context_window: 200000
228
- max_output_tokens: 8192
229
- release_date: "2025-11-24"
239
+ max_output_tokens: 32768
240
+ release_date: "2026-06-15"
241
+ pricing: { input_per_1m: 15.00, output_per_1m: 75.00 }
242
+ claude-sonnet-4-6:
243
+ context_window: 1000000
244
+ max_output_tokens: 32768
245
+ release_date: "2026-06-11"
230
246
  pricing: { input_per_1m: 3.00, output_per_1m: 15.00 }
247
+ claude-haiku-4-5:
248
+ context_window: 200000
249
+ max_output_tokens: 64000
250
+ release_date: "2025-10-01"
251
+ pricing: { input_per_1m: 0.80, output_per_1m: 4.00 }
231
252
  context_management:
232
253
  long_context_beta: "context-1m-2025-08-07"
233
254
  compaction_api: true
@@ -1,7 +1,5 @@
1
1
  # Cohere V2 Provider Manifest — Chat + Rerank
2
- # AI-Protocol V2 Provider Manifest
3
- # Provider: Cohere | Rerank v3.5/v4, Command R+
4
- # Last Updated: 2026-02-19
2
+ # Last Updated: 2026-06-26 (add tool_calling)
5
3
  $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v2/provider.json"
6
4
 
7
5
  # === Ring 1: Core Skeleton ===
@@ -35,6 +33,16 @@ error_classification:
35
33
  "500": "server_error"
36
34
  "503": "overloaded"
37
35
 
36
+ availability:
37
+ required: false
38
+ regions:
39
+ - global
40
+ check:
41
+ method: GET
42
+ path: "/models"
43
+ expected_status: [200, 401]
44
+ timeout_ms: 3000
45
+
38
46
  # === Ring 2: Capability Mapping ===
39
47
  capabilities:
40
48
  required:
@@ -48,6 +56,11 @@ capabilities:
48
56
  structured_output: true
49
57
  system_messages: true
50
58
 
59
+ tool_calling:
60
+ native:
61
+ supported: true
62
+ reliability: "full"
63
+
51
64
  capability_profile:
52
65
  phase: "ios_v1"
53
66
  inputs:
@@ -1,21 +1,23 @@
1
- # DeepSeek V2 正式提供商清单 — OpenAI 兼容 API,超高性价比
1
+ # DeepSeek V4 Provider Manifest — API compatible with both OpenAI and Anthropic formats
2
2
  # AI-Protocol V2 Provider Manifest
3
- # Provider: DeepSeek AI | Models: V3.2 / R1
4
- # Last Updated: 2026-02-16
3
+ # Provider: DeepSeek AI | Models: V4-Pro / V4-Flash
4
+ # Last Updated: 2026-06-26 (updated from V3.2 → V4)
5
+ # Source: https://api-docs.deepseek.com/updates (2026-04-24)
6
+ # Important: deepseek-chat / deepseek-reasoner are DEPRECATED — scheduled for retirement on 2026-07-24
5
7
  $schema: "https://raw.githubusercontent.com/ailib-official/ai-protocol/main/schemas/v2/provider.json"
6
8
 
7
9
  # === Ring 1: Core Skeleton ===
8
10
  id: deepseek
9
11
  protocol_version: "2.0"
10
12
  name: "DeepSeek"
11
- version: "3.2.0"
13
+ version: "4.0.0"
12
14
  status: stable
13
15
  category: ai_provider
14
16
  official_url: "https://api-docs.deepseek.com/"
15
17
  support_contact: "https://api-docs.deepseek.com/"
16
18
 
17
19
  endpoint:
18
- base_url: "https://api.deepseek.com/v1"
20
+ base_url: "https://api.deepseek.com"
19
21
  chat: "/chat/completions"
20
22
  auth:
21
23
  type: "bearer"
@@ -27,6 +29,7 @@ error_classification:
27
29
  by_http_status:
28
30
  "400": "invalid_request"
29
31
  "401": "authentication"
32
+ "402": "insufficient_quota"
30
33
  "403": "permission_denied"
31
34
  "404": "not_found"
32
35
  "413": "request_too_large"
@@ -39,6 +42,17 @@ error_classification:
39
42
  "rate_limit_exceeded": "rate_limited"
40
43
  "insufficient_quota": "quota_exhausted"
41
44
 
45
+ availability:
46
+ required: false
47
+ regions:
48
+ - cn
49
+ - global
50
+ check:
51
+ method: GET
52
+ path: "/models"
53
+ expected_status: [200, 401]
54
+ timeout_ms: 3000
55
+
42
56
  # === Ring 2: Capability Mapping ===
43
57
  capabilities:
44
58
  required:
@@ -46,7 +60,6 @@ capabilities:
46
60
  - streaming
47
61
  - tools
48
62
  optional:
49
- - vision
50
63
  - parallel_tools
51
64
  - agentic
52
65
  - reasoning
@@ -59,6 +72,25 @@ capabilities:
59
72
  streaming_usage: true
60
73
  system_messages: true
61
74
 
75
+ tool_calling:
76
+ native:
77
+ supported: true
78
+ reliability: "partial"
79
+ parallel: true
80
+ streaming: true
81
+ notes: "DeepSeek native tool calling is unreliable — LLM often outputs <shell> instead; text fallback required"
82
+ text_fallback:
83
+ format: "xml_json"
84
+ wrapper: "tool_call"
85
+ body: "json"
86
+ name_location: "json_field"
87
+ args_key: "arguments"
88
+ known_dialects:
89
+ - { tag: "shell", map_to: "shell" }
90
+ - { tag: "bash", map_to: "shell" }
91
+ - { tag: "function", map_to: "" }
92
+ prompt_level: "L2"
93
+
62
94
  capability_profile:
63
95
  phase: "ios_v1"
64
96
  inputs:
@@ -66,7 +98,7 @@ capability_profile:
66
98
  outcomes:
67
99
  types: ["text_completion", "structured_json", "tool_call_sequence"]
68
100
  systems:
69
- requires: ["mcp", "search"]
101
+ requires: ["mcp"]
70
102
 
71
103
  parameters:
72
104
  temperature: { type: float, range: [0.0, 2.0], default: 1.0 }
@@ -75,14 +107,7 @@ parameters:
75
107
  frequency_penalty: { type: float, range: [-2.0, 2.0] }
76
108
  presence_penalty: { type: float, range: [-2.0, 2.0] }
77
109
  stream: { type: boolean }
78
-
79
- parameter_mappings:
80
- temperature: "temperature"
81
- max_tokens: "max_tokens"
82
- stream: "stream"
83
- top_p: "top_p"
84
- tools: "tools"
85
- tool_choice: "tool_choice"
110
+ reasoning_effort: { type: string, enum: ["low", "medium", "high"], default: "medium" }
86
111
 
87
112
  streaming:
88
113
  decoder:
@@ -98,6 +123,10 @@ streaming:
98
123
  emit: "PartialContentDelta"
99
124
  fields:
100
125
  content: "$.choices[*].delta.content"
126
+ - match: "exists($.choices[*].delta.reasoning_content)"
127
+ emit: "ThinkingDelta"
128
+ fields:
129
+ thinking: "$.choices[*].delta.reasoning_content"
101
130
  - match: "exists($.choices[*].delta.tool_calls)"
102
131
  emit: "PartialToolCall"
103
132
  fields:
@@ -115,10 +144,7 @@ streaming:
115
144
  multimodal:
116
145
  input:
117
146
  vision:
118
- supported: true
119
- formats: [jpeg, png, gif, webp]
120
- encoding_methods: [base64_inline, url]
121
- document_understanding: true
147
+ supported: false
122
148
  audio:
123
149
  supported: false
124
150
  video:
@@ -131,19 +157,23 @@ multimodal:
131
157
  supported: false
132
158
 
133
159
  # === Ring 3: Advanced Extensions ===
134
- api_families: ["chat_completions"]
160
+ api_families: ["chat_completions", "anthropic_messages"]
135
161
  default_api_family: "chat_completions"
136
162
 
137
163
  endpoints:
138
- chat:
164
+ chat_openai:
139
165
  path: "/chat/completions"
140
166
  method: "POST"
141
167
  adapter: "openai"
168
+ chat_anthropic:
169
+ path: "/anthropic/v1/messages"
170
+ method: "POST"
171
+ adapter: "anthropic"
142
172
 
143
173
  retry_policy:
144
174
  strategy: "exponential_backoff"
145
175
  max_retries: 3
146
- min_delay_ms: 1000
176
+ min_delay_ms: 500
147
177
  max_delay_ms: 30000
148
178
  jitter: "full"
149
179
  retry_on_http_status: [429, 500, 502, 503]
@@ -156,21 +186,35 @@ termination:
156
186
  tool_calls: "tool_use"
157
187
 
158
188
  metadata:
159
- api_compatibility: openai
189
+ api_compatibility: [openai, anthropic]
160
190
  open_source:
161
- license: MIT
162
- hugging_face: "https://huggingface.co/deepseek-ai/DeepSeek-V3.2"
191
+ license: "MIT (weights)"
192
+ hugging_face: "https://huggingface.co/collections/deepseek-ai/deepseek-v4"
163
193
  models:
194
+ deepseek-v4-pro:
195
+ context_window: 1048576
196
+ max_output_tokens: 393216
197
+ release_date: "2026-04-24"
198
+ architecture: { type: moe, total: "1.6T", active: "49B" }
199
+ pricing: { input_per_1m: 0.435, output_per_1m: 0.87, cache_hit_input_per_1m: 0.003625 }
200
+ thinking: "dual_mode"
201
+ deepseek-v4-flash:
202
+ context_window: 1048576
203
+ max_output_tokens: 393216
204
+ release_date: "2026-04-24"
205
+ architecture: { type: moe, total: "284B", active: "13B" }
206
+ pricing: { input_per_1m: 0.14, output_per_1m: 0.28, cache_hit_input_per_1m: 0.0028 }
207
+ thinking: "dual_mode"
208
+ deprecated:
164
209
  deepseek-chat:
165
- context_window: 128000
166
- max_output_tokens: 8192
167
- architecture: { type: moe, total: "671B", active: "37B", experts: 256 }
168
- pricing: { input_per_1m: 0.28, output_per_1m: 0.42 }
210
+ retired_on: "2026-07-24"
211
+ maps_to: "deepseek-v4-flash (non-thinking)"
212
+ note: "Use deepseek-v4-flash instead"
169
213
  deepseek-reasoner:
170
- context_window: 128000
171
- max_output_tokens: 8192
172
- pricing: { input_per_1m: 0.55, output_per_1m: 2.19 }
214
+ retired_on: "2026-07-24"
215
+ maps_to: "deepseek-v4-flash (thinking mode)"
216
+ note: "Use deepseek-v4-flash with thinking mode instead"
173
217
  sdk:
174
- note: "OpenAI-compatible — use openai SDK with base_url override"
218
+ note: "OpenAI-compatible — use openai SDK with base_url override. Also supports Anthropic Messages API format."
175
219
  python: openai
176
220
  typescript: openai