muonroi-cli 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +122 -122
  3. package/dist/packages/agent-harness-core/src/predicate.d.ts +1 -1
  4. package/dist/src/agent-harness/__tests__/mock-model.spec.js +48 -1
  5. package/dist/src/agent-harness/mock-model.d.ts +11 -0
  6. package/dist/src/agent-harness/mock-model.js +21 -0
  7. package/dist/src/cli/cost-forensics.js +12 -12
  8. package/dist/src/council/__tests__/clarification-prompt.test.js +51 -0
  9. package/dist/src/council/__tests__/clarifier-ready-gate.test.js +32 -0
  10. package/dist/src/council/__tests__/decisions-lock.test.js +17 -1
  11. package/dist/src/council/__tests__/oauth-reachable.test.d.ts +1 -0
  12. package/dist/src/council/__tests__/oauth-reachable.test.js +31 -0
  13. package/dist/src/council/__tests__/parse-outcome-fallback.test.js +11 -0
  14. package/dist/src/council/clarifier.js +9 -1
  15. package/dist/src/council/debate.js +5 -1
  16. package/dist/src/council/decisions-lock.js +3 -3
  17. package/dist/src/council/index.js +12 -5
  18. package/dist/src/council/leader.d.ts +0 -17
  19. package/dist/src/council/leader.js +22 -15
  20. package/dist/src/council/planner.js +1 -1
  21. package/dist/src/council/prompts.js +63 -57
  22. package/dist/src/council/types.d.ts +7 -0
  23. package/dist/src/ee/__tests__/ee-onboarding.test.d.ts +1 -0
  24. package/dist/src/ee/__tests__/ee-onboarding.test.js +32 -0
  25. package/dist/src/ee/auth.d.ts +9 -0
  26. package/dist/src/ee/auth.js +19 -0
  27. package/dist/src/ee/ee-onboarding.d.ts +5 -0
  28. package/dist/src/ee/ee-onboarding.js +76 -0
  29. package/dist/src/generated/version.d.ts +1 -1
  30. package/dist/src/generated/version.js +1 -1
  31. package/dist/src/headless/output.js +6 -4
  32. package/dist/src/headless/output.test.js +4 -3
  33. package/dist/src/index.js +20 -1
  34. package/dist/src/mcp/__tests__/auto-setup.test.js +74 -0
  35. package/dist/src/mcp/__tests__/client-pool.spec.d.ts +1 -0
  36. package/dist/src/mcp/__tests__/client-pool.spec.js +98 -0
  37. package/dist/src/mcp/__tests__/parallel-build.spec.d.ts +1 -0
  38. package/dist/src/mcp/__tests__/parallel-build.spec.js +67 -0
  39. package/dist/src/mcp/__tests__/smart-filter.test.js +56 -0
  40. package/dist/src/mcp/auto-setup.js +56 -2
  41. package/dist/src/mcp/client-pool.d.ts +46 -0
  42. package/dist/src/mcp/client-pool.js +212 -0
  43. package/dist/src/mcp/oauth-callback.js +2 -2
  44. package/dist/src/mcp/parse-headers.test.js +14 -14
  45. package/dist/src/mcp/runtime.d.ts +28 -0
  46. package/dist/src/mcp/runtime.js +117 -51
  47. package/dist/src/mcp/self-verify-runner.d.ts +14 -0
  48. package/dist/src/mcp/self-verify-runner.js +38 -0
  49. package/dist/src/mcp/setup-guide-text.d.ts +9 -0
  50. package/dist/src/mcp/setup-guide-text.js +84 -0
  51. package/dist/src/mcp/smart-filter.js +49 -0
  52. package/dist/src/mcp/smoke.test.js +43 -43
  53. package/dist/src/mcp/tools-server.d.ts +7 -0
  54. package/dist/src/mcp/tools-server.js +19 -22
  55. package/dist/src/models/catalog.json +349 -349
  56. package/dist/src/ops/__tests__/doctor-ee-health.test.js +21 -0
  57. package/dist/src/ops/doctor.d.ts +3 -2
  58. package/dist/src/ops/doctor.js +47 -11
  59. package/dist/src/ops/doctor.test.js +4 -3
  60. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.d.ts +1 -0
  61. package/dist/src/orchestrator/__tests__/mcp-capability-block.test.js +39 -0
  62. package/dist/src/orchestrator/__tests__/project-stack.test.d.ts +1 -0
  63. package/dist/src/orchestrator/__tests__/project-stack.test.js +65 -0
  64. package/dist/src/orchestrator/batch-turn-runner.js +7 -11
  65. package/dist/src/orchestrator/message-processor.js +57 -27
  66. package/dist/src/orchestrator/orchestrator.js +26 -0
  67. package/dist/src/orchestrator/prompts.d.ts +51 -0
  68. package/dist/src/orchestrator/prompts.js +257 -134
  69. package/dist/src/orchestrator/scope-ceiling.js +6 -1
  70. package/dist/src/orchestrator/stream-runner.js +20 -15
  71. package/dist/src/orchestrator/text-tool-call-detector.test.js +13 -13
  72. package/dist/src/pil/__tests__/clarity-gate.test.js +24 -215
  73. package/dist/src/pil/__tests__/config.test.js +1 -17
  74. package/dist/src/pil/__tests__/discovery.test.js +144 -11
  75. package/dist/src/pil/__tests__/layer1-intent-trace.test.js +7 -2
  76. package/dist/src/pil/__tests__/layer1-intent.test.js +3 -0
  77. package/dist/src/pil/__tests__/layer16-clarity.test.js +32 -116
  78. package/dist/src/pil/__tests__/layer4-gsd.test.js +37 -0
  79. package/dist/src/pil/__tests__/layer6-output.test.js +137 -18
  80. package/dist/src/pil/__tests__/llm-classify.test.js +49 -2
  81. package/dist/src/pil/agent-operating-contract.d.ts +1 -1
  82. package/dist/src/pil/agent-operating-contract.js +2 -0
  83. package/dist/src/pil/agent-operating-contract.test.js +7 -2
  84. package/dist/src/pil/cheap-model-playbook.js +35 -35
  85. package/dist/src/pil/cheap-model-workbooks.js +16 -13
  86. package/dist/src/pil/clarity-gate.d.ts +21 -19
  87. package/dist/src/pil/clarity-gate.js +26 -153
  88. package/dist/src/pil/config.d.ts +9 -1
  89. package/dist/src/pil/config.js +15 -4
  90. package/dist/src/pil/discovery.js +211 -136
  91. package/dist/src/pil/layer1-intent.d.ts +12 -0
  92. package/dist/src/pil/layer1-intent.js +283 -38
  93. package/dist/src/pil/layer1-intent.test.js +210 -4
  94. package/dist/src/pil/layer16-clarity.d.ts +25 -11
  95. package/dist/src/pil/layer16-clarity.js +19 -306
  96. package/dist/src/pil/layer4-gsd.js +18 -6
  97. package/dist/src/pil/layer6-output.d.ts +2 -0
  98. package/dist/src/pil/layer6-output.js +137 -22
  99. package/dist/src/pil/llm-classify.d.ts +26 -0
  100. package/dist/src/pil/llm-classify.js +34 -5
  101. package/dist/src/pil/native-capabilities-workbook.d.ts +1 -1
  102. package/dist/src/pil/native-capabilities-workbook.js +82 -76
  103. package/dist/src/pil/schema.d.ts +8 -0
  104. package/dist/src/pil/schema.js +12 -1
  105. package/dist/src/pil/task-tier-map.js +4 -0
  106. package/dist/src/pil/types.d.ts +11 -1
  107. package/dist/src/product-loop/done-gate.js +3 -3
  108. package/dist/src/product-loop/loop-driver.js +18 -18
  109. package/dist/src/product-loop/progress-snapshot.js +4 -4
  110. package/dist/src/providers/auth/gemini-oauth.js +6 -15
  111. package/dist/src/providers/auth/grok-oauth.js +6 -15
  112. package/dist/src/providers/auth/openai-oauth.js +6 -15
  113. package/dist/src/providers/mcp-vision-bridge.js +48 -48
  114. package/dist/src/reporter/index.js +1 -1
  115. package/dist/src/scaffold/bb-ecosystem-apply.js +47 -47
  116. package/dist/src/scaffold/bb-quality-gate.js +5 -5
  117. package/dist/src/scaffold/continuation-prompt.js +60 -60
  118. package/dist/src/scaffold/init-new.js +453 -453
  119. package/dist/src/self-qa/__tests__/scenario-planner.test.js +3 -3
  120. package/dist/src/self-qa/agentic-loop.js +24 -19
  121. package/dist/src/self-qa/spec-emitter.js +26 -23
  122. package/dist/src/storage/__tests__/migrations.test.js +2 -2
  123. package/dist/src/storage/interaction-log.js +5 -5
  124. package/dist/src/storage/migrations.js +122 -122
  125. package/dist/src/storage/sessions.js +42 -42
  126. package/dist/src/storage/transcript.js +91 -84
  127. package/dist/src/storage/usage.js +14 -14
  128. package/dist/src/storage/workspaces.js +12 -12
  129. package/dist/src/tools/__tests__/native-tools.test.d.ts +1 -0
  130. package/dist/src/tools/__tests__/native-tools.test.js +53 -0
  131. package/dist/src/tools/git-safety.d.ts +61 -0
  132. package/dist/src/tools/git-safety.js +141 -0
  133. package/dist/src/tools/git-safety.test.d.ts +1 -0
  134. package/dist/src/tools/git-safety.test.js +111 -0
  135. package/dist/src/tools/native-tools.d.ts +31 -0
  136. package/dist/src/tools/native-tools.js +273 -0
  137. package/dist/src/tools/registry-git-safety.test.d.ts +7 -0
  138. package/dist/src/tools/registry-git-safety.test.js +92 -0
  139. package/dist/src/tools/registry.js +39 -4
  140. package/dist/src/ui/__tests__/markdown-render.test.d.ts +1 -0
  141. package/dist/src/ui/__tests__/markdown-render.test.js +48 -0
  142. package/dist/src/ui/app.js +0 -0
  143. package/dist/src/ui/components/message-view.js +4 -1
  144. package/dist/src/ui/components/structured-response-view.js +7 -3
  145. package/dist/src/ui/components/tool-group.js +7 -1
  146. package/dist/src/ui/markdown-render.d.ts +41 -0
  147. package/dist/src/ui/markdown-render.js +223 -0
  148. package/dist/src/ui/markdown.d.ts +10 -0
  149. package/dist/src/ui/markdown.js +12 -35
  150. package/dist/src/ui/slash/council-inspect.js +4 -4
  151. package/dist/src/ui/slash/export.js +4 -4
  152. package/dist/src/ui/utils/text.d.ts +8 -0
  153. package/dist/src/ui/utils/text.js +16 -0
  154. package/dist/src/ui/utils/text.test.d.ts +1 -0
  155. package/dist/src/ui/utils/text.test.js +23 -0
  156. package/dist/src/usage/ledger.js +48 -15
  157. package/dist/src/utils/__tests__/footprint-gitignore.test.d.ts +1 -0
  158. package/dist/src/utils/__tests__/footprint-gitignore.test.js +50 -0
  159. package/dist/src/utils/clipboard-image.js +23 -23
  160. package/dist/src/utils/open-url.d.ts +56 -0
  161. package/dist/src/utils/open-url.js +58 -0
  162. package/dist/src/utils/open-url.test.d.ts +1 -0
  163. package/dist/src/utils/open-url.test.js +86 -0
  164. package/dist/src/utils/settings.d.ts +12 -0
  165. package/dist/src/utils/settings.js +48 -0
  166. package/dist/src/utils/side-question.js +2 -2
  167. package/dist/src/utils/skills.js +3 -3
  168. package/dist/src/verify/__tests__/coverage-parsers.test.js +30 -30
  169. package/dist/src/verify/environment.js +2 -1
  170. package/package.json +1 -1
  171. package/dist/src/pil/layer16-clarity.test.js +0 -31
  172. /package/dist/src/{pil/layer16-clarity.test.d.ts → council/__tests__/clarification-prompt.test.d.ts} +0 -0
@@ -1,349 +1,349 @@
1
- {
2
- "version": "2.2",
3
- "updated_at": "2026-06-04",
4
- "description": "Local fallback catalog: deepseek + siliconflow + openai (ChatGPT OAuth) + google (Gemini, via AI Studio key or Google OAuth). SiliconFlow pricing verified 2026-05-21; Gemini 2.5 pricing per ai.google.dev/pricing 2026-06-04 (3.x flash/pro entries APPROXIMATE — see per-model descriptions). Model IDs verified against ai.google.dev/gemini-api/docs/models 2026-06-04. See ./catalog.README.md for curation rationale.",
5
- "models": [
6
- {
7
- "id": "deepseek-v4-flash",
8
- "name": "DeepSeek V4 Flash (native)",
9
- "provider": "deepseek",
10
- "tier": "fast",
11
- "context_window": 128000,
12
- "max_output_tokens": 8000,
13
- "input_price_per_million": 0.27,
14
- "output_price_per_million": 1.1,
15
- "cached_input_price_per_million": 0.027,
16
- "reasoning": true,
17
- "thinking_type": "enabled",
18
- "supports_effort": false,
19
- "default_reasoning_effort": null,
20
- "description": "Native DeepSeek V4 Flash. Tool-call capable. More expensive than SiliconFlow's hosted V4-Flash ($0.14/$0.28) — prefer SF unless rate-limited.",
21
- "aliases": ["deepseek-flash-native"],
22
- "supports_vision": false
23
- },
24
- {
25
- "id": "deepseek-v4-pro",
26
- "name": "DeepSeek V4 Pro (native)",
27
- "provider": "deepseek",
28
- "tier": "premium",
29
- "context_window": 128000,
30
- "max_output_tokens": 16000,
31
- "input_price_per_million": 0.55,
32
- "output_price_per_million": 2.19,
33
- "cached_input_price_per_million": 0.055,
34
- "reasoning": true,
35
- "thinking_type": "enabled",
36
- "supports_effort": false,
37
- "default_reasoning_effort": null,
38
- "description": "Native DeepSeek V4 Pro. Tool-call capable. Cheaper than SF-hosted V4-Pro ($1.74/$3.48) — this is the premium-tier default.",
39
- "aliases": ["deepseek-pro-native"],
40
- "supports_vision": false
41
- },
42
- {
43
- "id": "deepseek-ai/DeepSeek-V4-Flash",
44
- "name": "DeepSeek V4 Flash (via SiliconFlow)",
45
- "provider": "siliconflow",
46
- "tier": "fast",
47
- "context_window": 1049000,
48
- "max_output_tokens": 8000,
49
- "input_price_per_million": 0.14,
50
- "output_price_per_million": 0.28,
51
- "reasoning": true,
52
- "thinking_type": "enabled",
53
- "supports_effort": false,
54
- "default_reasoning_effort": null,
55
- "description": "DeepSeek V4 Flash on SiliconFlow — cheaper than native. 1M context. Reasoning_tokens share output budget.",
56
- "aliases": ["deepseek-v4-flash-sf", "deepseek-flash-sf"],
57
- "supports_vision": false
58
- },
59
- {
60
- "id": "deepseek-ai/DeepSeek-V4-Pro",
61
- "name": "DeepSeek V4 Pro (via SiliconFlow)",
62
- "provider": "siliconflow",
63
- "tier": "premium",
64
- "context_window": 1049000,
65
- "max_output_tokens": 16000,
66
- "input_price_per_million": 1.74,
67
- "output_price_per_million": 3.48,
68
- "reasoning": true,
69
- "thinking_type": "enabled",
70
- "supports_effort": false,
71
- "default_reasoning_effort": null,
72
- "description": "DeepSeek V4 Pro on SiliconFlow. Prefer native deepseek-v4-pro for cost ($0.55/$2.19) when key available.",
73
- "aliases": ["deepseek-v4-pro-sf", "deepseek-pro-sf"],
74
- "supports_vision": false
75
- },
76
- {
77
- "id": "Qwen/Qwen3-8B",
78
- "name": "Qwen3 8B (SiliconFlow)",
79
- "provider": "siliconflow",
80
- "tier": "fast",
81
- "context_window": 131072,
82
- "max_output_tokens": 8192,
83
- "input_price_per_million": 0.06,
84
- "output_price_per_million": 0.06,
85
- "reasoning": false,
86
- "thinking_type": "hybrid",
87
- "supports_effort": false,
88
- "default_reasoning_effort": null,
89
- "description": "Cheapest tool-capable SiliconFlow model. Symmetric pricing — ideal for mechanical tool-execution loops. Supports thinking/non-thinking hybrid mode.",
90
- "aliases": ["qwen3-8b-sf", "alibaba/Qwen3-8B"],
91
- "supports_vision": false
92
- },
93
- {
94
- "id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
95
- "name": "Qwen3 30B A3B Instruct (SiliconFlow)",
96
- "provider": "siliconflow",
97
- "tier": "balanced",
98
- "context_window": 262144,
99
- "max_output_tokens": 8192,
100
- "input_price_per_million": 0.09,
101
- "output_price_per_million": 0.3,
102
- "reasoning": false,
103
- "thinking_type": null,
104
- "supports_effort": false,
105
- "default_reasoning_effort": null,
106
- "description": "MoE balanced default. 3B active params keeps cost low while 30B total gives strong instruction following + tool use. Recommended SF balanced pick.",
107
- "aliases": ["qwen3-30b-instruct-sf", "alibaba/Qwen3-30B-A3B-Instruct-2507"],
108
- "supports_vision": false
109
- },
110
- {
111
- "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
112
- "name": "Qwen3 Coder 30B A3B (SiliconFlow)",
113
- "provider": "siliconflow",
114
- "tier": "balanced",
115
- "context_window": 262144,
116
- "max_output_tokens": 8192,
117
- "input_price_per_million": 0.07,
118
- "output_price_per_million": 0.28,
119
- "reasoning": false,
120
- "thinking_type": null,
121
- "supports_effort": false,
122
- "default_reasoning_effort": null,
123
- "description": "Coding-specialized Qwen3 MoE. Cheaper than the generic Qwen3-30B variant. Use when pil.taskType is coding (router override pending).",
124
- "aliases": ["qwen3-coder-sf", "qwen-coder-sf", "alibaba/Qwen3-Coder-30B-A3B-Instruct"],
125
- "supports_vision": false
126
- },
127
- {
128
- "id": "deepseek-ai/DeepSeek-V3.2",
129
- "name": "DeepSeek V3.2 (SiliconFlow)",
130
- "provider": "siliconflow",
131
- "tier": "balanced",
132
- "context_window": 163840,
133
- "max_output_tokens": 8192,
134
- "input_price_per_million": 0.27,
135
- "output_price_per_million": 0.42,
136
- "reasoning": true,
137
- "thinking_type": "enabled",
138
- "supports_effort": false,
139
- "default_reasoning_effort": null,
140
- "description": "Cheap DeepSeek reasoning option for balanced-tier turns that benefit from CoT. Tool-capable. Use when Qwen3-30B is not strong enough on reasoning.",
141
- "aliases": ["deepseek-v3.2-sf"],
142
- "supports_vision": false
143
- },
144
- {
145
- "id": "deepseek-ai/DeepSeek-R1",
146
- "name": "DeepSeek R1 (SiliconFlow)",
147
- "provider": "siliconflow",
148
- "tier": "premium",
149
- "context_window": 163840,
150
- "max_output_tokens": 16384,
151
- "input_price_per_million": 0.5,
152
- "output_price_per_million": 2.18,
153
- "reasoning": true,
154
- "thinking_type": "enabled",
155
- "supports_effort": false,
156
- "default_reasoning_effort": null,
157
- "description": "Cheaper premium reasoning alternative to V4-Pro. Tool-capable. Use when V4-Pro is rate-limited or for benchmark parity.",
158
- "aliases": ["deepseek-r1-sf"],
159
- "supports_vision": false
160
- },
161
- {
162
- "id": "z-ai/GLM-4.6V",
163
- "name": "GLM 4.6V Vision (SiliconFlow)",
164
- "provider": "siliconflow",
165
- "tier": "balanced",
166
- "context_window": 131072,
167
- "max_output_tokens": 4096,
168
- "input_price_per_million": 0.3,
169
- "output_price_per_million": 0.9,
170
- "reasoning": false,
171
- "thinking_type": null,
172
- "supports_effort": false,
173
- "default_reasoning_effort": null,
174
- "description": "Vision-language model with confirmed function-calling support. Use when the turn contains image attachments (router vision-override pending).",
175
- "aliases": ["glm-4.6v-sf"],
176
- "supports_vision": true
177
- },
178
- {
179
- "id": "gpt-5.4-mini",
180
- "name": "GPT-5.4 mini (OpenAI ChatGPT OAuth)",
181
- "provider": "openai",
182
- "tier": "fast",
183
- "context_window": 256000,
184
- "max_output_tokens": 32000,
185
- "input_price_per_million": 0,
186
- "output_price_per_million": 0,
187
- "reasoning": true,
188
- "thinking_type": "enabled",
189
- "supports_effort": true,
190
- "default_reasoning_effort": "low",
191
- "description": "OpenAI GPT-5.4 mini via ChatGPT subscription OAuth (chatgpt.com/backend-api/codex, Responses API). Subscription-billed — per-token price N/A (0 placeholder). Added to local fallback catalog so OAuth-granted OpenAI models resolve when the CP catalog endpoint is unreachable.",
192
- "aliases": ["gpt-5.4-mini-oauth"],
193
- "supports_vision": false
194
- },
195
- {
196
- "id": "gpt-5.4",
197
- "name": "GPT-5.4 (OpenAI ChatGPT OAuth)",
198
- "provider": "openai",
199
- "tier": "premium",
200
- "context_window": 256000,
201
- "max_output_tokens": 32000,
202
- "input_price_per_million": 0,
203
- "output_price_per_million": 0,
204
- "reasoning": true,
205
- "thinking_type": "enabled",
206
- "supports_effort": true,
207
- "default_reasoning_effort": "medium",
208
- "description": "OpenAI GPT-5.4 via ChatGPT subscription OAuth (chatgpt.com/backend-api/codex, Responses API). Subscription-billed — per-token price N/A (0 placeholder). Added to local fallback catalog so OAuth-granted OpenAI models resolve when the CP catalog endpoint is unreachable.",
209
- "aliases": ["gpt-5.4-oauth"],
210
- "supports_vision": false
211
- },
212
- {
213
- "id": "gpt-5.3-codex",
214
- "name": "GPT-5.3 Codex (OpenAI ChatGPT OAuth)",
215
- "provider": "openai",
216
- "tier": "balanced",
217
- "context_window": 256000,
218
- "max_output_tokens": 32000,
219
- "input_price_per_million": 0,
220
- "output_price_per_million": 0,
221
- "reasoning": true,
222
- "thinking_type": "enabled",
223
- "supports_effort": true,
224
- "default_reasoning_effort": "medium",
225
- "description": "OpenAI GPT-5.3 Codex (coding-specialized) via ChatGPT subscription OAuth (chatgpt.com/backend-api/codex, Responses API). Subscription-billed — per-token price N/A (0 placeholder). Added to local fallback catalog so OAuth-granted OpenAI models resolve when the CP catalog endpoint is unreachable.",
226
- "aliases": ["gpt-5.3-codex-oauth", "codex"],
227
- "supports_vision": false
228
- },
229
- {
230
- "id": "gemini-2.5-flash",
231
- "name": "Gemini 2.5 Flash",
232
- "provider": "google",
233
- "tier": "balanced",
234
- "context_window": 1048576,
235
- "max_output_tokens": 65536,
236
- "input_price_per_million": 0.3,
237
- "output_price_per_million": 2.5,
238
- "reasoning": true,
239
- "thinking_type": "enabled",
240
- "supports_effort": false,
241
- "default_reasoning_effort": null,
242
- "description": "Google Gemini 2.5 Flash via Generative Language API (AI Studio key — console aistudio.google.com/app/apikey, or stored Google OAuth). Multimodal, 1M context, thinking-capable. Pricing per ai.google.dev/pricing (2026-06-04).",
243
- "aliases": ["gemini-flash", "gemini-2.5-flash-latest"],
244
- "supports_vision": true
245
- },
246
- {
247
- "id": "gemini-2.5-pro",
248
- "name": "Gemini 2.5 Pro",
249
- "provider": "google",
250
- "tier": "premium",
251
- "context_window": 1048576,
252
- "max_output_tokens": 65536,
253
- "input_price_per_million": 1.25,
254
- "output_price_per_million": 10.0,
255
- "reasoning": true,
256
- "thinking_type": "enabled",
257
- "supports_effort": false,
258
- "default_reasoning_effort": null,
259
- "description": "Google Gemini 2.5 Pro via Generative Language API (AI Studio key or stored Google OAuth). Most advanced 2.5 reasoning model, multimodal, 1M context. Pricing per ai.google.dev/pricing (2026-06-04).",
260
- "aliases": ["gemini-pro", "gemini-2.5-pro-latest"],
261
- "supports_vision": true
262
- },
263
- {
264
- "id": "gemini-3.5-flash",
265
- "name": "Gemini 3.5 Flash",
266
- "provider": "google",
267
- "tier": "fast",
268
- "context_window": 1048576,
269
- "max_output_tokens": 65536,
270
- "input_price_per_million": 0.5,
271
- "output_price_per_million": 3.0,
272
- "reasoning": true,
273
- "thinking_type": "enabled",
274
- "supports_effort": false,
275
- "default_reasoning_effort": null,
276
- "description": "Google Gemini 3.5 Flash via Generative Language API. Frontier agentic/coding flash model, multimodal, 1M context, thinking-capable. Pricing APPROXIMATE (verify at ai.google.dev/pricing before cost-sensitive use).",
277
- "aliases": ["gemini-flash-3.5"],
278
- "supports_vision": true
279
- },
280
- {
281
- "id": "gemini-3.1-flash-lite",
282
- "name": "Gemini 3.1 Flash-Lite",
283
- "provider": "google",
284
- "tier": "fast",
285
- "context_window": 1048576,
286
- "max_output_tokens": 65536,
287
- "input_price_per_million": 0.15,
288
- "output_price_per_million": 0.6,
289
- "reasoning": true,
290
- "thinking_type": "enabled",
291
- "supports_effort": false,
292
- "default_reasoning_effort": null,
293
- "description": "Google Gemini 3.1 Flash-Lite via Generative Language API. Cheapest multimodal Gemini, 1M context, limited thinking. Pricing APPROXIMATE (verify at ai.google.dev/pricing before cost-sensitive use).",
294
- "aliases": ["gemini-flash-lite"],
295
- "supports_vision": true
296
- },
297
- {
298
- "id": "gemini-3.1-pro-preview",
299
- "name": "Gemini 3.1 Pro Preview",
300
- "provider": "google",
301
- "tier": "premium",
302
- "context_window": 1048576,
303
- "max_output_tokens": 65536,
304
- "input_price_per_million": 2.0,
305
- "output_price_per_million": 12.0,
306
- "reasoning": true,
307
- "thinking_type": "enabled",
308
- "supports_effort": false,
309
- "default_reasoning_effort": null,
310
- "description": "Google Gemini 3.1 Pro Preview via Generative Language API. Strongest agentic/reasoning Gemini, multimodal, 1M context. Preview model — id/pricing may change; pricing APPROXIMATE (verify at ai.google.dev/pricing).",
311
- "aliases": ["gemini-pro-preview"],
312
- "supports_vision": true
313
- },
314
- {
315
- "id": "grok-4.3",
316
- "name": "Grok 4.3 (xAI)",
317
- "provider": "xai",
318
- "tier": "premium",
319
- "context_window": 1048576,
320
- "max_output_tokens": 32768,
321
- "input_price_per_million": 1.25,
322
- "output_price_per_million": 2.5,
323
- "reasoning": true,
324
- "thinking_type": "enabled",
325
- "supports_effort": true,
326
- "default_reasoning_effort": "medium",
327
- "description": "Flagship model mạnh nhất hiện tại của xAI. Agentic tool-calling xuất sắc, hallucination thấp, hỗ trợ reasoning + non-reasoning mode. Context 1M tokens. Model mặc định khuyến nghị cho Grok CLI.",
328
- "aliases": ["grok-4.3", "grok-latest", "grok-4"],
329
- "supports_vision": true
330
- },
331
- {
332
- "id": "grok-build-0.1",
333
- "name": "Grok Build 0.1 (xAI)",
334
- "provider": "xai",
335
- "tier": "balanced",
336
- "context_window": 262144,
337
- "max_output_tokens": 65536,
338
- "input_price_per_million": 1.0,
339
- "output_price_per_million": 2.0,
340
- "reasoning": true,
341
- "thinking_type": "enabled",
342
- "supports_effort": false,
343
- "default_reasoning_effort": null,
344
- "description": "Model coding chuyên biệt, nhanh, tối ưu cho agentic coding workflows. Chính model powering Grok Build CLI. Rất phù hợp cho lập trình, debug, multi-step tasks.",
345
- "aliases": ["grok-build", "grok-code-fast", "grok-build-0.1"],
346
- "supports_vision": true
347
- }
348
- ]
349
- }
1
+ {
2
+ "version": "2.2",
3
+ "updated_at": "2026-06-04",
4
+ "description": "Local fallback catalog: deepseek + siliconflow + openai (ChatGPT OAuth) + google (Gemini, via AI Studio key or Google OAuth). SiliconFlow pricing verified 2026-05-21; Gemini 2.5 pricing per ai.google.dev/pricing 2026-06-04 (3.x flash/pro entries APPROXIMATE — see per-model descriptions). Model IDs verified against ai.google.dev/gemini-api/docs/models 2026-06-04. See ./catalog.README.md for curation rationale.",
5
+ "models": [
6
+ {
7
+ "id": "deepseek-v4-flash",
8
+ "name": "DeepSeek V4 Flash (native)",
9
+ "provider": "deepseek",
10
+ "tier": "fast",
11
+ "context_window": 128000,
12
+ "max_output_tokens": 8000,
13
+ "input_price_per_million": 0.27,
14
+ "output_price_per_million": 1.1,
15
+ "cached_input_price_per_million": 0.027,
16
+ "reasoning": true,
17
+ "thinking_type": "enabled",
18
+ "supports_effort": false,
19
+ "default_reasoning_effort": null,
20
+ "description": "Native DeepSeek V4 Flash. Tool-call capable. More expensive than SiliconFlow's hosted V4-Flash ($0.14/$0.28) — prefer SF unless rate-limited.",
21
+ "aliases": ["deepseek-flash-native"],
22
+ "supports_vision": false
23
+ },
24
+ {
25
+ "id": "deepseek-v4-pro",
26
+ "name": "DeepSeek V4 Pro (native)",
27
+ "provider": "deepseek",
28
+ "tier": "premium",
29
+ "context_window": 128000,
30
+ "max_output_tokens": 16000,
31
+ "input_price_per_million": 0.55,
32
+ "output_price_per_million": 2.19,
33
+ "cached_input_price_per_million": 0.055,
34
+ "reasoning": true,
35
+ "thinking_type": "enabled",
36
+ "supports_effort": false,
37
+ "default_reasoning_effort": null,
38
+ "description": "Native DeepSeek V4 Pro. Tool-call capable. Cheaper than SF-hosted V4-Pro ($1.74/$3.48) — this is the premium-tier default.",
39
+ "aliases": ["deepseek-pro-native"],
40
+ "supports_vision": false
41
+ },
42
+ {
43
+ "id": "deepseek-ai/DeepSeek-V4-Flash",
44
+ "name": "DeepSeek V4 Flash (via SiliconFlow)",
45
+ "provider": "siliconflow",
46
+ "tier": "fast",
47
+ "context_window": 1049000,
48
+ "max_output_tokens": 8000,
49
+ "input_price_per_million": 0.14,
50
+ "output_price_per_million": 0.28,
51
+ "reasoning": true,
52
+ "thinking_type": "enabled",
53
+ "supports_effort": false,
54
+ "default_reasoning_effort": null,
55
+ "description": "DeepSeek V4 Flash on SiliconFlow — cheaper than native. 1M context. Reasoning_tokens share output budget.",
56
+ "aliases": ["deepseek-v4-flash-sf", "deepseek-flash-sf"],
57
+ "supports_vision": false
58
+ },
59
+ {
60
+ "id": "deepseek-ai/DeepSeek-V4-Pro",
61
+ "name": "DeepSeek V4 Pro (via SiliconFlow)",
62
+ "provider": "siliconflow",
63
+ "tier": "premium",
64
+ "context_window": 1049000,
65
+ "max_output_tokens": 16000,
66
+ "input_price_per_million": 1.74,
67
+ "output_price_per_million": 3.48,
68
+ "reasoning": true,
69
+ "thinking_type": "enabled",
70
+ "supports_effort": false,
71
+ "default_reasoning_effort": null,
72
+ "description": "DeepSeek V4 Pro on SiliconFlow. Prefer native deepseek-v4-pro for cost ($0.55/$2.19) when key available.",
73
+ "aliases": ["deepseek-v4-pro-sf", "deepseek-pro-sf"],
74
+ "supports_vision": false
75
+ },
76
+ {
77
+ "id": "Qwen/Qwen3-8B",
78
+ "name": "Qwen3 8B (SiliconFlow)",
79
+ "provider": "siliconflow",
80
+ "tier": "fast",
81
+ "context_window": 131072,
82
+ "max_output_tokens": 8192,
83
+ "input_price_per_million": 0.06,
84
+ "output_price_per_million": 0.06,
85
+ "reasoning": false,
86
+ "thinking_type": "hybrid",
87
+ "supports_effort": false,
88
+ "default_reasoning_effort": null,
89
+ "description": "Cheapest tool-capable SiliconFlow model. Symmetric pricing — ideal for mechanical tool-execution loops. Supports thinking/non-thinking hybrid mode.",
90
+ "aliases": ["qwen3-8b-sf", "alibaba/Qwen3-8B"],
91
+ "supports_vision": false
92
+ },
93
+ {
94
+ "id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
95
+ "name": "Qwen3 30B A3B Instruct (SiliconFlow)",
96
+ "provider": "siliconflow",
97
+ "tier": "balanced",
98
+ "context_window": 262144,
99
+ "max_output_tokens": 8192,
100
+ "input_price_per_million": 0.09,
101
+ "output_price_per_million": 0.3,
102
+ "reasoning": false,
103
+ "thinking_type": null,
104
+ "supports_effort": false,
105
+ "default_reasoning_effort": null,
106
+ "description": "MoE balanced default. 3B active params keeps cost low while 30B total gives strong instruction following + tool use. Recommended SF balanced pick.",
107
+ "aliases": ["qwen3-30b-instruct-sf", "alibaba/Qwen3-30B-A3B-Instruct-2507"],
108
+ "supports_vision": false
109
+ },
110
+ {
111
+ "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
112
+ "name": "Qwen3 Coder 30B A3B (SiliconFlow)",
113
+ "provider": "siliconflow",
114
+ "tier": "balanced",
115
+ "context_window": 262144,
116
+ "max_output_tokens": 8192,
117
+ "input_price_per_million": 0.07,
118
+ "output_price_per_million": 0.28,
119
+ "reasoning": false,
120
+ "thinking_type": null,
121
+ "supports_effort": false,
122
+ "default_reasoning_effort": null,
123
+ "description": "Coding-specialized Qwen3 MoE. Cheaper than the generic Qwen3-30B variant. Use when pil.taskType is coding (router override pending).",
124
+ "aliases": ["qwen3-coder-sf", "qwen-coder-sf", "alibaba/Qwen3-Coder-30B-A3B-Instruct"],
125
+ "supports_vision": false
126
+ },
127
+ {
128
+ "id": "deepseek-ai/DeepSeek-V3.2",
129
+ "name": "DeepSeek V3.2 (SiliconFlow)",
130
+ "provider": "siliconflow",
131
+ "tier": "balanced",
132
+ "context_window": 163840,
133
+ "max_output_tokens": 8192,
134
+ "input_price_per_million": 0.27,
135
+ "output_price_per_million": 0.42,
136
+ "reasoning": true,
137
+ "thinking_type": "enabled",
138
+ "supports_effort": false,
139
+ "default_reasoning_effort": null,
140
+ "description": "Cheap DeepSeek reasoning option for balanced-tier turns that benefit from CoT. Tool-capable. Use when Qwen3-30B is not strong enough on reasoning.",
141
+ "aliases": ["deepseek-v3.2-sf"],
142
+ "supports_vision": false
143
+ },
144
+ {
145
+ "id": "deepseek-ai/DeepSeek-R1",
146
+ "name": "DeepSeek R1 (SiliconFlow)",
147
+ "provider": "siliconflow",
148
+ "tier": "premium",
149
+ "context_window": 163840,
150
+ "max_output_tokens": 16384,
151
+ "input_price_per_million": 0.5,
152
+ "output_price_per_million": 2.18,
153
+ "reasoning": true,
154
+ "thinking_type": "enabled",
155
+ "supports_effort": false,
156
+ "default_reasoning_effort": null,
157
+ "description": "Cheaper premium reasoning alternative to V4-Pro. Tool-capable. Use when V4-Pro is rate-limited or for benchmark parity.",
158
+ "aliases": ["deepseek-r1-sf"],
159
+ "supports_vision": false
160
+ },
161
+ {
162
+ "id": "z-ai/GLM-4.6V",
163
+ "name": "GLM 4.6V Vision (SiliconFlow)",
164
+ "provider": "siliconflow",
165
+ "tier": "balanced",
166
+ "context_window": 131072,
167
+ "max_output_tokens": 4096,
168
+ "input_price_per_million": 0.3,
169
+ "output_price_per_million": 0.9,
170
+ "reasoning": false,
171
+ "thinking_type": null,
172
+ "supports_effort": false,
173
+ "default_reasoning_effort": null,
174
+ "description": "Vision-language model with confirmed function-calling support. Use when the turn contains image attachments (router vision-override pending).",
175
+ "aliases": ["glm-4.6v-sf"],
176
+ "supports_vision": true
177
+ },
178
+ {
179
+ "id": "gpt-5.4-mini",
180
+ "name": "GPT-5.4 mini (OpenAI ChatGPT OAuth)",
181
+ "provider": "openai",
182
+ "tier": "fast",
183
+ "context_window": 256000,
184
+ "max_output_tokens": 32000,
185
+ "input_price_per_million": 0,
186
+ "output_price_per_million": 0,
187
+ "reasoning": true,
188
+ "thinking_type": "enabled",
189
+ "supports_effort": true,
190
+ "default_reasoning_effort": "low",
191
+ "description": "OpenAI GPT-5.4 mini via ChatGPT subscription OAuth (chatgpt.com/backend-api/codex, Responses API). Subscription-billed — per-token price N/A (0 placeholder). Added to local fallback catalog so OAuth-granted OpenAI models resolve when the CP catalog endpoint is unreachable.",
192
+ "aliases": ["gpt-5.4-mini-oauth"],
193
+ "supports_vision": false
194
+ },
195
+ {
196
+ "id": "gpt-5.4",
197
+ "name": "GPT-5.4 (OpenAI ChatGPT OAuth)",
198
+ "provider": "openai",
199
+ "tier": "premium",
200
+ "context_window": 256000,
201
+ "max_output_tokens": 32000,
202
+ "input_price_per_million": 0,
203
+ "output_price_per_million": 0,
204
+ "reasoning": true,
205
+ "thinking_type": "enabled",
206
+ "supports_effort": true,
207
+ "default_reasoning_effort": "medium",
208
+ "description": "OpenAI GPT-5.4 via ChatGPT subscription OAuth (chatgpt.com/backend-api/codex, Responses API). Subscription-billed — per-token price N/A (0 placeholder). Added to local fallback catalog so OAuth-granted OpenAI models resolve when the CP catalog endpoint is unreachable.",
209
+ "aliases": ["gpt-5.4-oauth"],
210
+ "supports_vision": false
211
+ },
212
+ {
213
+ "id": "gpt-5.3-codex",
214
+ "name": "GPT-5.3 Codex (OpenAI ChatGPT OAuth)",
215
+ "provider": "openai",
216
+ "tier": "balanced",
217
+ "context_window": 256000,
218
+ "max_output_tokens": 32000,
219
+ "input_price_per_million": 0,
220
+ "output_price_per_million": 0,
221
+ "reasoning": true,
222
+ "thinking_type": "enabled",
223
+ "supports_effort": true,
224
+ "default_reasoning_effort": "medium",
225
+ "description": "OpenAI GPT-5.3 Codex (coding-specialized) via ChatGPT subscription OAuth (chatgpt.com/backend-api/codex, Responses API). Subscription-billed — per-token price N/A (0 placeholder). Added to local fallback catalog so OAuth-granted OpenAI models resolve when the CP catalog endpoint is unreachable.",
226
+ "aliases": ["gpt-5.3-codex-oauth", "codex"],
227
+ "supports_vision": false
228
+ },
229
+ {
230
+ "id": "gemini-2.5-flash",
231
+ "name": "Gemini 2.5 Flash",
232
+ "provider": "google",
233
+ "tier": "balanced",
234
+ "context_window": 1048576,
235
+ "max_output_tokens": 65536,
236
+ "input_price_per_million": 0.3,
237
+ "output_price_per_million": 2.5,
238
+ "reasoning": true,
239
+ "thinking_type": "enabled",
240
+ "supports_effort": false,
241
+ "default_reasoning_effort": null,
242
+ "description": "Google Gemini 2.5 Flash via Generative Language API (AI Studio key — console aistudio.google.com/app/apikey, or stored Google OAuth). Multimodal, 1M context, thinking-capable. Pricing per ai.google.dev/pricing (2026-06-04).",
243
+ "aliases": ["gemini-flash", "gemini-2.5-flash-latest"],
244
+ "supports_vision": true
245
+ },
246
+ {
247
+ "id": "gemini-2.5-pro",
248
+ "name": "Gemini 2.5 Pro",
249
+ "provider": "google",
250
+ "tier": "premium",
251
+ "context_window": 1048576,
252
+ "max_output_tokens": 65536,
253
+ "input_price_per_million": 1.25,
254
+ "output_price_per_million": 10.0,
255
+ "reasoning": true,
256
+ "thinking_type": "enabled",
257
+ "supports_effort": false,
258
+ "default_reasoning_effort": null,
259
+ "description": "Google Gemini 2.5 Pro via Generative Language API (AI Studio key or stored Google OAuth). Most advanced 2.5 reasoning model, multimodal, 1M context. Pricing per ai.google.dev/pricing (2026-06-04).",
260
+ "aliases": ["gemini-pro", "gemini-2.5-pro-latest"],
261
+ "supports_vision": true
262
+ },
263
+ {
264
+ "id": "gemini-3.5-flash",
265
+ "name": "Gemini 3.5 Flash",
266
+ "provider": "google",
267
+ "tier": "fast",
268
+ "context_window": 1048576,
269
+ "max_output_tokens": 65536,
270
+ "input_price_per_million": 0.5,
271
+ "output_price_per_million": 3.0,
272
+ "reasoning": true,
273
+ "thinking_type": "enabled",
274
+ "supports_effort": false,
275
+ "default_reasoning_effort": null,
276
+ "description": "Google Gemini 3.5 Flash via Generative Language API. Frontier agentic/coding flash model, multimodal, 1M context, thinking-capable. Pricing APPROXIMATE (verify at ai.google.dev/pricing before cost-sensitive use).",
277
+ "aliases": ["gemini-flash-3.5"],
278
+ "supports_vision": true
279
+ },
280
+ {
281
+ "id": "gemini-3.1-flash-lite",
282
+ "name": "Gemini 3.1 Flash-Lite",
283
+ "provider": "google",
284
+ "tier": "fast",
285
+ "context_window": 1048576,
286
+ "max_output_tokens": 65536,
287
+ "input_price_per_million": 0.15,
288
+ "output_price_per_million": 0.6,
289
+ "reasoning": true,
290
+ "thinking_type": "enabled",
291
+ "supports_effort": false,
292
+ "default_reasoning_effort": null,
293
+ "description": "Google Gemini 3.1 Flash-Lite via Generative Language API. Cheapest multimodal Gemini, 1M context, limited thinking. Pricing APPROXIMATE (verify at ai.google.dev/pricing before cost-sensitive use).",
294
+ "aliases": ["gemini-flash-lite"],
295
+ "supports_vision": true
296
+ },
297
+ {
298
+ "id": "gemini-3.1-pro-preview",
299
+ "name": "Gemini 3.1 Pro Preview",
300
+ "provider": "google",
301
+ "tier": "premium",
302
+ "context_window": 1048576,
303
+ "max_output_tokens": 65536,
304
+ "input_price_per_million": 2.0,
305
+ "output_price_per_million": 12.0,
306
+ "reasoning": true,
307
+ "thinking_type": "enabled",
308
+ "supports_effort": false,
309
+ "default_reasoning_effort": null,
310
+ "description": "Google Gemini 3.1 Pro Preview via Generative Language API. Strongest agentic/reasoning Gemini, multimodal, 1M context. Preview model — id/pricing may change; pricing APPROXIMATE (verify at ai.google.dev/pricing).",
311
+ "aliases": ["gemini-pro-preview"],
312
+ "supports_vision": true
313
+ },
314
+ {
315
+ "id": "grok-4.3",
316
+ "name": "Grok 4.3 (xAI)",
317
+ "provider": "xai",
318
+ "tier": "premium",
319
+ "context_window": 1048576,
320
+ "max_output_tokens": 32768,
321
+ "input_price_per_million": 1.25,
322
+ "output_price_per_million": 2.5,
323
+ "reasoning": true,
324
+ "thinking_type": "enabled",
325
+ "supports_effort": true,
326
+ "default_reasoning_effort": "medium",
327
+ "description": "xAI's current flagship. Excellent agentic tool-calling, low hallucination, supports reasoning + non-reasoning mode. 1M token context. Recommended default for Grok CLI.",
328
+ "aliases": ["grok-4.3", "grok-latest", "grok-4"],
329
+ "supports_vision": true
330
+ },
331
+ {
332
+ "id": "grok-build-0.1",
333
+ "name": "Grok Build 0.1 (xAI)",
334
+ "provider": "xai",
335
+ "tier": "balanced",
336
+ "context_window": 262144,
337
+ "max_output_tokens": 65536,
338
+ "input_price_per_million": 1.0,
339
+ "output_price_per_million": 2.0,
340
+ "reasoning": true,
341
+ "thinking_type": "enabled",
342
+ "supports_effort": false,
343
+ "default_reasoning_effort": null,
344
+ "description": "Specialized fast coding model optimized for agentic coding workflows. The model powering Grok Build CLI. Well-suited for programming, debugging, and multi-step tasks.",
345
+ "aliases": ["grok-build", "grok-code-fast", "grok-build-0.1"],
346
+ "supports_vision": true
347
+ }
348
+ ]
349
+ }