@voria/cli 0.0.3 → 0.0.5

This diff shows the changes between publicly available package versions released to one of the supported registries, and is provided for informational purposes only.
Files changed (47)
  1. package/README.md +75 -380
  2. package/bin/voria +635 -481
  3. package/docs/CHANGELOG.md +19 -0
  4. package/docs/USER_GUIDE.md +34 -5
  5. package/package.json +1 -1
  6. package/python/voria/__init__.py +1 -1
  7. package/python/voria/__pycache__/__init__.cpython-312.pyc +0 -0
  8. package/python/voria/__pycache__/engine.cpython-312.pyc +0 -0
  9. package/python/voria/core/__pycache__/__init__.cpython-312.pyc +0 -0
  10. package/python/voria/core/__pycache__/setup.cpython-312.pyc +0 -0
  11. package/python/voria/core/agent/__pycache__/__init__.cpython-312.pyc +0 -0
  12. package/python/voria/core/agent/__pycache__/loop.cpython-312.pyc +0 -0
  13. package/python/voria/core/executor/__pycache__/__init__.cpython-312.pyc +0 -0
  14. package/python/voria/core/executor/__pycache__/executor.cpython-312.pyc +0 -0
  15. package/python/voria/core/executor/executor.py +5 -0
  16. package/python/voria/core/github/__pycache__/__init__.cpython-312.pyc +0 -0
  17. package/python/voria/core/github/__pycache__/client.cpython-312.pyc +0 -0
  18. package/python/voria/core/llm/__init__.py +16 -0
  19. package/python/voria/core/llm/__pycache__/__init__.cpython-312.pyc +0 -0
  20. package/python/voria/core/llm/__pycache__/base.cpython-312.pyc +0 -0
  21. package/python/voria/core/llm/__pycache__/claude_provider.cpython-312.pyc +0 -0
  22. package/python/voria/core/llm/__pycache__/deepseek_provider.cpython-312.pyc +0 -0
  23. package/python/voria/core/llm/__pycache__/gemini_provider.cpython-312.pyc +0 -0
  24. package/python/voria/core/llm/__pycache__/kimi_provider.cpython-312.pyc +0 -0
  25. package/python/voria/core/llm/__pycache__/minimax_provider.cpython-312.pyc +0 -0
  26. package/python/voria/core/llm/__pycache__/modal_provider.cpython-312.pyc +0 -0
  27. package/python/voria/core/llm/__pycache__/model_discovery.cpython-312.pyc +0 -0
  28. package/python/voria/core/llm/__pycache__/openai_provider.cpython-312.pyc +0 -0
  29. package/python/voria/core/llm/__pycache__/siliconflow_provider.cpython-312.pyc +0 -0
  30. package/python/voria/core/llm/base.py +12 -0
  31. package/python/voria/core/llm/claude_provider.py +46 -0
  32. package/python/voria/core/llm/deepseek_provider.py +109 -0
  33. package/python/voria/core/llm/gemini_provider.py +44 -0
  34. package/python/voria/core/llm/kimi_provider.py +109 -0
  35. package/python/voria/core/llm/minimax_provider.py +187 -0
  36. package/python/voria/core/llm/modal_provider.py +33 -0
  37. package/python/voria/core/llm/model_discovery.py +104 -155
  38. package/python/voria/core/llm/openai_provider.py +33 -0
  39. package/python/voria/core/llm/siliconflow_provider.py +109 -0
  40. package/python/voria/core/patcher/__pycache__/__init__.cpython-312.pyc +0 -0
  41. package/python/voria/core/patcher/__pycache__/patcher.cpython-312.pyc +0 -0
  42. package/python/voria/core/setup.py +4 -1
  43. package/python/voria/core/testing/__pycache__/definitions.cpython-312.pyc +0 -0
  44. package/python/voria/core/testing/__pycache__/runner.cpython-312.pyc +0 -0
  45. package/python/voria/core/testing/definitions.py +87 -0
  46. package/python/voria/core/testing/runner.py +324 -0
  47. package/python/voria/engine.py +736 -232
package/python/voria/core/llm/minimax_provider.py
@@ -0,0 +1,187 @@
+"""MiniMax LLM Provider via NVIDIA Integrate API"""
+
+import logging
+from typing import List, Dict, Any, Optional
+import httpx
+
+from .base import BaseLLMProvider, Message, LLMResponse
+
+logger = logging.getLogger(__name__)
+
+
+class MiniMaxProvider(BaseLLMProvider):
+    """MiniMax LLM Provider using NVIDIA's OpenAI-compatible API"""
+
+    API_ENDPOINT = "https://integrate.api.nvidia.com/v1/chat/completions"
+    DEFAULT_MODEL = "minimaxai/minimax-m2.7"
+
+    def __init__(self, api_key: str, model: str = DEFAULT_MODEL):
+        """
+        Initialize MiniMax provider
+
+        Args:
+            api_key: NVIDIA API key
+            model: Model (minimaxai/minimax-m2.7, etc)
+        """
+        super().__init__(api_key, model)
+        self.client = httpx.AsyncClient(
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            },
+            timeout=300.0,
+        )
+
+    async def generate(
+        self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
+    ) -> LLMResponse:
+        """Generate response using MiniMax"""
+        try:
+            payload = {
+                "model": self.model,
+                "messages": [
+                    {"role": msg.role, "content": msg.content} for msg in messages
+                ],
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "top_p": 0.95,
+                "stream": False,
+            }
+
+            logger.debug(f"Calling MiniMax API with {len(messages)} messages")
+            logger.info(f"Sending generation request to MiniMax model {self.model}...")
+
+            response = await self.client.post(self.API_ENDPOINT, json=payload)
+            response.raise_for_status()
+
+            data = response.json()
+            content = data["choices"][0]["message"]["content"]
+            tokens_used = data.get("usage", {}).get("total_tokens", 0)
+
+            logger.info(f"MiniMax API response: {tokens_used} tokens used")
+
+            return LLMResponse(
+                content=content,
+                tokens_used=tokens_used,
+                model=self.model,
+                provider="MiniMax",
+            )
+
+        except httpx.HTTPError as e:
+            logger.error(f"MiniMax API error: {e}")
+            raise
+        except Exception as e:
+            logger.error(f"Error generating with MiniMax: {e}")
+            raise
+
+    async def stream_generate(
+        self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
+    ):
+        """Stream generation from MiniMax"""
+        try:
+            payload = {
+                "model": self.model,
+                "messages": [
+                    {"role": msg.role, "content": msg.content} for msg in messages
+                ],
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "top_p": 0.95,
+                "stream": True,
+            }
+
+            import json
+
+            async with self.client.stream("POST", self.API_ENDPOINT, json=payload) as response:
+                response.raise_for_status()
+                async for line in response.aiter_lines():
+                    if not line: continue
+                    if line.startswith("data: "):
+                        data_str = line[6:]
+                        if data_str == "[DONE]": break
+                        try:
+                            data = json.loads(data_str)
+                            delta = data["choices"][0].get("delta", {})
+                            if "content" in delta:
+                                yield delta["content"]
+                        except Exception as e:
+                            logger.error(f"Error parsing stream chunk: {e}")
+                            continue
+
+        except Exception as e:
+            logger.error(f"Error in MiniMax stream: {e}")
+            raise
+
+    async def plan(self, issue_description: str) -> str:
+        """Generate implementation plan"""
+        system_message = Message(
+            role="system",
+            content="""You are an expert software architect.
+Create a detailed implementation plan for fixing this GitHub issue.""",
+        )
+
+        user_message = Message(role="user", content=f"Issue:\n{issue_description}")
+
+        response = await self.generate([system_message, user_message], max_tokens=2000)
+
+        return response.content
+
+    async def generate_patch(
+        self,
+        issue_description: str,
+        context_files: Dict[str, str],
+        previous_errors: Optional[str] = None,
+    ) -> str:
+        """Generate code patch in unified diff format"""
+        system_message = Message(
+            role="system",
+            content="""Generate a unified diff format patch.
+Format:
+--- a/path
++++ b/path
+@@ -line,count +line,count @@""",
+        )
+
+        context = f"Issue:\n{issue_description}\n\n"
+        for filename, content in context_files.items():
+            context += f"\n--- {filename} ---\n{content}\n"
+
+        if previous_errors:
+            context += f"\nPrevious Errors:\n{previous_errors}"
+
+        user_message = Message(role="user", content=context)
+
+        response = await self.generate(
+            [system_message, user_message], max_tokens=3000, temperature=0.5
+        )
+
+        return response.content
+
+    async def analyze_test_failure(
+        self, test_output: str, code_context: str
+    ) -> Dict[str, Any]:
+        """Analyze test failure"""
+        system_message = Message(
+            role="system", content="Analyze the test failure and suggest fixes."
+        )
+
+        user_message = Message(
+            role="user",
+            content=f"""Test Output:
+{test_output}
+
+Code:
+{code_context}""",
+        )
+
+        response = await self.generate([system_message, user_message], max_tokens=1500)
+
+        return {
+            "analysis": response.content,
+            "provider": "MiniMax",
+            "tokens_used": response.tokens_used,
+        }
+
+    async def close(self):
+        """Close HTTP client"""
+        await self.client.aclose()
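For orientation, a minimal usage sketch of the new provider follows. This is not part of the package: it assumes `Message` is the role/content pair imported from `voria.core.llm.base` as in the diff above, and that `NVIDIA_API_KEY` is set in the environment.

# Hypothetical usage sketch, not from the package. Assumes the import paths
# and the Message(role=..., content=...) shape shown in the diff above.
import asyncio
import os

from voria.core.llm.base import Message
from voria.core.llm.minimax_provider import MiniMaxProvider

async def demo() -> None:
    provider = MiniMaxProvider(api_key=os.environ["NVIDIA_API_KEY"])
    try:
        # One-shot completion via generate()
        reply = await provider.generate([Message(role="user", content="Say hi")])
        print(reply.content, reply.tokens_used)
        # Incremental output via stream_generate()
        async for chunk in provider.stream_generate(
            [Message(role="user", content="Count to three")]
        ):
            print(chunk, end="", flush=True)
    finally:
        await provider.close()  # release the underlying httpx client

asyncio.run(demo())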
package/python/voria/core/llm/modal_provider.py
@@ -214,6 +214,39 @@ Code Context:
             "tokens_used": response.tokens_used,
         }

+    async def stream_generate(
+        self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
+    ):
+        """Stream response tokens from Modal"""
+        import json as _json
+        try:
+            payload = {
+                "model": self.model,
+                "messages": [{"role": m.role, "content": m.content} for m in messages],
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "stream": True,
+            }
+            async with self.client.stream("POST", self.API_ENDPOINT, json=payload) as response:
+                response.raise_for_status()
+                async for line in response.aiter_lines():
+                    if not line:
+                        continue
+                    if line.startswith("data: "):
+                        data_str = line[6:]
+                        if data_str == "[DONE]":
+                            break
+                        try:
+                            data = _json.loads(data_str)
+                            delta = data["choices"][0].get("delta", {})
+                            if "content" in delta:
+                                yield delta["content"]
+                        except Exception:
+                            continue
+        except Exception as e:
+            logger.error(f"Modal stream error: {e}")
+            raise
+
     async def close(self):
         """Close HTTP client"""
         await self.client.aclose()
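This `stream_generate` body is copied nearly verbatim into the MiniMax, Modal, and OpenAI providers in this release. A shared helper along the following lines (hypothetical, not in the package) would express the same "data: "-framed SSE parsing once:

# Hypothetical refactor sketch: the SSE loop duplicated across providers,
# factored into one reusable async generator. iter_sse_content() yields the
# "delta.content" strings of an OpenAI-style streaming response.
import json
from typing import Any, AsyncIterator, Dict

import httpx

async def iter_sse_content(
    client: httpx.AsyncClient, url: str, payload: Dict[str, Any]
) -> AsyncIterator[str]:
    async with client.stream("POST", url, json=payload) as response:
        response.raise_for_status()
        async for line in response.aiter_lines():
            if not line.startswith("data: "):
                continue  # skip blanks and non-data SSE lines
            data_str = line[6:]
            if data_str == "[DONE]":
                break  # end-of-stream sentinel
            try:
                delta = json.loads(data_str)["choices"][0].get("delta", {})
            except (json.JSONDecodeError, KeyError, IndexError):
                continue  # tolerate malformed chunks, as the providers do
            if "content" in delta:
                yield delta["content"]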
package/python/voria/core/llm/model_discovery.py
@@ -5,9 +5,10 @@ Fetches available models at runtime based on API keys.

 import asyncio
 import httpx
-from dataclasses import dataclass
+from dataclasses import dataclass, asdict
 from typing import List, Optional
 import logging
+import json

 logger = logging.getLogger(__name__)

@@ -26,6 +27,40 @@ class ModelInfo:
 class ModelDiscovery:
     """Fetch available models from LLM providers."""

+    @staticmethod
+    async def fetch_generic_openai_compatible(
+        api_key: str, base_url: str, provider_name: str
+    ) -> List[ModelInfo]:
+        """Fetch models from an OpenAI-compatible API."""
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get(
+                    f"{base_url.rstrip('/')}/models",
+                    headers={"Authorization": f"Bearer {api_key}"},
+                    timeout=10.0,
+                )
+                if response.status_code == 200:
+                    data = response.json()
+                    models = []
+                    for model in data.get("data", []):
+                        model_id = model.get("id", "")
+                        models.append(
+                            ModelInfo(
+                                name=model_id,
+                                display_name=model_id,
+                                description=f"{provider_name} Model",
+                            )
+                        )
+                    return models
+                else:
+                    logger.warning(
+                        f"{provider_name} API returned {response.status_code}"
+                    )
+                    return []
+        except Exception as e:
+            logger.warning(f"Failed to fetch {provider_name} models: {e}")
+            return []
+
     @staticmethod
     async def fetch_modal_models(api_key: str) -> List[ModelInfo]:
         """Fetch available models from Modal Z.ai API."""
@@ -39,7 +74,6 @@ class ModelDiscovery:
                 if response.status_code == 200:
                     data = response.json()
                     models = []
-                    # Modal returns model data in "data" key
                     for model in data.get("data", []):
                         models.append(
                             ModelInfo(
@@ -52,31 +86,15 @@
                     return (
                         models if models else await ModelDiscovery._get_modal_fallback()
                     )
-                else:
-                    logger.warning(
-                        f"Modal API returned {response.status_code}, using fallback models"
-                    )
-                    return await ModelDiscovery._get_modal_fallback()
-        except Exception as e:
-            logger.warning(f"Failed to fetch Modal models: {e}, using fallback")
+                return await ModelDiscovery._get_modal_fallback()
+        except Exception:
             return await ModelDiscovery._get_modal_fallback()

     @staticmethod
     async def _get_modal_fallback() -> List[ModelInfo]:
-        """Fallback models for Modal when API unavailable."""
         return [
-            ModelInfo(
-                name="zai-org/GLM-5.1-FP8",
-                display_name="GLM-5.1-FP8 (745B, Latest)",
-                max_tokens=4096,
-                description="Latest Modal Z.ai model - 745B parameters",
-            ),
-            ModelInfo(
-                name="zai-org/GLM-4",
-                display_name="GLM-4 (370B, Legacy)",
-                max_tokens=2048,
-                description="Previous generation Modal model",
-            ),
+            ModelInfo(name="zai-org/GLM-5.1-FP8", display_name="GLM-5.1-FP8 (Latest)"),
+            ModelInfo(name="zai-org/GLM-4", display_name="GLM-4 (Legacy)"),
         ]

     @staticmethod
@@ -92,81 +110,32 @@
                 if response.status_code == 200:
                     data = response.json()
                     models = []
-                    # Filter to only gpt models suitable for text generation
-                    suitable_models = {
-                        "gpt-4o",
-                        "gpt-4-turbo",
-                        "gpt-4",
-                        "gpt-3.5-turbo",
-                    }
+                    suitable_prefixes = {"gpt-4", "gpt-3.5", "o1-"}
                     for model in data.get("data", []):
                         model_id = model.get("id", "")
-                        # Match by prefix or exact name
-                        if any(
-                            model_id.startswith(prefix) for prefix in suitable_models
-                        ):
+                        if any(model_id.startswith(p) for p in suitable_prefixes):
                             models.append(
-                                ModelInfo(
-                                    name=model_id,
-                                    display_name=model_id,
-                                    description=f"OpenAI - {model.get('owned_by', 'N/A')}",
-                                )
+                                ModelInfo(name=model_id, display_name=model_id)
                             )
-                    # Sort by recency (gpt-4o > gpt-4-turbo > gpt-4 > gpt-3.5-turbo)
                     return (
-                        sorted(
-                            models,
-                            key=lambda x: (
-                                not x.name.startswith("gpt-4o"),
-                                not x.name.startswith("gpt-4-turbo"),
-                                not x.name.startswith("gpt-4"),
-                            ),
-                        )
+                        models
                         if models
                         else await ModelDiscovery._get_openai_fallback()
                     )
-                else:
-                    logger.warning(
-                        f"OpenAI API returned {response.status_code}, using fallback models"
-                    )
-                    return await ModelDiscovery._get_openai_fallback()
-        except Exception as e:
-            logger.warning(f"Failed to fetch OpenAI models: {e}, using fallback")
+                return await ModelDiscovery._get_openai_fallback()
+        except Exception:
             return await ModelDiscovery._get_openai_fallback()

     @staticmethod
     async def _get_openai_fallback() -> List[ModelInfo]:
-        """Fallback models for OpenAI when API unavailable."""
         return [
-            ModelInfo(
-                name="gpt-5.4",
-                display_name="GPT-5.4 (Latest Frontier)",
-                max_tokens=128000,
-                description="Best intelligence at scale for agentic, coding, and professional workflows. $2.50 input, $15 output per 1M tokens",
-            ),
-            ModelInfo(
-                name="gpt-5.4-mini",
-                display_name="GPT-5.4-mini (Mini Model)",
-                max_tokens=128000,
-                description="Strongest mini model yet for coding, computer use, and agentic tasks. $0.75 input, $4.50 output per 1M tokens",
-            ),
-            ModelInfo(
-                name="gpt-5.4-nano",
-                display_name="GPT-5.4-nano (Cheapest)",
-                max_tokens=128000,
-                description="Cheapest GPT-5.4-class model for simple high-volume tasks. $0.20 input, $1.25 output per 1M tokens",
-            ),
-            ModelInfo(
-                name="gpt-4o",
-                display_name="GPT-4o (Previous High Quality)",
-                max_tokens=128000,
-                description="Previous latest model - optimized for speed and cost",
-            ),
+            ModelInfo(name="gpt-4o", display_name="GPT-4o"),
+            ModelInfo(name="gpt-4o-mini", display_name="GPT-4o-mini"),
+            ModelInfo(name="o1-preview", display_name="o1-preview"),
         ]

     @staticmethod
     async def fetch_gemini_models(api_key: str) -> List[ModelInfo]:
-        """Fetch available models from Google Gemini API."""
         try:
             async with httpx.AsyncClient() as client:
                 response = await client.get(
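The OpenAI model filter above moves from an exact-name set to a prefix set. A small worked example (illustrative, not from the package) of what the new filter admits:

# Worked example: which model ids pass the new prefix filter.
suitable_prefixes = {"gpt-4", "gpt-3.5", "o1-"}
ids = ["gpt-4o", "gpt-4o-mini", "gpt-3.5-turbo", "o1-preview", "text-embedding-3-small"]
kept = [i for i in ids if any(i.startswith(p) for p in suitable_prefixes)]
print(kept)  # ['gpt-4o', 'gpt-4o-mini', 'gpt-3.5-turbo', 'o1-preview']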
@@ -176,106 +145,53 @@
                 if response.status_code == 200:
                     data = response.json()
                     models = []
-                    # Filter to generative models
                     for model in data.get("models", []):
-                        model_name = model.get("name", "").replace("models/", "")
-                        if "gemini" in model_name.lower():
-                            models.append(
-                                ModelInfo(
-                                    name=model_name,
-                                    display_name=model_name,
-                                    description=f"Google Gemini - {model.get('displayName', 'N/A')}",
-                                )
-                            )
+                        name = model.get("name", "").replace("models/", "")
+                        if "gemini" in name.lower():
+                            models.append(ModelInfo(name=name, display_name=name))
                     return (
                         models
                         if models
                         else await ModelDiscovery._get_gemini_fallback()
                     )
-                else:
-                    logger.warning(
-                        f"Gemini API returned {response.status_code}, using fallback models"
-                    )
-                    return await ModelDiscovery._get_gemini_fallback()
-        except Exception as e:
-            logger.warning(f"Failed to fetch Gemini models: {e}, using fallback")
+                return await ModelDiscovery._get_gemini_fallback()
+        except Exception:
             return await ModelDiscovery._get_gemini_fallback()

     @staticmethod
     async def _get_gemini_fallback() -> List[ModelInfo]:
-        """Fallback models for Gemini when API unavailable."""
+        return [
+            ModelInfo(name="gemini-1.5-pro", display_name="Gemini 1.5 Pro"),
+            ModelInfo(name="gemini-1.5-flash", display_name="Gemini 1.5 Flash"),
+        ]
+
+    @staticmethod
+    async def _get_claude_fallback() -> List[ModelInfo]:
         return [
             ModelInfo(
-                name="gemini-3.1-pro",
-                display_name="Gemini 3.1 Pro (Latest SOTA Reasoning)",
-                max_tokens=200000,
-                description="Latest SOTA reasoning model with unprecedented depth and nuance. $2 input, $12 output per context window",
-            ),
-            ModelInfo(
-                name="gemini-3-flash",
-                display_name="Gemini 3 Flash (Latest, Fastest)",
-                max_tokens=200000,
-                description="Most intelligent model built for speed, combining frontier intelligence with superior search and grounding",
-            ),
-            ModelInfo(
-                name="gemini-3.1-flash-lite",
-                display_name="Gemini 3.1 Flash Lite (Cheapest)",
-                max_tokens=200000,
-                description="Most cost-efficient model, optimized for high-volume agentic tasks. $0.25 input, $1.50 output",
-            ),
-            ModelInfo(
-                name="gemini-2.0-flash",
-                display_name="Gemini 2.0 Flash (Previous)",
-                max_tokens=2000,
-                description="Previous generation Gemini model",
+                name="claude-3-5-sonnet-20240620", display_name="Claude 3.5 Sonnet"
             ),
+            ModelInfo(name="claude-3-opus-20240229", display_name="Claude 3 Opus"),
+            ModelInfo(name="claude-3-haiku-20240307", display_name="Claude 3 Haiku"),
         ]

     @staticmethod
     async def fetch_claude_models(api_key: str) -> List[ModelInfo]:
-        """Fetch available models from Anthropic Claude API."""
-        try:
-            async with httpx.AsyncClient() as client:
-                # Claude doesn't have a public models endpoint, use documented models
-                # Make a test call to verify API key works
-                response = await client.get(
-                    "https://api.anthropic.com/v1/models",
-                    headers={"x-api-key": api_key},
-                    timeout=10.0,
-                )
-                # If we get here, API key works - return known models
-                return await ModelDiscovery._get_claude_fallback()
-        except Exception as e:
-            logger.warning(f"Failed to verify Claude API: {e}, returning known models")
-            return await ModelDiscovery._get_claude_fallback()
+        # Anthropic doesn't have a models endpoint, just return hardcoded
+        return await ModelDiscovery._get_claude_fallback()

     @staticmethod
-    async def _get_claude_fallback() -> List[ModelInfo]:
-        """Known Claude models (Anthropic doesn't provide list endpoint)."""
+    async def _get_minimax_fallback() -> List[ModelInfo]:
         return [
             ModelInfo(
-                name="claude-opus-4.6",
-                display_name="Claude Opus 4.6 (Most Intelligent)",
-                max_tokens=200000,
-                description="Most intelligent broadly available model for complex reasoning. $5 input, $25 output per 1M tokens",
-            ),
-            ModelInfo(
-                name="claude-sonnet-4.6",
-                display_name="Claude Sonnet 4.6 (Best Value)",
-                max_tokens=200000,
-                description="Best balance of speed and intelligence. $3 input, $15 output per 1M tokens",
-            ),
-            ModelInfo(
-                name="claude-haiku-4.5",
-                display_name="Claude Haiku 4.5 (Fastest, Cheapest)",
-                max_tokens=200000,
-                description="Fast and cost-efficient for simpler tasks. $0.80 input, $4 output per 1M tokens",
+                name="minimaxai/minimax-m2.7",
+                display_name="MiniMax M2.7",
+                description="MiniMax M2.7 via NVIDIA",
             ),
         ]

     @staticmethod
     async def discover_all(provider: str, api_key: str) -> List[ModelInfo]:
-        """Discover all models for a given provider."""
         provider = provider.lower().strip()
         if provider == "modal":
             return await ModelDiscovery.fetch_modal_models(api_key)
@@ -285,5 +201,38 @@
             return await ModelDiscovery.fetch_gemini_models(api_key)
         elif provider == "claude":
             return await ModelDiscovery.fetch_claude_models(api_key)
+        elif provider == "deepseek":
+            return await ModelDiscovery.fetch_generic_openai_compatible(
+                api_key, "https://api.deepseek.com/v1", "DeepSeek"
+            )
+        elif provider == "kimi":
+            return await ModelDiscovery.fetch_generic_openai_compatible(
+                api_key, "https://api.moonshot.cn/v1", "Kimi"
+            )
+        elif provider == "minimax":
+            return await ModelDiscovery.fetch_generic_openai_compatible(
+                api_key, "https://integrate.api.nvidia.com/v1", "MiniMax"
+            )
+        elif provider == "siliconflow":
+            return await ModelDiscovery.fetch_generic_openai_compatible(
+                api_key, "https://api.siliconflow.cn/v1", "SiliconFlow"
+            )
         else:
-            raise ValueError(f"Unknown provider: {provider}")
+            return []
+
+
+if __name__ == "__main__":
+    import sys
+
+    if len(sys.argv) < 3:
+        print(json.dumps([]))
+        sys.exit(0)
+
+    provider = sys.argv[1]
+    api_key = sys.argv[2]
+
+    async def main():
+        models = await ModelDiscovery.discover_all(provider, api_key)
+        print(json.dumps([asdict(m) for m in models]))
+
+    asyncio.run(main())
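The new `__main__` block (served by the `json` and `asdict` imports added at the top of the file) turns `model_discovery.py` into a small subprocess API: pass a provider name and key as argv, read a JSON array of models from stdout. A hedged sketch of a caller; whether `bin/voria` actually invokes it this way is not shown in this diff:

# Hypothetical caller, not from the package: run the new entry point as a
# subprocess and parse its JSON output (a list of asdict()-serialized ModelInfo).
import json
import subprocess
import sys

proc = subprocess.run(
    [sys.executable, "python/voria/core/llm/model_discovery.py", "minimax", "<api-key>"],
    capture_output=True,
    text=True,
    check=True,
)
for model in json.loads(proc.stdout):
    print(model["name"], model.get("display_name"))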
package/python/voria/core/llm/openai_provider.py
@@ -141,6 +141,39 @@ Code:
             "tokens_used": response.tokens_used,
         }

+    async def stream_generate(
+        self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
+    ):
+        """Stream response tokens from OpenAI"""
+        import json as _json
+        try:
+            payload = {
+                "model": self.model,
+                "messages": [{"role": m.role, "content": m.content} for m in messages],
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "stream": True,
+            }
+            async with self.client.stream("POST", self.API_ENDPOINT, json=payload) as response:
+                response.raise_for_status()
+                async for line in response.aiter_lines():
+                    if not line:
+                        continue
+                    if line.startswith("data: "):
+                        data_str = line[6:]
+                        if data_str == "[DONE]":
+                            break
+                        try:
+                            data = _json.loads(data_str)
+                            delta = data["choices"][0].get("delta", {})
+                            if "content" in delta:
+                                yield delta["content"]
+                        except Exception:
+                            continue
+        except Exception as e:
+            logger.error(f"OpenAI stream error: {e}")
+            raise
+
     async def close(self):
         """Close HTTP client"""
         await self.client.aclose()