@voria/cli 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -380
- package/bin/voria +635 -481
- package/docs/CHANGELOG.md +19 -0
- package/docs/USER_GUIDE.md +34 -5
- package/package.json +1 -1
- package/python/voria/__init__.py +1 -1
- package/python/voria/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/__pycache__/engine.cpython-312.pyc +0 -0
- package/python/voria/core/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/__pycache__/setup.cpython-312.pyc +0 -0
- package/python/voria/core/agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/agent/__pycache__/loop.cpython-312.pyc +0 -0
- package/python/voria/core/executor/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/executor/__pycache__/executor.cpython-312.pyc +0 -0
- package/python/voria/core/executor/executor.py +5 -0
- package/python/voria/core/github/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/github/__pycache__/client.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__init__.py +16 -0
- package/python/voria/core/llm/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/base.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/claude_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/deepseek_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/gemini_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/kimi_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/minimax_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/modal_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/model_discovery.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/openai_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/__pycache__/siliconflow_provider.cpython-312.pyc +0 -0
- package/python/voria/core/llm/base.py +12 -0
- package/python/voria/core/llm/claude_provider.py +46 -0
- package/python/voria/core/llm/deepseek_provider.py +109 -0
- package/python/voria/core/llm/gemini_provider.py +44 -0
- package/python/voria/core/llm/kimi_provider.py +109 -0
- package/python/voria/core/llm/minimax_provider.py +187 -0
- package/python/voria/core/llm/modal_provider.py +33 -0
- package/python/voria/core/llm/model_discovery.py +104 -155
- package/python/voria/core/llm/openai_provider.py +33 -0
- package/python/voria/core/llm/siliconflow_provider.py +109 -0
- package/python/voria/core/patcher/__pycache__/__init__.cpython-312.pyc +0 -0
- package/python/voria/core/patcher/__pycache__/patcher.cpython-312.pyc +0 -0
- package/python/voria/core/setup.py +4 -1
- package/python/voria/core/testing/__pycache__/definitions.cpython-312.pyc +0 -0
- package/python/voria/core/testing/__pycache__/runner.cpython-312.pyc +0 -0
- package/python/voria/core/testing/definitions.py +87 -0
- package/python/voria/core/testing/runner.py +324 -0
- package/python/voria/engine.py +736 -232
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""MiniMax LLM Provider via NVIDIA Integrate API"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import List, Dict, Any, Optional
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
from .base import BaseLLMProvider, Message, LLMResponse
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MiniMaxProvider(BaseLLMProvider):
    """MiniMax LLM Provider using NVIDIA's OpenAI-compatible API.

    Every request is POSTed to ``API_ENDPOINT`` through one shared
    ``httpx.AsyncClient`` created in ``__init__``; call ``close()`` to
    release it.
    """

    API_ENDPOINT = "https://integrate.api.nvidia.com/v1/chat/completions"
    DEFAULT_MODEL = "minimaxai/minimax-m2.7"

    def __init__(self, api_key: str, model: str = DEFAULT_MODEL):
        """
        Initialize MiniMax provider

        Args:
            api_key: NVIDIA API key
            model: Model (minimaxai/minimax-m2.7, etc)
        """
        super().__init__(api_key, model)
        self.client = httpx.AsyncClient(
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            # Applies to every request made through this client.
            timeout=300.0,
        )

    def _build_payload(
        self,
        messages: List[Message],
        max_tokens: int,
        temperature: float,
        stream: bool,
    ) -> Dict[str, Any]:
        """Build the chat-completions request body shared by both call styles."""
        return {
            "model": self.model,
            "messages": [
                {"role": msg.role, "content": msg.content} for msg in messages
            ],
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.95,
            "stream": stream,
        }

    @staticmethod
    def _delta_content(chunk: Any) -> Optional[str]:
        """Return the text carried by one decoded stream chunk, or None.

        Tolerates chunks with a missing/empty ``choices`` list, a missing
        ``delta`` and a null ``content`` instead of raising on them.
        """
        choices = chunk.get("choices") if isinstance(chunk, dict) else None
        first = choices[0] if isinstance(choices, list) and choices else None
        delta = first.get("delta") if isinstance(first, dict) else None
        content = delta.get("content") if isinstance(delta, dict) else None
        return content if isinstance(content, str) else None

    async def generate(
        self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
    ) -> LLMResponse:
        """Generate response using MiniMax

        Args:
            messages: Conversation to send; each item supplies ``role`` and ``content``.
            max_tokens: Value sent as ``max_tokens`` in the request.
            temperature: Value sent as ``temperature`` in the request.

        Returns:
            LLMResponse holding the first choice's text and the reported
            ``total_tokens`` (0 when the API reports no usage).

        Raises:
            httpx.HTTPError: Transport failure or non-success HTTP status.
            Exception: Anything else (e.g. an unexpected response shape) is
                logged and re-raised unchanged.
        """
        try:
            payload = self._build_payload(
                messages, max_tokens, temperature, stream=False
            )

            logger.debug("Calling MiniMax API with %d messages", len(messages))
            logger.info(
                "Sending generation request to MiniMax model %s...", self.model
            )

            response = await self.client.post(self.API_ENDPOINT, json=payload)
            response.raise_for_status()

            data = response.json()
            # "content" may be JSON null; normalise so callers always get a str.
            content = data["choices"][0]["message"]["content"] or ""
            # "usage" may be absent or null; both must count as zero tokens.
            tokens_used = (data.get("usage") or {}).get("total_tokens") or 0

            logger.info("MiniMax API response: %s tokens used", tokens_used)

            return LLMResponse(
                content=content,
                tokens_used=tokens_used,
                model=self.model,
                provider="MiniMax",
            )

        except httpx.HTTPError as e:
            logger.error("MiniMax API error: %s", e)
            raise
        except Exception as e:
            logger.error("Error generating with MiniMax: %s", e)
            raise

    async def stream_generate(
        self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
    ):
        """Stream generation from MiniMax

        Async generator yielding each non-empty text fragment from the
        server-sent-event stream, stopping at the ``[DONE]`` marker.
        Malformed chunks are logged and skipped; transport/HTTP errors are
        logged and re-raised.
        """
        # Local import: json is not among this module's top-level imports.
        import json

        try:
            payload = self._build_payload(
                messages, max_tokens, temperature, stream=True
            )

            async with self.client.stream(
                "POST", self.API_ENDPOINT, json=payload
            ) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    # SSE permits "data:" with or without a following space.
                    if not line.startswith("data:"):
                        continue
                    data_str = line[len("data:"):].strip()
                    if data_str == "[DONE]":
                        break
                    # Only the decode is guarded, so an exception raised at the
                    # yield below is never mistaken for a parse error.
                    try:
                        data = json.loads(data_str)
                    except ValueError as e:
                        logger.error("Error parsing stream chunk: %s", e)
                        continue
                    content = self._delta_content(data)
                    if content:
                        yield content

        except Exception as e:
            logger.error("Error in MiniMax stream: %s", e)
            raise

    async def plan(self, issue_description: str) -> str:
        """Generate implementation plan

        Sends a fixed architect system prompt plus the issue text to
        ``generate`` (max_tokens=2000) and returns the response text.
        """
        system_message = Message(
            role="system",
            content=(
                "You are an expert software architect.\n"
                "Create a detailed implementation plan for fixing this GitHub issue."
            ),
        )

        user_message = Message(role="user", content=f"Issue:\n{issue_description}")

        response = await self.generate([system_message, user_message], max_tokens=2000)

        return response.content

    async def generate_patch(
        self,
        issue_description: str,
        context_files: Dict[str, str],
        previous_errors: Optional[str] = None,
    ) -> str:
        """Generate code patch in unified diff format

        Args:
            issue_description: Text of the issue to fix.
            context_files: Mapping of filename to file content, each embedded
                in the prompt under a ``--- filename ---`` header.
            previous_errors: Optional error text appended to the prompt.

        Returns:
            The model's response text (requested with max_tokens=3000,
            temperature=0.5).
        """
        system_message = Message(
            role="system",
            content=(
                "Generate a unified diff format patch.\n"
                "Format:\n"
                "--- a/path\n"
                "+++ b/path\n"
                "@@ -line,count +line,count @@"
            ),
        )

        # Collect the pieces and join once instead of repeated string +=.
        parts = [f"Issue:\n{issue_description}\n\n"]
        parts.extend(
            f"\n--- {filename} ---\n{content}\n"
            for filename, content in context_files.items()
        )
        if previous_errors:
            parts.append(f"\nPrevious Errors:\n{previous_errors}")

        user_message = Message(role="user", content="".join(parts))

        response = await self.generate(
            [system_message, user_message], max_tokens=3000, temperature=0.5
        )

        return response.content

    async def analyze_test_failure(
        self, test_output: str, code_context: str
    ) -> Dict[str, Any]:
        """Analyze test failure

        Returns:
            Dict with keys ``analysis`` (response text), ``provider``
            (always "MiniMax") and ``tokens_used``.
        """
        system_message = Message(
            role="system", content="Analyze the test failure and suggest fixes."
        )

        user_message = Message(
            role="user",
            content=f"Test Output:\n{test_output}\n\nCode:\n{code_context}",
        )

        response = await self.generate([system_message, user_message], max_tokens=1500)

        return {
            "analysis": response.content,
            "provider": "MiniMax",
            "tokens_used": response.tokens_used,
        }

    async def close(self):
        """Close HTTP client"""
        await self.client.aclose()
|
|
@@ -214,6 +214,39 @@ Code Context:
|
|
|
214
214
|
"tokens_used": response.tokens_used,
|
|
215
215
|
}
|
|
216
216
|
|
|
217
|
+
async def stream_generate(
    self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
):
    """Stream response tokens from Modal

    Async generator yielding each non-empty text fragment from the
    server-sent-event stream, stopping at the ``[DONE]`` marker.
    Malformed chunks are skipped; transport/HTTP errors are logged and
    re-raised.
    """
    import json as _json

    try:
        payload = {
            "model": self.model,
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": True,
        }
        async with self.client.stream("POST", self.API_ENDPOINT, json=payload) as response:
            response.raise_for_status()
            async for line in response.aiter_lines():
                # SSE permits "data:" with or without a following space.
                if not line.startswith("data:"):
                    continue
                data_str = line[len("data:"):].strip()
                if data_str == "[DONE]":
                    break
                # Only the decode is guarded, so an exception raised at the
                # yield below is never silently swallowed as a bad chunk.
                try:
                    data = _json.loads(data_str)
                except ValueError:
                    logger.debug("Skipping malformed Modal stream chunk: %r", data_str)
                    continue
                # Tolerate empty "choices", a missing "delta" and a null
                # "content" rather than raising or yielding None.
                choices = data.get("choices") if isinstance(data, dict) else None
                first = choices[0] if isinstance(choices, list) and choices else None
                delta = first.get("delta") if isinstance(first, dict) else None
                content = delta.get("content") if isinstance(delta, dict) else None
                if isinstance(content, str) and content:
                    yield content
    except Exception as e:
        logger.error(f"Modal stream error: {e}")
        raise
|
|
249
|
+
|
|
217
250
|
async def close(self):
|
|
218
251
|
"""Close HTTP client"""
|
|
219
252
|
await self.client.aclose()
|
|
@@ -5,9 +5,10 @@ Fetches available models at runtime based on API keys.
|
|
|
5
5
|
|
|
6
6
|
import asyncio
|
|
7
7
|
import httpx
|
|
8
|
-
from dataclasses import dataclass
|
|
8
|
+
from dataclasses import dataclass, asdict
|
|
9
9
|
from typing import List, Optional
|
|
10
10
|
import logging
|
|
11
|
+
import json
|
|
11
12
|
|
|
12
13
|
logger = logging.getLogger(__name__)
|
|
13
14
|
|
|
@@ -26,6 +27,40 @@ class ModelInfo:
|
|
|
26
27
|
class ModelDiscovery:
|
|
27
28
|
"""Fetch available models from LLM providers."""
|
|
28
29
|
|
|
30
|
+
@staticmethod
async def fetch_generic_openai_compatible(
    api_key: str, base_url: str, provider_name: str
) -> List[ModelInfo]:
    """Fetch models from an OpenAI-compatible API.

    Issues ``GET {base_url}/models`` with a bearer token and a 10-second
    timeout.

    Args:
        api_key: Key sent in the ``Authorization: Bearer`` header.
        base_url: API root; a trailing slash is ignored.
        provider_name: Label used in log messages and model descriptions.

    Returns:
        One ModelInfo per listed model that has a non-empty ``id``. An
        empty list on a non-200 status or on any error (both are logged as
        warnings); this function never raises.
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{base_url.rstrip('/')}/models",
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=10.0,
            )
            if response.status_code != 200:
                logger.warning(
                    f"{provider_name} API returned {response.status_code}"
                )
                return []

            # "data" may be absent or null; treat both as no models.
            entries = response.json().get("data") or []
            models = []
            for model in entries:
                model_id = model.get("id", "")
                if not model_id:
                    # An entry without an id would surface as a blank,
                    # unusable model name.
                    continue
                models.append(
                    ModelInfo(
                        name=model_id,
                        display_name=model_id,
                        description=f"{provider_name} Model",
                    )
                )
            return models
    except Exception as e:
        # Best-effort discovery: report the failure and return nothing.
        logger.warning(f"Failed to fetch {provider_name} models: {e}")
        return []
|
|
63
|
+
|
|
29
64
|
@staticmethod
|
|
30
65
|
async def fetch_modal_models(api_key: str) -> List[ModelInfo]:
|
|
31
66
|
"""Fetch available models from Modal Z.ai API."""
|
|
@@ -39,7 +74,6 @@ class ModelDiscovery:
|
|
|
39
74
|
if response.status_code == 200:
|
|
40
75
|
data = response.json()
|
|
41
76
|
models = []
|
|
42
|
-
# Modal returns model data in "data" key
|
|
43
77
|
for model in data.get("data", []):
|
|
44
78
|
models.append(
|
|
45
79
|
ModelInfo(
|
|
@@ -52,31 +86,15 @@ class ModelDiscovery:
|
|
|
52
86
|
return (
|
|
53
87
|
models if models else await ModelDiscovery._get_modal_fallback()
|
|
54
88
|
)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
f"Modal API returned {response.status_code}, using fallback models"
|
|
58
|
-
)
|
|
59
|
-
return await ModelDiscovery._get_modal_fallback()
|
|
60
|
-
except Exception as e:
|
|
61
|
-
logger.warning(f"Failed to fetch Modal models: {e}, using fallback")
|
|
89
|
+
return await ModelDiscovery._get_modal_fallback()
|
|
90
|
+
except Exception:
|
|
62
91
|
return await ModelDiscovery._get_modal_fallback()
|
|
63
92
|
|
|
64
93
|
@staticmethod
|
|
65
94
|
async def _get_modal_fallback() -> List[ModelInfo]:
|
|
66
|
-
"""Fallback models for Modal when API unavailable."""
|
|
67
95
|
return [
|
|
68
|
-
ModelInfo(
|
|
69
|
-
|
|
70
|
-
display_name="GLM-5.1-FP8 (745B, Latest)",
|
|
71
|
-
max_tokens=4096,
|
|
72
|
-
description="Latest Modal Z.ai model - 745B parameters",
|
|
73
|
-
),
|
|
74
|
-
ModelInfo(
|
|
75
|
-
name="zai-org/GLM-4",
|
|
76
|
-
display_name="GLM-4 (370B, Legacy)",
|
|
77
|
-
max_tokens=2048,
|
|
78
|
-
description="Previous generation Modal model",
|
|
79
|
-
),
|
|
96
|
+
ModelInfo(name="zai-org/GLM-5.1-FP8", display_name="GLM-5.1-FP8 (Latest)"),
|
|
97
|
+
ModelInfo(name="zai-org/GLM-4", display_name="GLM-4 (Legacy)"),
|
|
80
98
|
]
|
|
81
99
|
|
|
82
100
|
@staticmethod
|
|
@@ -92,81 +110,32 @@ class ModelDiscovery:
|
|
|
92
110
|
if response.status_code == 200:
|
|
93
111
|
data = response.json()
|
|
94
112
|
models = []
|
|
95
|
-
|
|
96
|
-
suitable_models = {
|
|
97
|
-
"gpt-4o",
|
|
98
|
-
"gpt-4-turbo",
|
|
99
|
-
"gpt-4",
|
|
100
|
-
"gpt-3.5-turbo",
|
|
101
|
-
}
|
|
113
|
+
suitable_prefixes = {"gpt-4", "gpt-3.5", "o1-"}
|
|
102
114
|
for model in data.get("data", []):
|
|
103
115
|
model_id = model.get("id", "")
|
|
104
|
-
|
|
105
|
-
if any(
|
|
106
|
-
model_id.startswith(prefix) for prefix in suitable_models
|
|
107
|
-
):
|
|
116
|
+
if any(model_id.startswith(p) for p in suitable_prefixes):
|
|
108
117
|
models.append(
|
|
109
|
-
ModelInfo(
|
|
110
|
-
name=model_id,
|
|
111
|
-
display_name=model_id,
|
|
112
|
-
description=f"OpenAI - {model.get('owned_by', 'N/A')}",
|
|
113
|
-
)
|
|
118
|
+
ModelInfo(name=model_id, display_name=model_id)
|
|
114
119
|
)
|
|
115
|
-
# Sort by recency (gpt-4o > gpt-4-turbo > gpt-4 > gpt-3.5-turbo)
|
|
116
120
|
return (
|
|
117
|
-
|
|
118
|
-
models,
|
|
119
|
-
key=lambda x: (
|
|
120
|
-
not x.name.startswith("gpt-4o"),
|
|
121
|
-
not x.name.startswith("gpt-4-turbo"),
|
|
122
|
-
not x.name.startswith("gpt-4"),
|
|
123
|
-
),
|
|
124
|
-
)
|
|
121
|
+
models
|
|
125
122
|
if models
|
|
126
123
|
else await ModelDiscovery._get_openai_fallback()
|
|
127
124
|
)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
f"OpenAI API returned {response.status_code}, using fallback models"
|
|
131
|
-
)
|
|
132
|
-
return await ModelDiscovery._get_openai_fallback()
|
|
133
|
-
except Exception as e:
|
|
134
|
-
logger.warning(f"Failed to fetch OpenAI models: {e}, using fallback")
|
|
125
|
+
return await ModelDiscovery._get_openai_fallback()
|
|
126
|
+
except Exception:
|
|
135
127
|
return await ModelDiscovery._get_openai_fallback()
|
|
136
128
|
|
|
137
129
|
@staticmethod
|
|
138
130
|
async def _get_openai_fallback() -> List[ModelInfo]:
|
|
139
|
-
"""Fallback models for OpenAI when API unavailable."""
|
|
140
131
|
return [
|
|
141
|
-
ModelInfo(
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
max_tokens=128000,
|
|
145
|
-
description="Best intelligence at scale for agentic, coding, and professional workflows. $2.50 input, $15 output per 1M tokens",
|
|
146
|
-
),
|
|
147
|
-
ModelInfo(
|
|
148
|
-
name="gpt-5.4-mini",
|
|
149
|
-
display_name="GPT-5.4-mini (Mini Model)",
|
|
150
|
-
max_tokens=128000,
|
|
151
|
-
description="Strongest mini model yet for coding, computer use, and agentic tasks. $0.75 input, $4.50 output per 1M tokens",
|
|
152
|
-
),
|
|
153
|
-
ModelInfo(
|
|
154
|
-
name="gpt-5.4-nano",
|
|
155
|
-
display_name="GPT-5.4-nano (Cheapest)",
|
|
156
|
-
max_tokens=128000,
|
|
157
|
-
description="Cheapest GPT-5.4-class model for simple high-volume tasks. $0.20 input, $1.25 output per 1M tokens",
|
|
158
|
-
),
|
|
159
|
-
ModelInfo(
|
|
160
|
-
name="gpt-4o",
|
|
161
|
-
display_name="GPT-4o (Previous High Quality)",
|
|
162
|
-
max_tokens=128000,
|
|
163
|
-
description="Previous latest model - optimized for speed and cost",
|
|
164
|
-
),
|
|
132
|
+
ModelInfo(name="gpt-4o", display_name="GPT-4o"),
|
|
133
|
+
ModelInfo(name="gpt-4o-mini", display_name="GPT-4o-mini"),
|
|
134
|
+
ModelInfo(name="o1-preview", display_name="o1-preview"),
|
|
165
135
|
]
|
|
166
136
|
|
|
167
137
|
@staticmethod
|
|
168
138
|
async def fetch_gemini_models(api_key: str) -> List[ModelInfo]:
|
|
169
|
-
"""Fetch available models from Google Gemini API."""
|
|
170
139
|
try:
|
|
171
140
|
async with httpx.AsyncClient() as client:
|
|
172
141
|
response = await client.get(
|
|
@@ -176,106 +145,53 @@ class ModelDiscovery:
|
|
|
176
145
|
if response.status_code == 200:
|
|
177
146
|
data = response.json()
|
|
178
147
|
models = []
|
|
179
|
-
# Filter to generative models
|
|
180
148
|
for model in data.get("models", []):
|
|
181
|
-
|
|
182
|
-
if "gemini" in
|
|
183
|
-
models.append(
|
|
184
|
-
ModelInfo(
|
|
185
|
-
name=model_name,
|
|
186
|
-
display_name=model_name,
|
|
187
|
-
description=f"Google Gemini - {model.get('displayName', 'N/A')}",
|
|
188
|
-
)
|
|
189
|
-
)
|
|
149
|
+
name = model.get("name", "").replace("models/", "")
|
|
150
|
+
if "gemini" in name.lower():
|
|
151
|
+
models.append(ModelInfo(name=name, display_name=name))
|
|
190
152
|
return (
|
|
191
153
|
models
|
|
192
154
|
if models
|
|
193
155
|
else await ModelDiscovery._get_gemini_fallback()
|
|
194
156
|
)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
f"Gemini API returned {response.status_code}, using fallback models"
|
|
198
|
-
)
|
|
199
|
-
return await ModelDiscovery._get_gemini_fallback()
|
|
200
|
-
except Exception as e:
|
|
201
|
-
logger.warning(f"Failed to fetch Gemini models: {e}, using fallback")
|
|
157
|
+
return await ModelDiscovery._get_gemini_fallback()
|
|
158
|
+
except Exception:
|
|
202
159
|
return await ModelDiscovery._get_gemini_fallback()
|
|
203
160
|
|
|
204
161
|
@staticmethod
|
|
205
162
|
async def _get_gemini_fallback() -> List[ModelInfo]:
|
|
206
|
-
|
|
163
|
+
return [
|
|
164
|
+
ModelInfo(name="gemini-1.5-pro", display_name="Gemini 1.5 Pro"),
|
|
165
|
+
ModelInfo(name="gemini-1.5-flash", display_name="Gemini 1.5 Flash"),
|
|
166
|
+
]
|
|
167
|
+
|
|
168
|
+
@staticmethod
|
|
169
|
+
async def _get_claude_fallback() -> List[ModelInfo]:
|
|
207
170
|
return [
|
|
208
171
|
ModelInfo(
|
|
209
|
-
name="
|
|
210
|
-
display_name="Gemini 3.1 Pro (Latest SOTA Reasoning)",
|
|
211
|
-
max_tokens=200000,
|
|
212
|
-
description="Latest SOTA reasoning model with unprecedented depth and nuance. $2 input, $12 output per context window",
|
|
213
|
-
),
|
|
214
|
-
ModelInfo(
|
|
215
|
-
name="gemini-3-flash",
|
|
216
|
-
display_name="Gemini 3 Flash (Latest, Fastest)",
|
|
217
|
-
max_tokens=200000,
|
|
218
|
-
description="Most intelligent model built for speed, combining frontier intelligence with superior search and grounding",
|
|
219
|
-
),
|
|
220
|
-
ModelInfo(
|
|
221
|
-
name="gemini-3.1-flash-lite",
|
|
222
|
-
display_name="Gemini 3.1 Flash Lite (Cheapest)",
|
|
223
|
-
max_tokens=200000,
|
|
224
|
-
description="Most cost-efficient model, optimized for high-volume agentic tasks. $0.25 input, $1.50 output",
|
|
225
|
-
),
|
|
226
|
-
ModelInfo(
|
|
227
|
-
name="gemini-2.0-flash",
|
|
228
|
-
display_name="Gemini 2.0 Flash (Previous)",
|
|
229
|
-
max_tokens=2000,
|
|
230
|
-
description="Previous generation Gemini model",
|
|
172
|
+
name="claude-3-5-sonnet-20240620", display_name="Claude 3.5 Sonnet"
|
|
231
173
|
),
|
|
174
|
+
ModelInfo(name="claude-3-opus-20240229", display_name="Claude 3 Opus"),
|
|
175
|
+
ModelInfo(name="claude-3-haiku-20240307", display_name="Claude 3 Haiku"),
|
|
232
176
|
]
|
|
233
177
|
|
|
234
178
|
@staticmethod
|
|
235
179
|
async def fetch_claude_models(api_key: str) -> List[ModelInfo]:
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
async with httpx.AsyncClient() as client:
|
|
239
|
-
# Claude doesn't have a public models endpoint, use documented models
|
|
240
|
-
# Make a test call to verify API key works
|
|
241
|
-
response = await client.get(
|
|
242
|
-
"https://api.anthropic.com/v1/models",
|
|
243
|
-
headers={"x-api-key": api_key},
|
|
244
|
-
timeout=10.0,
|
|
245
|
-
)
|
|
246
|
-
# If we get here, API key works - return known models
|
|
247
|
-
return await ModelDiscovery._get_claude_fallback()
|
|
248
|
-
except Exception as e:
|
|
249
|
-
logger.warning(f"Failed to verify Claude API: {e}, returning known models")
|
|
250
|
-
return await ModelDiscovery._get_claude_fallback()
|
|
180
|
+
# Anthropic doesn't have a models endpoint, just return hardcoded
|
|
181
|
+
return await ModelDiscovery._get_claude_fallback()
|
|
251
182
|
|
|
252
183
|
@staticmethod
|
|
253
|
-
async def
|
|
254
|
-
"""Known Claude models (Anthropic doesn't provide list endpoint)."""
|
|
184
|
+
async def _get_minimax_fallback() -> List[ModelInfo]:
|
|
255
185
|
return [
|
|
256
186
|
ModelInfo(
|
|
257
|
-
name="
|
|
258
|
-
display_name="
|
|
259
|
-
|
|
260
|
-
description="Most intelligent broadly available model for complex reasoning. $5 input, $25 output per 1M tokens",
|
|
261
|
-
),
|
|
262
|
-
ModelInfo(
|
|
263
|
-
name="claude-sonnet-4.6",
|
|
264
|
-
display_name="Claude Sonnet 4.6 (Best Value)",
|
|
265
|
-
max_tokens=200000,
|
|
266
|
-
description="Best balance of speed and intelligence. $3 input, $15 output per 1M tokens",
|
|
267
|
-
),
|
|
268
|
-
ModelInfo(
|
|
269
|
-
name="claude-haiku-4.5",
|
|
270
|
-
display_name="Claude Haiku 4.5 (Fastest, Cheapest)",
|
|
271
|
-
max_tokens=200000,
|
|
272
|
-
description="Fast and cost-efficient for simpler tasks. $0.80 input, $4 output per 1M tokens",
|
|
187
|
+
name="minimaxai/minimax-m2.7",
|
|
188
|
+
display_name="MiniMax M2.7",
|
|
189
|
+
description="MiniMax M2.7 via NVIDIA",
|
|
273
190
|
),
|
|
274
191
|
]
|
|
275
192
|
|
|
276
193
|
@staticmethod
|
|
277
194
|
async def discover_all(provider: str, api_key: str) -> List[ModelInfo]:
|
|
278
|
-
"""Discover all models for a given provider."""
|
|
279
195
|
provider = provider.lower().strip()
|
|
280
196
|
if provider == "modal":
|
|
281
197
|
return await ModelDiscovery.fetch_modal_models(api_key)
|
|
@@ -285,5 +201,38 @@ class ModelDiscovery:
|
|
|
285
201
|
return await ModelDiscovery.fetch_gemini_models(api_key)
|
|
286
202
|
elif provider == "claude":
|
|
287
203
|
return await ModelDiscovery.fetch_claude_models(api_key)
|
|
204
|
+
elif provider == "deepseek":
|
|
205
|
+
return await ModelDiscovery.fetch_generic_openai_compatible(
|
|
206
|
+
api_key, "https://api.deepseek.com/v1", "DeepSeek"
|
|
207
|
+
)
|
|
208
|
+
elif provider == "kimi":
|
|
209
|
+
return await ModelDiscovery.fetch_generic_openai_compatible(
|
|
210
|
+
api_key, "https://api.moonshot.cn/v1", "Kimi"
|
|
211
|
+
)
|
|
212
|
+
elif provider == "minimax":
|
|
213
|
+
return await ModelDiscovery.fetch_generic_openai_compatible(
|
|
214
|
+
api_key, "https://integrate.api.nvidia.com/v1", "MiniMax"
|
|
215
|
+
)
|
|
216
|
+
elif provider == "siliconflow":
|
|
217
|
+
return await ModelDiscovery.fetch_generic_openai_compatible(
|
|
218
|
+
api_key, "https://api.siliconflow.cn/v1", "SiliconFlow"
|
|
219
|
+
)
|
|
288
220
|
else:
|
|
289
|
-
|
|
221
|
+
return []
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
if __name__ == "__main__":
    # Command-line entry point: `<script> <provider> <api_key>`.
    # Prints the discovered models as a JSON array on stdout; with fewer
    # than two arguments it prints an empty array and exits with status 0.
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print(json.dumps([]))
        sys.exit(0)

    async def _emit_models(provider_name, key):
        """Discover the provider's models and print them as JSON."""
        discovered = await ModelDiscovery.discover_all(provider_name, key)
        print(json.dumps([asdict(entry) for entry in discovered]))

    asyncio.run(_emit_models(cli_args[0], cli_args[1]))
|
|
@@ -141,6 +141,39 @@ Code:
|
|
|
141
141
|
"tokens_used": response.tokens_used,
|
|
142
142
|
}
|
|
143
143
|
|
|
144
|
+
async def stream_generate(
    self, messages: List[Message], max_tokens: int = 2000, temperature: float = 0.7
):
    """Stream response tokens from OpenAI

    Async generator yielding each non-empty text fragment from the
    server-sent-event stream, stopping at the ``[DONE]`` marker.
    Malformed chunks are skipped; transport/HTTP errors are logged and
    re-raised.
    """
    import json as _json

    try:
        payload = {
            "model": self.model,
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": True,
        }
        async with self.client.stream("POST", self.API_ENDPOINT, json=payload) as response:
            response.raise_for_status()
            async for line in response.aiter_lines():
                # SSE permits "data:" with or without a following space.
                if not line.startswith("data:"):
                    continue
                data_str = line[len("data:"):].strip()
                if data_str == "[DONE]":
                    break
                # Only the decode is guarded, so an exception raised at the
                # yield below is never silently swallowed as a bad chunk.
                try:
                    data = _json.loads(data_str)
                except ValueError:
                    logger.debug("Skipping malformed OpenAI stream chunk: %r", data_str)
                    continue
                # Tolerate empty "choices", a missing "delta" and a null
                # "content" rather than raising or yielding None.
                choices = data.get("choices") if isinstance(data, dict) else None
                first = choices[0] if isinstance(choices, list) and choices else None
                delta = first.get("delta") if isinstance(first, dict) else None
                content = delta.get("content") if isinstance(delta, dict) else None
                if isinstance(content, str) and content:
                    yield content
    except Exception as e:
        logger.error(f"OpenAI stream error: {e}")
        raise
|
|
176
|
+
|
|
144
177
|
async def close(self):
|
|
145
178
|
"""Close HTTP client"""
|
|
146
179
|
await self.client.aclose()
|