sandboxy-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sandboxy/__init__.py +3 -0
- sandboxy/agents/__init__.py +21 -0
- sandboxy/agents/base.py +66 -0
- sandboxy/agents/llm_prompt.py +308 -0
- sandboxy/agents/loader.py +222 -0
- sandboxy/api/__init__.py +5 -0
- sandboxy/api/app.py +76 -0
- sandboxy/api/routes/__init__.py +1 -0
- sandboxy/api/routes/agents.py +92 -0
- sandboxy/api/routes/local.py +1388 -0
- sandboxy/api/routes/tools.py +106 -0
- sandboxy/cli/__init__.py +1 -0
- sandboxy/cli/main.py +1196 -0
- sandboxy/cli/type_detector.py +48 -0
- sandboxy/config.py +49 -0
- sandboxy/core/__init__.py +1 -0
- sandboxy/core/async_runner.py +824 -0
- sandboxy/core/mdl_parser.py +441 -0
- sandboxy/core/runner.py +599 -0
- sandboxy/core/safe_eval.py +165 -0
- sandboxy/core/state.py +234 -0
- sandboxy/datasets/__init__.py +20 -0
- sandboxy/datasets/loader.py +193 -0
- sandboxy/datasets/runner.py +442 -0
- sandboxy/errors.py +166 -0
- sandboxy/local/context.py +235 -0
- sandboxy/local/results.py +173 -0
- sandboxy/logging.py +31 -0
- sandboxy/mcp/__init__.py +25 -0
- sandboxy/mcp/client.py +360 -0
- sandboxy/mcp/wrapper.py +99 -0
- sandboxy/providers/__init__.py +34 -0
- sandboxy/providers/anthropic_provider.py +271 -0
- sandboxy/providers/base.py +123 -0
- sandboxy/providers/http_client.py +101 -0
- sandboxy/providers/openai_provider.py +282 -0
- sandboxy/providers/openrouter.py +958 -0
- sandboxy/providers/registry.py +199 -0
- sandboxy/scenarios/__init__.py +11 -0
- sandboxy/scenarios/comparison.py +491 -0
- sandboxy/scenarios/loader.py +262 -0
- sandboxy/scenarios/runner.py +468 -0
- sandboxy/scenarios/unified.py +1434 -0
- sandboxy/session/__init__.py +21 -0
- sandboxy/session/manager.py +278 -0
- sandboxy/tools/__init__.py +34 -0
- sandboxy/tools/base.py +127 -0
- sandboxy/tools/loader.py +270 -0
- sandboxy/tools/yaml_tools.py +708 -0
- sandboxy/ui/__init__.py +27 -0
- sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
- sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
- sandboxy/ui/dist/index.html +14 -0
- sandboxy/utils/__init__.py +3 -0
- sandboxy/utils/time.py +20 -0
- sandboxy-0.0.1.dist-info/METADATA +241 -0
- sandboxy-0.0.1.dist-info/RECORD +60 -0
- sandboxy-0.0.1.dist-info/WHEEL +4 -0
- sandboxy-0.0.1.dist-info/entry_points.txt +3 -0
- sandboxy-0.0.1.dist-info/licenses/LICENSE +201 -0
sandboxy/providers/openai_provider.py
@@ -0,0 +1,282 @@
"""Direct OpenAI provider."""

import os
import time
from collections.abc import AsyncIterator
from typing import Any

from sandboxy.providers.base import BaseProvider, ModelInfo, ModelResponse, ProviderError

OPENAI_MODELS = {
    # GPT-5.2 Series (latest)
    "gpt-5.2-pro": ModelInfo(
        id="gpt-5.2-pro",
        name="GPT-5.2 Pro",
        provider="openai",
        context_length=200000,
        input_cost_per_million=5.00,
        output_cost_per_million=20.00,
        supports_vision=True,
    ),
    "gpt-5.2": ModelInfo(
        id="gpt-5.2",
        name="GPT-5.2",
        provider="openai",
        context_length=128000,
        input_cost_per_million=2.50,
        output_cost_per_million=10.00,
        supports_vision=True,
    ),
    "gpt-5.2-chat": ModelInfo(
        id="gpt-5.2-chat",
        name="GPT-5.2 Chat",
        provider="openai",
        context_length=128000,
        input_cost_per_million=1.00,
        output_cost_per_million=4.00,
        supports_vision=True,
    ),
    # GPT-5.1 Series
    "gpt-5.1": ModelInfo(
        id="gpt-5.1",
        name="GPT-5.1",
        provider="openai",
        context_length=128000,
        input_cost_per_million=2.00,
        output_cost_per_million=8.00,
        supports_vision=True,
    ),
    "gpt-5.1-codex": ModelInfo(
        id="gpt-5.1-codex",
        name="GPT-5.1 Codex",
        provider="openai",
        context_length=128000,
        input_cost_per_million=2.50,
        output_cost_per_million=10.00,
    ),
    # GPT-5 Series
    "gpt-5": ModelInfo(
        id="gpt-5",
        name="GPT-5",
        provider="openai",
        context_length=128000,
        input_cost_per_million=5.00,
        output_cost_per_million=20.00,
    ),
    "gpt-5-image": ModelInfo(
        id="gpt-5-image",
        name="GPT-5 Image",
        provider="openai",
        context_length=128000,
        input_cost_per_million=3.00,
        output_cost_per_million=12.00,
        supports_vision=True,
    ),
    "gpt-5-mini": ModelInfo(
        id="gpt-5-mini",
        name="GPT-5 Mini",
        provider="openai",
        context_length=128000,
        input_cost_per_million=1.00,
        output_cost_per_million=4.00,
    ),
    "gpt-5-nano": ModelInfo(
        id="gpt-5-nano",
        name="GPT-5 Nano",
        provider="openai",
        context_length=128000,
        input_cost_per_million=0.50,
        output_cost_per_million=2.00,
    ),
    # o-Series (Reasoning)
    "o3-deep-research": ModelInfo(
        id="o3-deep-research",
        name="o3 Deep Research",
        provider="openai",
        context_length=200000,
        input_cost_per_million=20.00,
        output_cost_per_million=80.00,
    ),
    "o1": ModelInfo(
        id="o1",
        name="o1",
        provider="openai",
        context_length=200000,
        input_cost_per_million=15.00,
        output_cost_per_million=60.00,
    ),
    "o1-mini": ModelInfo(
        id="o1-mini",
        name="o1 Mini",
        provider="openai",
        context_length=128000,
        input_cost_per_million=3.00,
        output_cost_per_million=12.00,
    ),
    # GPT-4 Series (legacy)
    "gpt-4o": ModelInfo(
        id="gpt-4o",
        name="GPT-4o",
        provider="openai",
        context_length=128000,
        input_cost_per_million=2.50,
        output_cost_per_million=10.00,
        supports_vision=True,
    ),
    "gpt-4o-mini": ModelInfo(
        id="gpt-4o-mini",
        name="GPT-4o Mini",
        provider="openai",
        context_length=128000,
        input_cost_per_million=0.15,
        output_cost_per_million=0.60,
        supports_vision=True,
    ),
}


class OpenAIProvider(BaseProvider):
    """Direct OpenAI API provider.

    Use this when you have an OpenAI API key and want to call
    OpenAI models directly (potentially lower latency than OpenRouter).
    """

    provider_name = "openai"

    def __init__(self, api_key: str | None = None):
        """Initialize OpenAI provider.

        Args:
            api_key: OpenAI API key. If not provided, reads from
                OPENAI_API_KEY environment variable.

        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ProviderError(
                "API key required. Set OPENAI_API_KEY or pass api_key.",
                provider=self.provider_name,
            )

        # Lazy import to avoid requiring openai package if not used
        try:
            from openai import AsyncOpenAI

            self.client = AsyncOpenAI(api_key=self.api_key)
        except ImportError as e:
            raise ProviderError(
                "openai package required. Install with: pip install openai",
                provider=self.provider_name,
            ) from e

    async def complete(
        self,
        model: str,
        messages: list[dict[str, Any]],
        temperature: float = 0.7,
        max_tokens: int = 1024,
        **kwargs: Any,
    ) -> ModelResponse:
        """Send completion request to OpenAI."""
        start_time = time.time()

        # Handle model-specific parameters
        completion_kwargs: dict[str, Any] = {
            "model": model,
            "messages": messages,
        }

        # GPT-5 (all variants) and o1/o3 reasoning models don't support temperature
        if not any(x in model for x in ["gpt-5", "o1", "o3"]):
            completion_kwargs["temperature"] = temperature

        # GPT-5 models use max_completion_tokens
        if "gpt-5" in model or "gpt-4o" in model:
            completion_kwargs["max_completion_tokens"] = max_tokens
        else:
            completion_kwargs["max_tokens"] = max_tokens

        # Add any extra kwargs
        completion_kwargs.update(kwargs)

        try:
            response = await self.client.chat.completions.create(**completion_kwargs)
        except Exception as e:
            raise ProviderError(
                str(e),
                provider=self.provider_name,
                model=model,
            ) from e

        latency_ms = int((time.time() - start_time) * 1000)

        choice = response.choices[0]
        usage = response.usage

        input_tokens = usage.prompt_tokens if usage else 0
        output_tokens = usage.completion_tokens if usage else 0
        cost = self._calculate_cost(model, input_tokens, output_tokens)

        return ModelResponse(
            content=choice.message.content or "",
            model_id=response.model,
            latency_ms=latency_ms,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost_usd=cost,
            finish_reason=choice.finish_reason,
            raw_response=response.model_dump(),
        )

    async def stream(
        self,
        model: str,
        messages: list[dict[str, Any]],
        temperature: float = 0.7,
        max_tokens: int = 1024,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Stream completion response from OpenAI."""
        completion_kwargs: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "stream": True,
        }

        # GPT-5 (all variants) and o1/o3 reasoning models don't support temperature
        if not any(x in model for x in ["gpt-5", "o1", "o3"]):
            completion_kwargs["temperature"] = temperature

        if "gpt-5" in model or "gpt-4o" in model:
            completion_kwargs["max_completion_tokens"] = max_tokens
        else:
            completion_kwargs["max_tokens"] = max_tokens

        completion_kwargs.update(kwargs)

        try:
            stream = await self.client.chat.completions.create(**completion_kwargs)
            async for chunk in stream:
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            raise ProviderError(
                str(e),
                provider=self.provider_name,
                model=model,
            ) from e

    def list_models(self) -> list[ModelInfo]:
        """List available OpenAI models."""
        return list(OPENAI_MODELS.values())

    def _calculate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float | None:
        """Calculate cost in USD for a request."""
        model_info = OPENAI_MODELS.get(model)
        if not model_info or not model_info.input_cost_per_million:
            return None

        input_cost = (input_tokens / 1_000_000) * model_info.input_cost_per_million
        output_cost = (output_tokens / 1_000_000) * (model_info.output_cost_per_million or 0)
        return round(input_cost + output_cost, 6)
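For orientation, here is a minimal sketch of how this provider could be exercised. It is an illustration, not part of the package: it assumes only what the file above defines (the OpenAIProvider class, the complete() signature, and the ModelResponse fields), plus an OPENAI_API_KEY in the environment and the openai package installed.

import asyncio

from sandboxy.providers.openai_provider import OpenAIProvider


async def main() -> None:
    provider = OpenAIProvider()  # reads OPENAI_API_KEY from the environment
    response = await provider.complete(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hello."}],
        max_tokens=64,
    )
    # cost_usd comes from _calculate_cost() via the OPENAI_MODELS pricing table
    print(response.content, response.latency_ms, response.cost_usd)


asyncio.run(main())

As a worked check of _calculate_cost: for gpt-4o-mini, 1,000 input tokens and 500 output tokens price out to (1,000 / 1,000,000) × 0.15 + (500 / 1,000,000) × 0.60 = 0.00045 USD.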