sandboxy 0.0.4__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. {sandboxy-0.0.4 → sandboxy-0.0.5}/PKG-INFO +67 -27
  2. {sandboxy-0.0.4 → sandboxy-0.0.5}/README.md +66 -26
  3. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/components/ModelSelector.tsx +66 -17
  4. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/lib/api.ts +87 -2
  5. sandboxy-0.0.5/local-ui/src/pages/DashboardPage.tsx +416 -0
  6. {sandboxy-0.0.4 → sandboxy-0.0.5}/pyproject.toml +1 -1
  7. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/agents/llm_prompt.py +85 -14
  8. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/api/app.py +2 -1
  9. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/api/routes/local.py +34 -1
  10. sandboxy-0.0.5/sandboxy/api/routes/providers.py +369 -0
  11. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/cli/main.py +371 -0
  12. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mlflow/exporter.py +7 -1
  13. sandboxy-0.0.5/sandboxy/providers/__init__.py +68 -0
  14. sandboxy-0.0.5/sandboxy/providers/config.py +243 -0
  15. sandboxy-0.0.5/sandboxy/providers/local.py +498 -0
  16. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/providers/registry.py +107 -13
  17. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/scenarios/unified.py +27 -3
  18. sandboxy-0.0.5/sandboxy/ui/dist/assets/index-CLxxjJuD.js +367 -0
  19. sandboxy-0.0.5/sandboxy/ui/dist/assets/index-DBB7ehs6.css +1 -0
  20. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/ui/dist/index.html +2 -2
  21. sandboxy-0.0.4/local-ui/src/pages/DashboardPage.tsx +0 -163
  22. sandboxy-0.0.4/sandboxy/providers/__init__.py +0 -34
  23. sandboxy-0.0.4/sandboxy/ui/dist/assets/index-CU06wBqc.js +0 -362
  24. sandboxy-0.0.4/sandboxy/ui/dist/assets/index-Cgg2wY2m.css +0 -1
  25. {sandboxy-0.0.4 → sandboxy-0.0.5}/.env.example +0 -0
  26. {sandboxy-0.0.4 → sandboxy-0.0.5}/.github/workflows/ci.yml +0 -0
  27. {sandboxy-0.0.4 → sandboxy-0.0.5}/.github/workflows/publish.yml +0 -0
  28. {sandboxy-0.0.4 → sandboxy-0.0.5}/.gitignore +0 -0
  29. {sandboxy-0.0.4 → sandboxy-0.0.5}/CONTRIBUTING.md +0 -0
  30. {sandboxy-0.0.4 → sandboxy-0.0.5}/LICENSE +0 -0
  31. {sandboxy-0.0.4 → sandboxy-0.0.5}/Makefile +0 -0
  32. {sandboxy-0.0.4 → sandboxy-0.0.5}/docs/yaml-tools.md +0 -0
  33. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/index.html +0 -0
  34. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/package-lock.json +0 -0
  35. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/package.json +0 -0
  36. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/postcss.config.js +0 -0
  37. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/App.tsx +0 -0
  38. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/components/Layout.tsx +0 -0
  39. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/components/ResultDisplay.tsx +0 -0
  40. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/hooks/useScenarioBuilder.ts +0 -0
  41. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/hooks/useScenarioRun.ts +0 -0
  42. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/hooks/useToolBuilder.ts +0 -0
  43. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/index.css +0 -0
  44. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/main.tsx +0 -0
  45. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/pages/BuilderPage.tsx +0 -0
  46. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/pages/DatasetPage.tsx +0 -0
  47. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/pages/ResultsPage.tsx +0 -0
  48. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/pages/RunPage.tsx +0 -0
  49. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/src/pages/ToolBuilderPage.tsx +0 -0
  50. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/tailwind.config.js +0 -0
  51. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/tsconfig.json +0 -0
  52. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/tsconfig.node.json +0 -0
  53. {sandboxy-0.0.4 → sandboxy-0.0.5}/local-ui/vite.config.ts +0 -0
  54. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/__init__.py +0 -0
  55. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/agents/__init__.py +0 -0
  56. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/agents/base.py +0 -0
  57. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/agents/loader.py +0 -0
  58. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/api/__init__.py +0 -0
  59. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/api/routes/__init__.py +0 -0
  60. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/api/routes/agents.py +0 -0
  61. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/api/routes/tools.py +0 -0
  62. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/cli/__init__.py +0 -0
  63. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/cli/type_detector.py +0 -0
  64. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/config.py +0 -0
  65. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/core/__init__.py +0 -0
  66. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/core/async_runner.py +0 -0
  67. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/core/mdl_parser.py +0 -0
  68. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/core/runner.py +0 -0
  69. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/core/safe_eval.py +0 -0
  70. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/core/state.py +0 -0
  71. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/datasets/__init__.py +0 -0
  72. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/datasets/loader.py +0 -0
  73. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/datasets/runner.py +0 -0
  74. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/errors.py +0 -0
  75. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/local/context.py +0 -0
  76. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/local/results.py +0 -0
  77. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/logging.py +0 -0
  78. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mcp/__init__.py +0 -0
  79. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mcp/client.py +0 -0
  80. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mcp/wrapper.py +0 -0
  81. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mlflow/__init__.py +0 -0
  82. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mlflow/artifacts.py +0 -0
  83. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mlflow/config.py +0 -0
  84. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mlflow/metrics.py +0 -0
  85. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mlflow/tags.py +0 -0
  86. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/mlflow/tracing.py +0 -0
  87. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/providers/anthropic_provider.py +0 -0
  88. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/providers/base.py +0 -0
  89. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/providers/http_client.py +0 -0
  90. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/providers/openai_provider.py +0 -0
  91. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/providers/openrouter.py +0 -0
  92. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/scenarios/__init__.py +0 -0
  93. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/scenarios/comparison.py +0 -0
  94. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/scenarios/loader.py +0 -0
  95. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/scenarios/runner.py +0 -0
  96. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/session/__init__.py +0 -0
  97. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/session/manager.py +0 -0
  98. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/tools/__init__.py +0 -0
  99. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/tools/base.py +0 -0
  100. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/tools/loader.py +0 -0
  101. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/tools/yaml_tools.py +0 -0
  102. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/ui/__init__.py +0 -0
  103. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/utils/__init__.py +0 -0
  104. {sandboxy-0.0.4 → sandboxy-0.0.5}/sandboxy/utils/time.py +0 -0
  105. {sandboxy-0.0.4 → sandboxy-0.0.5}/scenarios/customer_service.yml +0 -0
  106. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/__init__.py +0 -0
  107. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/conftest.py +0 -0
  108. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/factories.py +0 -0
  109. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/integration/__init__.py +0 -0
  110. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/integration/api/__init__.py +0 -0
  111. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/integration/test_mlflow_integration.py +0 -0
  112. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/mocks/__init__.py +0 -0
  113. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/mocks/providers.py +0 -0
  114. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/__init__.py +0 -0
  115. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/agents/__init__.py +0 -0
  116. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/agents/test_base.py +0 -0
  117. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/agents/test_llm_prompt.py +0 -0
  118. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/agents/test_loader.py +0 -0
  119. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/core/__init__.py +0 -0
  120. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/core/test_async_runner.py +0 -0
  121. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/core/test_mdl_parser.py +0 -0
  122. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/core/test_runner.py +0 -0
  123. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/core/test_safe_eval.py +0 -0
  124. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/core/test_state.py +0 -0
  125. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/mlflow/__init__.py +0 -0
  126. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/mlflow/test_artifacts.py +0 -0
  127. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/mlflow/test_config.py +0 -0
  128. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/mlflow/test_metrics.py +0 -0
  129. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/mlflow/test_tags.py +0 -0
  130. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/providers/test_openrouter.py +0 -0
  131. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/tools/__init__.py +0 -0
  132. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/tools/test_base.py +0 -0
  133. {sandboxy-0.0.4 → sandboxy-0.0.5}/tests/unit/tools/test_loader.py +0 -0
  134. {sandboxy-0.0.4 → sandboxy-0.0.5}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sandboxy
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: Open-source agent simulation and benchmarking platform
5
5
  Project-URL: Homepage, https://github.com/sandboxy-ai/sandboxy
6
6
  Project-URL: Repository, https://github.com/sandboxy-ai/sandboxy
@@ -120,7 +120,37 @@ Opens a browser with a local UI for browsing scenarios, running them, and viewin
120
120
 
121
121
  ## Writing Scenarios
122
122
 
123
- Scenarios are YAML files that define agent interactions:
123
+ Scenarios are YAML files that define agent interactions. Sandboxy supports two modes:
124
+
125
+ ### Single-turn mode
126
+
127
+ Use `prompt:` for simple request/response scenarios without tool use:
128
+
129
+ ```yaml
130
+ id: simple-qa
131
+ name: "Simple Q&A"
132
+
133
+ system_prompt: |
134
+ You are a helpful assistant.
135
+
136
+ prompt: |
137
+ What is the capital of France?
138
+
139
+ evaluation:
140
+ max_score: 100
141
+ goals:
142
+ - id: correct_answer
143
+ name: "Correct Answer"
144
+ points: 100
145
+ detection:
146
+ type: agent_contains
147
+ patterns:
148
+ - "Paris"
149
+ ```
150
+
151
+ ### Agentic mode
152
+
153
+ Use `steps:` for multi-turn scenarios with tool support:
124
154
 
125
155
  ```yaml
126
156
  id: customer-support
@@ -131,35 +161,45 @@ system_prompt: |
131
161
  You are a customer support agent for TechCo.
132
162
  Be helpful but follow company policy.
133
163
 
134
- user_prompt: |
135
- I want a refund for my purchase. Order #12345.
164
+ steps:
165
+ - id: user_request
166
+ action: inject_user
167
+ params:
168
+ content: "I want a refund for my purchase. Order #12345."
169
+ - id: agent_response
170
+ action: await_agent
136
171
 
137
- # Define tools the agent can use
172
+ # Tools are only available in agentic mode (with steps)
138
173
  tools:
139
- - name: lookup_order
174
+ lookup_order:
140
175
  description: "Look up order details"
141
- params:
142
- order_id:
143
- type: string
144
- required: true
145
- returns: "Order details for {{order_id}}"
146
-
147
- # Evaluation criteria
148
- goals:
149
- - name: acknowledged_request
150
- description: "Agent acknowledged the refund request"
151
- check:
152
- type: contains
153
- value: "refund"
154
-
155
- - name: looked_up_order
156
- description: "Agent used the lookup tool"
157
- check:
158
- type: tool_called
159
- tool: lookup_order
160
-
161
- scoring:
176
+ actions:
177
+ call:
178
+ params:
179
+ order_id:
180
+ type: string
181
+ required: true
182
+ returns: "Order details for {{order_id}}"
183
+
184
+ evaluation:
162
185
  max_score: 100
186
+ goals:
187
+ - id: acknowledged_request
188
+ name: "Acknowledged Request"
189
+ description: "Agent acknowledged the refund request"
190
+ points: 50
191
+ detection:
192
+ type: agent_contains
193
+ patterns:
194
+ - "refund"
195
+
196
+ - id: looked_up_order
197
+ name: "Looked Up Order"
198
+ description: "Agent used the lookup tool"
199
+ points: 50
200
+ detection:
201
+ type: tool_called
202
+ tool: lookup_order
163
203
  ```
164
204
 
165
205
  ## CLI Reference
@@ -75,7 +75,37 @@ Opens a browser with a local UI for browsing scenarios, running them, and viewin
75
75
 
76
76
  ## Writing Scenarios
77
77
 
78
- Scenarios are YAML files that define agent interactions:
78
+ Scenarios are YAML files that define agent interactions. Sandboxy supports two modes:
79
+
80
+ ### Single-turn mode
81
+
82
+ Use `prompt:` for simple request/response scenarios without tool use:
83
+
84
+ ```yaml
85
+ id: simple-qa
86
+ name: "Simple Q&A"
87
+
88
+ system_prompt: |
89
+ You are a helpful assistant.
90
+
91
+ prompt: |
92
+ What is the capital of France?
93
+
94
+ evaluation:
95
+ max_score: 100
96
+ goals:
97
+ - id: correct_answer
98
+ name: "Correct Answer"
99
+ points: 100
100
+ detection:
101
+ type: agent_contains
102
+ patterns:
103
+ - "Paris"
104
+ ```
105
+
106
+ ### Agentic mode
107
+
108
+ Use `steps:` for multi-turn scenarios with tool support:
79
109
 
80
110
  ```yaml
81
111
  id: customer-support
@@ -86,35 +116,45 @@ system_prompt: |
86
116
  You are a customer support agent for TechCo.
87
117
  Be helpful but follow company policy.
88
118
 
89
- user_prompt: |
90
- I want a refund for my purchase. Order #12345.
119
+ steps:
120
+ - id: user_request
121
+ action: inject_user
122
+ params:
123
+ content: "I want a refund for my purchase. Order #12345."
124
+ - id: agent_response
125
+ action: await_agent
91
126
 
92
- # Define tools the agent can use
127
+ # Tools are only available in agentic mode (with steps)
93
128
  tools:
94
- - name: lookup_order
129
+ lookup_order:
95
130
  description: "Look up order details"
96
- params:
97
- order_id:
98
- type: string
99
- required: true
100
- returns: "Order details for {{order_id}}"
101
-
102
- # Evaluation criteria
103
- goals:
104
- - name: acknowledged_request
105
- description: "Agent acknowledged the refund request"
106
- check:
107
- type: contains
108
- value: "refund"
109
-
110
- - name: looked_up_order
111
- description: "Agent used the lookup tool"
112
- check:
113
- type: tool_called
114
- tool: lookup_order
115
-
116
- scoring:
131
+ actions:
132
+ call:
133
+ params:
134
+ order_id:
135
+ type: string
136
+ required: true
137
+ returns: "Order details for {{order_id}}"
138
+
139
+ evaluation:
117
140
  max_score: 100
141
+ goals:
142
+ - id: acknowledged_request
143
+ name: "Acknowledged Request"
144
+ description: "Agent acknowledged the refund request"
145
+ points: 50
146
+ detection:
147
+ type: agent_contains
148
+ patterns:
149
+ - "refund"
150
+
151
+ - id: looked_up_order
152
+ name: "Looked Up Order"
153
+ description: "Agent used the lookup tool"
154
+ points: 50
155
+ detection:
156
+ type: tool_called
157
+ tool: lookup_order
118
158
  ```
119
159
 
120
160
  ## CLI Reference
@@ -1,7 +1,17 @@
1
1
  import { useState, useRef, useEffect } from 'react'
2
- import { ChevronDown, Check, X, Search } from 'lucide-react'
2
+ import { ChevronDown, Check, X, Search, Monitor } from 'lucide-react'
3
3
  import { ModelInfo } from '../lib/api'
4
4
 
5
+ // Badge component for local models
6
+ function LocalBadge() {
7
+ return (
8
+ <span className="inline-flex items-center gap-1 px-1.5 py-0.5 bg-emerald-500/20 border border-emerald-500/40 rounded text-xs text-emerald-400">
9
+ <Monitor size={10} />
10
+ Local
11
+ </span>
12
+ )
13
+ }
14
+
5
15
  interface ModelSelectorProps {
6
16
  models: ModelInfo[]
7
17
  value: string
@@ -43,16 +53,31 @@ export function ModelSelector({ models, value, onChange, disabled, placeholder =
43
53
 
44
54
  // Group models by provider
45
55
  const groupedModels = filteredModels.reduce((acc, model) => {
46
- const provider = model.id.split('/')[0] || 'other'
56
+ // Use provider_name for local models, otherwise extract from id
57
+ const provider = model.provider_name || model.id.split('/')[0] || 'other'
47
58
  if (!acc[provider]) acc[provider] = []
48
59
  acc[provider].push(model)
49
60
  return acc
50
61
  }, {} as Record<string, ModelInfo[]>)
51
62
 
52
- const providerOrder = ['openai', 'anthropic', 'google', 'x-ai', 'deepseek', 'meta-llama', 'mistralai', 'qwen', 'perplexity']
63
+ // Local providers first, then cloud providers in preferred order
64
+ const cloudProviderOrder = ['openai', 'anthropic', 'google', 'x-ai', 'deepseek', 'meta-llama', 'mistralai', 'qwen', 'perplexity']
65
+
66
+ // Check if a provider group has local models
67
+ const isLocalProvider = (provider: string) => {
68
+ return groupedModels[provider]?.some(m => m.is_local)
69
+ }
70
+
53
71
  const sortedProviders = Object.keys(groupedModels).sort((a, b) => {
54
- const aIdx = providerOrder.indexOf(a)
55
- const bIdx = providerOrder.indexOf(b)
72
+ // Local providers always come first
73
+ const aIsLocal = isLocalProvider(a)
74
+ const bIsLocal = isLocalProvider(b)
75
+ if (aIsLocal && !bIsLocal) return -1
76
+ if (!aIsLocal && bIsLocal) return 1
77
+
78
+ // Within same category, sort by preference
79
+ const aIdx = cloudProviderOrder.indexOf(a)
80
+ const bIdx = cloudProviderOrder.indexOf(b)
56
81
  if (aIdx === -1 && bIdx === -1) return a.localeCompare(b)
57
82
  if (aIdx === -1) return 1
58
83
  if (bIdx === -1) return -1
@@ -70,9 +95,10 @@ export function ModelSelector({ models, value, onChange, disabled, placeholder =
70
95
  } ${open ? 'ring-2 ring-orange-400' : ''}`}
71
96
  >
72
97
  {selectedModel ? (
73
- <div className="flex items-center justify-between flex-1 min-w-0">
98
+ <div className="flex items-center justify-between flex-1 min-w-0 gap-2">
74
99
  <span className="text-slate-100 truncate">{selectedModel.name}</span>
75
- <span className="text-xs text-slate-500 ml-2 shrink-0">{selectedModel.price}</span>
100
+ {selectedModel.is_local && <LocalBadge />}
101
+ <span className="text-xs text-slate-500 shrink-0">{selectedModel.price}</span>
76
102
  </div>
77
103
  ) : (
78
104
  <span className="text-slate-500">{placeholder}</span>
@@ -101,8 +127,9 @@ export function ModelSelector({ models, value, onChange, disabled, placeholder =
101
127
  <div className="overflow-y-auto flex-1">
102
128
  {sortedProviders.map(provider => (
103
129
  <div key={provider}>
104
- <div className="px-3 py-1.5 text-xs font-medium text-slate-500 uppercase bg-slate-900 sticky top-0">
130
+ <div className="px-3 py-1.5 text-xs font-medium text-slate-500 uppercase bg-slate-900 sticky top-0 flex items-center gap-2">
105
131
  {provider}
132
+ {isLocalProvider(provider) && <LocalBadge />}
106
133
  </div>
107
134
  {groupedModels[provider].map(model => (
108
135
  <button
@@ -119,8 +146,9 @@ export function ModelSelector({ models, value, onChange, disabled, placeholder =
119
146
  : 'hover:bg-slate-800 text-slate-100'
120
147
  }`}
121
148
  >
122
- <div className="flex-1 min-w-0">
149
+ <div className="flex-1 min-w-0 flex items-center gap-2">
123
150
  <div className="truncate">{model.name}</div>
151
+ {model.is_local && !isLocalProvider(provider) && <LocalBadge />}
124
152
  </div>
125
153
  <span className="text-xs text-slate-500 shrink-0">{model.price}</span>
126
154
  {model.id === value && <Check size={16} className="text-orange-400 shrink-0" />}
@@ -188,18 +216,32 @@ export function MultiModelSelector({ models, selected, onChange, disabled }: Mul
188
216
  m.id.toLowerCase().includes(search.toLowerCase())
189
217
  )
190
218
 
191
- // Group models by provider
219
+ // Group models by provider (use provider_name for local models)
192
220
  const groupedModels = filteredModels.reduce((acc, model) => {
193
- const provider = model.id.split('/')[0] || 'other'
221
+ const provider = model.provider_name || model.id.split('/')[0] || 'other'
194
222
  if (!acc[provider]) acc[provider] = []
195
223
  acc[provider].push(model)
196
224
  return acc
197
225
  }, {} as Record<string, ModelInfo[]>)
198
226
 
199
- const providerOrder = ['openai', 'anthropic', 'google', 'x-ai', 'deepseek', 'meta-llama', 'mistralai', 'qwen', 'perplexity']
227
+ // Local providers first, then cloud providers in preferred order
228
+ const cloudProviderOrder = ['openai', 'anthropic', 'google', 'x-ai', 'deepseek', 'meta-llama', 'mistralai', 'qwen', 'perplexity']
229
+
230
+ // Check if a provider group has local models
231
+ const isLocalProvider = (provider: string) => {
232
+ return groupedModels[provider]?.some(m => m.is_local)
233
+ }
234
+
200
235
  const sortedProviders = Object.keys(groupedModels).sort((a, b) => {
201
- const aIdx = providerOrder.indexOf(a)
202
- const bIdx = providerOrder.indexOf(b)
236
+ // Local providers always come first
237
+ const aIsLocal = isLocalProvider(a)
238
+ const bIsLocal = isLocalProvider(b)
239
+ if (aIsLocal && !bIsLocal) return -1
240
+ if (!aIsLocal && bIsLocal) return 1
241
+
242
+ // Within same category, sort by preference
243
+ const aIdx = cloudProviderOrder.indexOf(a)
244
+ const bIdx = cloudProviderOrder.indexOf(b)
203
245
  if (aIdx === -1 && bIdx === -1) return a.localeCompare(b)
204
246
  if (aIdx === -1) return 1
205
247
  if (bIdx === -1) return -1
@@ -216,8 +258,13 @@ export function MultiModelSelector({ models, selected, onChange, disabled }: Mul
216
258
  return (
217
259
  <span
218
260
  key={modelId}
219
- className="flex items-center gap-1.5 px-2.5 py-1 bg-orange-500/20 border border-orange-400/40 rounded-full text-sm text-slate-100"
261
+ className={`flex items-center gap-1.5 px-2.5 py-1 rounded-full text-sm text-slate-100 ${
262
+ model?.is_local
263
+ ? 'bg-emerald-500/20 border border-emerald-400/40'
264
+ : 'bg-orange-500/20 border border-orange-400/40'
265
+ }`}
220
266
  >
267
+ {model?.is_local && <Monitor size={12} className="text-emerald-400" />}
221
268
  {model?.name || modelId}
222
269
  <button
223
270
  type="button"
@@ -268,8 +315,9 @@ export function MultiModelSelector({ models, selected, onChange, disabled }: Mul
268
315
  <div className="overflow-y-auto flex-1">
269
316
  {sortedProviders.map(provider => (
270
317
  <div key={provider}>
271
- <div className="px-3 py-1.5 text-xs font-medium text-slate-500 uppercase bg-slate-900 sticky top-0">
318
+ <div className="px-3 py-1.5 text-xs font-medium text-slate-500 uppercase bg-slate-900 sticky top-0 flex items-center gap-2">
272
319
  {provider}
320
+ {isLocalProvider(provider) && <LocalBadge />}
273
321
  </div>
274
322
  {groupedModels[provider].map(model => {
275
323
  const isSelected = selected.includes(model.id)
@@ -289,8 +337,9 @@ export function MultiModelSelector({ models, selected, onChange, disabled }: Mul
289
337
  }`}>
290
338
  {isSelected && <Check size={12} className="text-slate-900" />}
291
339
  </div>
292
- <div className="flex-1 min-w-0">
340
+ <div className="flex-1 min-w-0 flex items-center gap-2">
293
341
  <div className="truncate">{model.name}</div>
342
+ {model.is_local && !isLocalProvider(provider) && <LocalBadge />}
294
343
  </div>
295
344
  <span className="text-xs text-slate-500 shrink-0">{model.price}</span>
296
345
  </button>
@@ -44,6 +44,8 @@ export interface ModelInfo {
44
44
  id: string
45
45
  name: string
46
46
  price: string
47
+ is_local?: boolean
48
+ provider_name?: string
47
49
  }
48
50
 
49
51
  export interface RunScenarioRequest {
@@ -245,7 +247,7 @@ export interface RunDatasetResponse {
245
247
  }
246
248
 
247
249
  class ApiClient {
248
- private async fetch<T>(url: string, options?: RequestInit): Promise<T> {
250
+ protected async fetch<T>(url: string, options?: RequestInit): Promise<T> {
249
251
  const response = await fetch(`${API_BASE}${url}`, {
250
252
  ...options,
251
253
  headers: {
@@ -360,4 +362,87 @@ class ApiClient {
360
362
  }
361
363
  }
362
364
 
363
- export const api = new ApiClient()
365
+ // --- Provider Types ---
366
+
367
+ export interface ProviderSummary {
368
+ name: string
369
+ type: string
370
+ base_url: string
371
+ enabled: boolean
372
+ status: 'connected' | 'disconnected' | 'error' | 'unknown'
373
+ model_count: number
374
+ models: string[]
375
+ }
376
+
377
+ export interface ProviderListResponse {
378
+ providers: ProviderSummary[]
379
+ }
380
+
381
+ export interface LocalModelInfoResponse {
382
+ id: string
383
+ name: string
384
+ context_length: number
385
+ supports_tools: boolean
386
+ is_local: boolean
387
+ }
388
+
389
+ export interface ProviderDetailResponse {
390
+ config: Record<string, unknown>
391
+ status: {
392
+ status: string
393
+ last_checked: string | null
394
+ available_models: string[]
395
+ latency_ms: number | null
396
+ error_message: string | null
397
+ }
398
+ models: LocalModelInfoResponse[]
399
+ }
400
+
401
+ export interface AddProviderRequest {
402
+ name: string
403
+ type: 'ollama' | 'lmstudio' | 'vllm' | 'openai-compatible'
404
+ base_url: string
405
+ api_key?: string | null
406
+ models?: string[]
407
+ default_params?: Record<string, unknown>
408
+ }
409
+
410
+ export interface TestConnectionResponse {
411
+ success: boolean
412
+ latency_ms: number | null
413
+ models_found: string[]
414
+ error: string | null
415
+ }
416
+
417
+ // Extend ApiClient with provider methods
418
+ class ApiClientWithProviders extends ApiClient {
419
+ async listProviders(): Promise<ProviderSummary[]> {
420
+ const response = await this.fetch<ProviderListResponse>('/providers')
421
+ return response.providers
422
+ }
423
+
424
+ async addProvider(request: AddProviderRequest): Promise<ProviderSummary> {
425
+ return this.fetch<ProviderSummary>('/providers', {
426
+ method: 'POST',
427
+ body: JSON.stringify(request),
428
+ })
429
+ }
430
+
431
+ async getProvider(name: string): Promise<ProviderDetailResponse> {
432
+ return this.fetch<ProviderDetailResponse>(`/providers/${encodeURIComponent(name)}`)
433
+ }
434
+
435
+ async deleteProvider(name: string): Promise<void> {
436
+ await this.fetch<void>(`/providers/${encodeURIComponent(name)}`, {
437
+ method: 'DELETE',
438
+ })
439
+ }
440
+
441
+ async testProvider(name: string): Promise<TestConnectionResponse> {
442
+ return this.fetch<TestConnectionResponse>(`/providers/${encodeURIComponent(name)}/test`, {
443
+ method: 'POST',
444
+ })
445
+ }
446
+ }
447
+
448
+ export const api = new ApiClientWithProviders()