sandboxy 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. sandboxy-0.0.1/.env.example +9 -0
  2. sandboxy-0.0.1/.github/workflows/ci.yml +61 -0
  3. sandboxy-0.0.1/.github/workflows/publish.yml +53 -0
  4. sandboxy-0.0.1/.gitignore +357 -0
  5. sandboxy-0.0.1/CONTRIBUTING.md +179 -0
  6. sandboxy-0.0.1/LICENSE +201 -0
  7. sandboxy-0.0.1/Makefile +60 -0
  8. sandboxy-0.0.1/PKG-INFO +241 -0
  9. sandboxy-0.0.1/README.md +198 -0
  10. sandboxy-0.0.1/docs/yaml-tools.md +652 -0
  11. sandboxy-0.0.1/local-ui/index.html +13 -0
  12. sandboxy-0.0.1/local-ui/package-lock.json +2757 -0
  13. sandboxy-0.0.1/local-ui/package.json +29 -0
  14. sandboxy-0.0.1/local-ui/postcss.config.js +6 -0
  15. sandboxy-0.0.1/local-ui/src/App.tsx +26 -0
  16. sandboxy-0.0.1/local-ui/src/components/Layout.tsx +75 -0
  17. sandboxy-0.0.1/local-ui/src/components/ModelSelector.tsx +311 -0
  18. sandboxy-0.0.1/local-ui/src/components/ResultDisplay.tsx +651 -0
  19. sandboxy-0.0.1/local-ui/src/hooks/useScenarioBuilder.ts +478 -0
  20. sandboxy-0.0.1/local-ui/src/hooks/useScenarioRun.ts +95 -0
  21. sandboxy-0.0.1/local-ui/src/hooks/useToolBuilder.ts +751 -0
  22. sandboxy-0.0.1/local-ui/src/index.css +291 -0
  23. sandboxy-0.0.1/local-ui/src/lib/api.ts +353 -0
  24. sandboxy-0.0.1/local-ui/src/main.tsx +13 -0
  25. sandboxy-0.0.1/local-ui/src/pages/BuilderPage.tsx +1170 -0
  26. sandboxy-0.0.1/local-ui/src/pages/DashboardPage.tsx +163 -0
  27. sandboxy-0.0.1/local-ui/src/pages/DatasetPage.tsx +1288 -0
  28. sandboxy-0.0.1/local-ui/src/pages/ResultsPage.tsx +719 -0
  29. sandboxy-0.0.1/local-ui/src/pages/RunPage.tsx +611 -0
  30. sandboxy-0.0.1/local-ui/src/pages/ToolBuilderPage.tsx +1004 -0
  31. sandboxy-0.0.1/local-ui/tailwind.config.js +12 -0
  32. sandboxy-0.0.1/local-ui/tsconfig.json +25 -0
  33. sandboxy-0.0.1/local-ui/tsconfig.node.json +10 -0
  34. sandboxy-0.0.1/local-ui/vite.config.ts +26 -0
  35. sandboxy-0.0.1/pyproject.toml +170 -0
  36. sandboxy-0.0.1/sandboxy/__init__.py +3 -0
  37. sandboxy-0.0.1/sandboxy/agents/__init__.py +21 -0
  38. sandboxy-0.0.1/sandboxy/agents/base.py +66 -0
  39. sandboxy-0.0.1/sandboxy/agents/llm_prompt.py +308 -0
  40. sandboxy-0.0.1/sandboxy/agents/loader.py +222 -0
  41. sandboxy-0.0.1/sandboxy/api/__init__.py +5 -0
  42. sandboxy-0.0.1/sandboxy/api/app.py +76 -0
  43. sandboxy-0.0.1/sandboxy/api/routes/__init__.py +1 -0
  44. sandboxy-0.0.1/sandboxy/api/routes/agents.py +92 -0
  45. sandboxy-0.0.1/sandboxy/api/routes/local.py +1388 -0
  46. sandboxy-0.0.1/sandboxy/api/routes/tools.py +106 -0
  47. sandboxy-0.0.1/sandboxy/cli/__init__.py +1 -0
  48. sandboxy-0.0.1/sandboxy/cli/main.py +1196 -0
  49. sandboxy-0.0.1/sandboxy/cli/type_detector.py +48 -0
  50. sandboxy-0.0.1/sandboxy/config.py +49 -0
  51. sandboxy-0.0.1/sandboxy/core/__init__.py +1 -0
  52. sandboxy-0.0.1/sandboxy/core/async_runner.py +824 -0
  53. sandboxy-0.0.1/sandboxy/core/mdl_parser.py +441 -0
  54. sandboxy-0.0.1/sandboxy/core/runner.py +599 -0
  55. sandboxy-0.0.1/sandboxy/core/safe_eval.py +165 -0
  56. sandboxy-0.0.1/sandboxy/core/state.py +234 -0
  57. sandboxy-0.0.1/sandboxy/datasets/__init__.py +20 -0
  58. sandboxy-0.0.1/sandboxy/datasets/loader.py +193 -0
  59. sandboxy-0.0.1/sandboxy/datasets/runner.py +442 -0
  60. sandboxy-0.0.1/sandboxy/errors.py +166 -0
  61. sandboxy-0.0.1/sandboxy/local/context.py +235 -0
  62. sandboxy-0.0.1/sandboxy/local/results.py +173 -0
  63. sandboxy-0.0.1/sandboxy/logging.py +31 -0
  64. sandboxy-0.0.1/sandboxy/mcp/__init__.py +25 -0
  65. sandboxy-0.0.1/sandboxy/mcp/client.py +360 -0
  66. sandboxy-0.0.1/sandboxy/mcp/wrapper.py +99 -0
  67. sandboxy-0.0.1/sandboxy/providers/__init__.py +34 -0
  68. sandboxy-0.0.1/sandboxy/providers/anthropic_provider.py +271 -0
  69. sandboxy-0.0.1/sandboxy/providers/base.py +123 -0
  70. sandboxy-0.0.1/sandboxy/providers/http_client.py +101 -0
  71. sandboxy-0.0.1/sandboxy/providers/openai_provider.py +282 -0
  72. sandboxy-0.0.1/sandboxy/providers/openrouter.py +958 -0
  73. sandboxy-0.0.1/sandboxy/providers/registry.py +199 -0
  74. sandboxy-0.0.1/sandboxy/scenarios/__init__.py +11 -0
  75. sandboxy-0.0.1/sandboxy/scenarios/comparison.py +491 -0
  76. sandboxy-0.0.1/sandboxy/scenarios/loader.py +262 -0
  77. sandboxy-0.0.1/sandboxy/scenarios/runner.py +468 -0
  78. sandboxy-0.0.1/sandboxy/scenarios/unified.py +1434 -0
  79. sandboxy-0.0.1/sandboxy/session/__init__.py +21 -0
  80. sandboxy-0.0.1/sandboxy/session/manager.py +278 -0
  81. sandboxy-0.0.1/sandboxy/tools/__init__.py +34 -0
  82. sandboxy-0.0.1/sandboxy/tools/base.py +127 -0
  83. sandboxy-0.0.1/sandboxy/tools/loader.py +270 -0
  84. sandboxy-0.0.1/sandboxy/tools/yaml_tools.py +708 -0
  85. sandboxy-0.0.1/sandboxy/ui/__init__.py +27 -0
  86. sandboxy-0.0.1/sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
  87. sandboxy-0.0.1/sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
  88. sandboxy-0.0.1/sandboxy/ui/dist/index.html +14 -0
  89. sandboxy-0.0.1/sandboxy/utils/__init__.py +3 -0
  90. sandboxy-0.0.1/sandboxy/utils/time.py +20 -0
  91. sandboxy-0.0.1/scenarios/customer_service.yml +311 -0
  92. sandboxy-0.0.1/tests/__init__.py +1 -0
  93. sandboxy-0.0.1/tests/conftest.py +349 -0
  94. sandboxy-0.0.1/tests/factories.py +712 -0
  95. sandboxy-0.0.1/tests/integration/__init__.py +0 -0
  96. sandboxy-0.0.1/tests/integration/api/__init__.py +0 -0
  97. sandboxy-0.0.1/tests/mocks/__init__.py +0 -0
  98. sandboxy-0.0.1/tests/mocks/providers.py +465 -0
  99. sandboxy-0.0.1/tests/unit/__init__.py +0 -0
  100. sandboxy-0.0.1/tests/unit/agents/__init__.py +0 -0
  101. sandboxy-0.0.1/tests/unit/agents/test_base.py +181 -0
  102. sandboxy-0.0.1/tests/unit/agents/test_llm_prompt.py +367 -0
  103. sandboxy-0.0.1/tests/unit/agents/test_loader.py +221 -0
  104. sandboxy-0.0.1/tests/unit/core/__init__.py +0 -0
  105. sandboxy-0.0.1/tests/unit/core/test_async_runner.py +959 -0
  106. sandboxy-0.0.1/tests/unit/core/test_mdl_parser.py +567 -0
  107. sandboxy-0.0.1/tests/unit/core/test_runner.py +586 -0
  108. sandboxy-0.0.1/tests/unit/core/test_safe_eval.py +355 -0
  109. sandboxy-0.0.1/tests/unit/core/test_state.py +386 -0
  110. sandboxy-0.0.1/tests/unit/providers/test_openrouter.py +279 -0
  111. sandboxy-0.0.1/tests/unit/tools/__init__.py +0 -0
  112. sandboxy-0.0.1/tests/unit/tools/test_base.py +183 -0
  113. sandboxy-0.0.1/tests/unit/tools/test_loader.py +82 -0
  114. sandboxy-0.0.1/uv.lock +1560 -0
@@ -0,0 +1,9 @@
1
+ # Sandboxy Environment Variables
2
+ # Copy to .env and add your keys: cp .env.example .env
3
+
4
+ # LLM Provider (at least one required)
5
+ OPENROUTER_API_KEY=
6
+
7
+ # Optional: Direct provider keys (lower latency)
8
+ # OPENAI_API_KEY=
9
+ # ANTHROPIC_API_KEY=
@@ -0,0 +1,61 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ name: Lint
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Install uv
17
+ uses: astral-sh/setup-uv@v4
18
+ with:
19
+ version: "latest"
20
+
21
+ - name: Set up Python
22
+ run: uv python install 3.12
23
+
24
+ - name: Install dependencies
25
+ run: uv sync --dev
26
+
27
+ - name: Ruff check
28
+ run: uv run ruff check sandboxy tests
29
+
30
+ - name: Ruff format check
31
+ run: uv run ruff format --check sandboxy tests
32
+
33
+ test:
34
+ name: Test (Python ${{ matrix.python-version }})
35
+ runs-on: ubuntu-latest
36
+ strategy:
37
+ matrix:
38
+ python-version: ["3.12", "3.13"]
39
+
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+
43
+ - name: Install uv
44
+ uses: astral-sh/setup-uv@v4
45
+ with:
46
+ version: "latest"
47
+
48
+ - name: Set up Python ${{ matrix.python-version }}
49
+ run: uv python install ${{ matrix.python-version }}
50
+
51
+ - name: Install dependencies
52
+ run: uv sync --dev
53
+
54
+ - name: Run tests
55
+ run: uv run pytest tests/ -v --cov=sandboxy --cov-report=xml
56
+
57
+ - name: Upload coverage
58
+ uses: codecov/codecov-action@v4
59
+ with:
60
+ file: ./coverage.xml
61
+ fail_ci_if_error: false
@@ -0,0 +1,53 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ build:
10
+ name: Build Package
11
+ runs-on: ubuntu-latest
12
+ # Only run for version tags (v*); this condition does not check which branch the tag is on
13
+ if: github.ref_type == 'tag' && startsWith(github.ref_name, 'v')
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v4
20
+ with:
21
+ version: "latest"
22
+
23
+ - name: Set up Python
24
+ run: uv python install 3.12
25
+
26
+ - name: Build package
27
+ run: uv build
28
+
29
+ - name: Upload artifacts
30
+ uses: actions/upload-artifact@v4
31
+ with:
32
+ name: dist
33
+ path: dist/
34
+
35
+ publish:
36
+ name: Publish to PyPI
37
+ runs-on: ubuntu-latest
38
+ needs: build
39
+ environment:
40
+ name: pypi
41
+ url: https://pypi.org/project/sandboxy/
42
+ permissions:
43
+ id-token: write
44
+
45
+ steps:
46
+ - name: Download artifacts
47
+ uses: actions/download-artifact@v4
48
+ with:
49
+ name: dist
50
+ path: dist/
51
+
52
+ - name: Publish to PyPI
53
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,357 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ downloads/
14
+ eggs/
15
+ .eggs/
16
+ /lib/
17
+ /lib64/
18
+ parts/
19
+ sdist/
20
+ var/
21
+ wheels/
22
+ share/python-wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ *.py.cover
49
+ .hypothesis/
50
+ .pytest_cache/
51
+ cover/
52
+
53
+ # Translations
54
+ *.mo
55
+ *.pot
56
+
57
+ # Django stuff:
58
+ *.log
59
+ local_settings.py
60
+ db.sqlite3
61
+ db.sqlite3-journal
62
+
63
+ # Flask stuff:
64
+ instance/
65
+ .webassets-cache
66
+
67
+ # Scrapy stuff:
68
+ .scrapy
69
+
70
+ # Sphinx documentation
71
+ docs/_build/
72
+
73
+ # PyBuilder
74
+ .pybuilder/
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ # For a library or package, you might want to ignore these files since the code is
86
+ # intended to run in multiple environments; otherwise, check them in:
87
+ # .python-version
88
+
89
+ # pipenv
90
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
92
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
93
+ # install all needed dependencies.
94
+ #Pipfile.lock
95
+
96
+ # UV
97
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
98
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
99
+ # commonly ignored for libraries.
100
+ #uv.lock
101
+
102
+ # poetry
103
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107
+ #poetry.lock
108
+ #poetry.toml
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
113
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
114
+ #pdm.lock
115
+ #pdm.toml
116
+ .pdm-python
117
+ .pdm-build/
118
+
119
+ # pixi
120
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
121
+ #pixi.lock
122
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
123
+ # in the .venv directory. It is recommended not to include this directory in version control.
124
+ .pixi
125
+
126
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
127
+ __pypackages__/
128
+
129
+ # Celery stuff
130
+ celerybeat-schedule
131
+ celerybeat.pid
132
+
133
+ # SageMath parsed files
134
+ *.sage.py
135
+
136
+ # Environments
137
+ .env
138
+ .envrc
139
+ .venv
140
+ env/
141
+ venv/
142
+ ENV/
143
+ env.bak/
144
+ venv.bak/
145
+
146
+ # Spyder project settings
147
+ .spyderproject
148
+ .spyproject
149
+
150
+ # Rope project settings
151
+ .ropeproject
152
+
153
+ # mkdocs documentation
154
+ /site
155
+
156
+ # mypy
157
+ .mypy_cache/
158
+ .dmypy.json
159
+ dmypy.json
160
+
161
+ # Pyre type checker
162
+ .pyre/
163
+
164
+ # pytype static type analyzer
165
+ .pytype/
166
+
167
+ # Cython debug symbols
168
+ cython_debug/
169
+
170
+ # PyCharm
171
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
172
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
173
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
174
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
175
+ #.idea/
176
+
177
+ # Abstra
178
+ # Abstra is an AI-powered process automation framework.
179
+ # Ignore directories containing user credentials, local state, and settings.
180
+ # Learn more at https://abstra.io/docs
181
+ .abstra/
182
+
183
+ # Visual Studio Code
184
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
185
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
186
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
187
+ # you could uncomment the following to ignore the entire vscode folder
188
+ # .vscode/
189
+
190
+ # Ruff stuff:
191
+ .ruff_cache/
192
+
193
+ # PyPI configuration file
194
+ .pypirc
195
+
196
+ # Cursor
197
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
198
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data.
199
+ # Refer to https://docs.cursor.com/context/ignore-files for details.
200
+ .cursorignore
201
+ .cursorindexingignore
202
+
203
+ # Marimo
204
+ marimo/_static/
205
+ marimo/_lsp/
206
+ __marimo__/
207
+
208
+
209
+ # Logs
210
+ logs
211
+ *.log
212
+ npm-debug.log*
213
+ yarn-debug.log*
214
+ yarn-error.log*
215
+ lerna-debug.log*
216
+
217
+ # Diagnostic reports (https://nodejs.org/api/report.html)
218
+ report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
219
+
220
+ # Runtime data
221
+ pids
222
+ *.pid
223
+ *.seed
224
+ *.pid.lock
225
+
226
+ # Directory for instrumented libs generated by jscoverage/JSCover
227
+ lib-cov
228
+
229
+ # Coverage directory used by tools like istanbul
230
+ coverage
231
+ *.lcov
232
+
233
+ # nyc test coverage
234
+ .nyc_output
235
+
236
+ # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
237
+ .grunt
238
+
239
+ # Bower dependency directory (https://bower.io/)
240
+ bower_components
241
+
242
+ # node-waf configuration
243
+ .lock-wscript
244
+
245
+ # Compiled binary addons (https://nodejs.org/api/addons.html)
246
+ build/Release
247
+
248
+ # Dependency directories
249
+ node_modules/
250
+ jspm_packages/
251
+
252
+ # Snowpack dependency directory (https://snowpack.dev/)
253
+ web_modules/
254
+
255
+ # TypeScript cache
256
+ *.tsbuildinfo
257
+
258
+ # Optional npm cache directory
259
+ .npm
260
+
261
+ # Optional eslint cache
262
+ .eslintcache
263
+
264
+ # Optional stylelint cache
265
+ .stylelintcache
266
+
267
+ # Optional REPL history
268
+ .node_repl_history
269
+
270
+ # Output of 'npm pack'
271
+ *.tgz
272
+
273
+ # Yarn Integrity file
274
+ .yarn-integrity
275
+
276
+ # dotenv environment variable files
277
+ .env
278
+ .env.*
279
+ !.env.example
280
+
281
+ # parcel-bundler cache (https://parceljs.org/)
282
+ .cache
283
+ .parcel-cache
284
+
285
+ # Next.js build output
286
+ .next
287
+ out
288
+
289
+ # Nuxt.js build / generate output
290
+ .nuxt
291
+ dist
292
+ !sandboxy/ui/dist/
293
+ .output
294
+
295
+ # Gatsby files
296
+ .cache/
297
+ # Uncomment the public line below if your project uses Gatsby and not Next.js
298
+ # https://nextjs.org/blog/next-9-1#public-directory-support
299
+ # public
300
+
301
+ # vuepress build output
302
+ .vuepress/dist
303
+
304
+ # vuepress v2.x temp and cache directory
305
+ .temp
306
+ .cache
307
+
308
+ # Sveltekit cache directory
309
+ .svelte-kit/
310
+
311
+ # vitepress build output
312
+ **/.vitepress/dist
313
+
314
+ # vitepress cache directory
315
+ **/.vitepress/cache
316
+
317
+ # Docusaurus cache and generated files
318
+ .docusaurus
319
+
320
+ # Serverless directories
321
+ .serverless/
322
+
323
+ # FuseBox cache
324
+ .fusebox/
325
+
326
+ # DynamoDB Local files
327
+ .dynamodb/
328
+
329
+ # Firebase cache directory
330
+ .firebase/
331
+
332
+ # TernJS port file
333
+ .tern-port
334
+
335
+ # Stores VSCode versions used for testing VSCode extensions
336
+ .vscode-test
337
+
338
+ # yarn v3
339
+ .pnp.*
340
+ .yarn/*
341
+ !.yarn/patches
342
+ !.yarn/plugins
343
+ !.yarn/releases
344
+ !.yarn/sdks
345
+ !.yarn/versions
346
+
347
+ # Vite files
348
+ vite.config.js.timestamp-*
349
+ vite.config.ts.timestamp-*
350
+ .vite/
351
+
352
+ runs/*
353
+ results.csv
354
+
355
+ ## Sandboxy Specific
356
+ work/
357
+ docs/*
@@ -0,0 +1,179 @@
1
+ # Contributing to Sandboxy
2
+
3
+ Thank you for your interest in contributing to Sandboxy! This document provides guidelines for contributing.
4
+
5
+ ## Ways to Contribute
6
+
7
+ - **Bug Reports** - Found a bug? Open an issue with reproduction steps
8
+ - **Feature Requests** - Have an idea? Open an issue to discuss
9
+ - **Scenarios** - Create new test scenarios and submit a PR
10
+ - **Tool Libraries** - Build YAML tool definitions for new use cases
11
+ - **Documentation** - Improve docs, add examples, fix typos
12
+ - **Code** - Fix bugs, add features, improve performance
13
+
14
+ ## Development Setup
15
+
16
+ ### Prerequisites
17
+
18
+ - Python 3.11+
19
+ - [uv](https://docs.astral.sh/uv/) (recommended) or pip
20
+
21
+ ### Setup
22
+
23
+ ```bash
24
+ # Clone the repo
25
+ git clone https://github.com/sandboxy-ai/sandboxy.git
26
+ cd sandboxy
27
+
28
+ # Install dependencies
29
+ uv sync --dev
30
+
31
+ # Set up environment
32
+ cp .env.example .env
33
+ # Add your OPENROUTER_API_KEY to .env
34
+ ```
35
+
36
+ ### Running Locally
37
+
38
+ ```bash
39
+ # Start local dev server with UI
40
+ sandboxy open
41
+
42
+ # Or run scenarios directly
43
+ sandboxy run scenarios/customer_service.yml -m openai/gpt-4o
44
+ ```
45
+
46
+ ## Code Style
47
+
48
+ ### Python
49
+
50
+ - Use [ruff](https://github.com/astral-sh/ruff) for linting and formatting
51
+ - Follow PEP 8
52
+ - Add type hints to all functions
53
+ - Write docstrings for public APIs
54
+
55
+ ```bash
56
+ # Format code
57
+ ruff format .
58
+
59
+ # Lint code
60
+ ruff check .
61
+
62
+ # Run tests
63
+ pytest
64
+ ```
65
+
66
+ ## Creating Scenarios
67
+
68
+ Scenarios live in `scenarios/` as YAML files. See existing scenarios for examples.
69
+
70
+ ### Scenario Guidelines
71
+
72
+ 1. **Clear description** - Explain what the scenario tests
73
+ 2. **Meaningful goals** - Include checks that measure agent performance
74
+ 3. **Good defaults** - Work out of the box without configuration
75
+ 4. **Documentation** - Include comments explaining complex parts
76
+
77
+ ### Example Scenario
78
+
79
+ ```yaml
80
+ id: my-scenario
81
+ name: "My Test Scenario"
82
+ description: "Tests agent behavior in X situation"
83
+
84
+ system_prompt: |
85
+ You are a helpful assistant.
86
+
87
+ user_prompt: |
88
+ Help me with this task.
89
+
90
+ goals:
91
+ - name: completed_task
92
+ description: "Agent completed the task"
93
+ check:
94
+ type: contains
95
+ value: "done"
96
+
97
+ scoring:
98
+ max_score: 100
99
+ ```
100
+
101
+ ### Testing Your Scenario
102
+
103
+ ```bash
104
+ # Run with a model
105
+ sandboxy run scenarios/my_scenario.yml -m openai/gpt-4o
106
+
107
+ # Compare models
108
+ sandboxy run scenarios/my_scenario.yml -m openai/gpt-4o -m anthropic/claude-3.5-sonnet
109
+ ```
110
+
111
+ ## Creating Tool Libraries
112
+
113
+ Tool libraries are YAML files that define tools agents can use. Place them in your project's `tools/` directory.
114
+
115
+ ### Tool Library Guidelines
116
+
117
+ 1. **Clear actions** - Each tool action should have a clear purpose
118
+ 2. **Good descriptions** - Help the agent understand what tools do
119
+ 3. **Sensible returns** - Return useful information
120
+ 4. **Side effects** - Use `side_effects` to update state
121
+
122
+ ### Example Tool Library
123
+
124
+ ```yaml
125
+ name: mock_inventory
126
+ description: "Inventory management tools"
127
+
128
+ tools:
129
+ check_stock:
130
+ description: "Check stock level for an item"
131
+ params:
132
+ item_id:
133
+ type: string
134
+ required: true
135
+ returns: "Stock level for {{item_id}}: 50 units"
136
+
137
+ update_stock:
138
+ description: "Update stock level"
139
+ params:
140
+ item_id:
141
+ type: string
142
+ required: true
143
+ quantity:
144
+ type: number
145
+ required: true
146
+ returns: "Updated {{item_id}} to {{quantity}} units"
147
+ side_effects:
148
+ - set: "stock_{{item_id}}"
149
+ value: "{{quantity}}"
150
+ ```
151
+
152
+ Use in scenarios with:
153
+
154
+ ```yaml
155
+ tools_from:
156
+ - mock_inventory
157
+ ```
158
+
159
+ ## Pull Request Process
160
+
161
+ 1. **Fork** the repository
162
+ 2. **Create a branch** for your feature/fix
163
+ 3. **Make changes** following the code style guidelines
164
+ 4. **Write tests** if applicable
165
+ 5. **Update documentation** if needed
166
+ 6. **Submit a PR** with a clear description
167
+
168
+ ### PR Checklist
169
+
170
+ - [ ] Code follows style guidelines
171
+ - [ ] Tests pass locally
172
+ - [ ] Documentation updated
173
+ - [ ] No secrets or API keys committed
174
+
175
+ ## Questions?
176
+
177
+ - Open an issue for questions
178
+
179
+ Thank you for contributing!