PyPI - sandboxy - Versions diffs - 0.0.1__tar.gz - Mend

sandboxy 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

sandboxy-0.0.1/.env.example +9 -0
sandboxy-0.0.1/.github/workflows/ci.yml +61 -0
sandboxy-0.0.1/.github/workflows/publish.yml +53 -0
sandboxy-0.0.1/.gitignore +357 -0
sandboxy-0.0.1/CONTRIBUTING.md +179 -0
sandboxy-0.0.1/LICENSE +201 -0
sandboxy-0.0.1/Makefile +60 -0
sandboxy-0.0.1/PKG-INFO +241 -0
sandboxy-0.0.1/README.md +198 -0
sandboxy-0.0.1/docs/yaml-tools.md +652 -0
sandboxy-0.0.1/local-ui/index.html +13 -0
sandboxy-0.0.1/local-ui/package-lock.json +2757 -0
sandboxy-0.0.1/local-ui/package.json +29 -0
sandboxy-0.0.1/local-ui/postcss.config.js +6 -0
sandboxy-0.0.1/local-ui/src/App.tsx +26 -0
sandboxy-0.0.1/local-ui/src/components/Layout.tsx +75 -0
sandboxy-0.0.1/local-ui/src/components/ModelSelector.tsx +311 -0
sandboxy-0.0.1/local-ui/src/components/ResultDisplay.tsx +651 -0
sandboxy-0.0.1/local-ui/src/hooks/useScenarioBuilder.ts +478 -0
sandboxy-0.0.1/local-ui/src/hooks/useScenarioRun.ts +95 -0
sandboxy-0.0.1/local-ui/src/hooks/useToolBuilder.ts +751 -0
sandboxy-0.0.1/local-ui/src/index.css +291 -0
sandboxy-0.0.1/local-ui/src/lib/api.ts +353 -0
sandboxy-0.0.1/local-ui/src/main.tsx +13 -0
sandboxy-0.0.1/local-ui/src/pages/BuilderPage.tsx +1170 -0
sandboxy-0.0.1/local-ui/src/pages/DashboardPage.tsx +163 -0
sandboxy-0.0.1/local-ui/src/pages/DatasetPage.tsx +1288 -0
sandboxy-0.0.1/local-ui/src/pages/ResultsPage.tsx +719 -0
sandboxy-0.0.1/local-ui/src/pages/RunPage.tsx +611 -0
sandboxy-0.0.1/local-ui/src/pages/ToolBuilderPage.tsx +1004 -0
sandboxy-0.0.1/local-ui/tailwind.config.js +12 -0
sandboxy-0.0.1/local-ui/tsconfig.json +25 -0
sandboxy-0.0.1/local-ui/tsconfig.node.json +10 -0
sandboxy-0.0.1/local-ui/vite.config.ts +26 -0
sandboxy-0.0.1/pyproject.toml +170 -0
sandboxy-0.0.1/sandboxy/__init__.py +3 -0
sandboxy-0.0.1/sandboxy/agents/__init__.py +21 -0
sandboxy-0.0.1/sandboxy/agents/base.py +66 -0
sandboxy-0.0.1/sandboxy/agents/llm_prompt.py +308 -0
sandboxy-0.0.1/sandboxy/agents/loader.py +222 -0
sandboxy-0.0.1/sandboxy/api/__init__.py +5 -0
sandboxy-0.0.1/sandboxy/api/app.py +76 -0
sandboxy-0.0.1/sandboxy/api/routes/__init__.py +1 -0
sandboxy-0.0.1/sandboxy/api/routes/agents.py +92 -0
sandboxy-0.0.1/sandboxy/api/routes/local.py +1388 -0
sandboxy-0.0.1/sandboxy/api/routes/tools.py +106 -0
sandboxy-0.0.1/sandboxy/cli/__init__.py +1 -0
sandboxy-0.0.1/sandboxy/cli/main.py +1196 -0
sandboxy-0.0.1/sandboxy/cli/type_detector.py +48 -0
sandboxy-0.0.1/sandboxy/config.py +49 -0
sandboxy-0.0.1/sandboxy/core/__init__.py +1 -0
sandboxy-0.0.1/sandboxy/core/async_runner.py +824 -0
sandboxy-0.0.1/sandboxy/core/mdl_parser.py +441 -0
sandboxy-0.0.1/sandboxy/core/runner.py +599 -0
sandboxy-0.0.1/sandboxy/core/safe_eval.py +165 -0
sandboxy-0.0.1/sandboxy/core/state.py +234 -0
sandboxy-0.0.1/sandboxy/datasets/__init__.py +20 -0
sandboxy-0.0.1/sandboxy/datasets/loader.py +193 -0
sandboxy-0.0.1/sandboxy/datasets/runner.py +442 -0
sandboxy-0.0.1/sandboxy/errors.py +166 -0
sandboxy-0.0.1/sandboxy/local/context.py +235 -0
sandboxy-0.0.1/sandboxy/local/results.py +173 -0
sandboxy-0.0.1/sandboxy/logging.py +31 -0
sandboxy-0.0.1/sandboxy/mcp/__init__.py +25 -0
sandboxy-0.0.1/sandboxy/mcp/client.py +360 -0
sandboxy-0.0.1/sandboxy/mcp/wrapper.py +99 -0
sandboxy-0.0.1/sandboxy/providers/__init__.py +34 -0
sandboxy-0.0.1/sandboxy/providers/anthropic_provider.py +271 -0
sandboxy-0.0.1/sandboxy/providers/base.py +123 -0
sandboxy-0.0.1/sandboxy/providers/http_client.py +101 -0
sandboxy-0.0.1/sandboxy/providers/openai_provider.py +282 -0
sandboxy-0.0.1/sandboxy/providers/openrouter.py +958 -0
sandboxy-0.0.1/sandboxy/providers/registry.py +199 -0
sandboxy-0.0.1/sandboxy/scenarios/__init__.py +11 -0
sandboxy-0.0.1/sandboxy/scenarios/comparison.py +491 -0
sandboxy-0.0.1/sandboxy/scenarios/loader.py +262 -0
sandboxy-0.0.1/sandboxy/scenarios/runner.py +468 -0
sandboxy-0.0.1/sandboxy/scenarios/unified.py +1434 -0
sandboxy-0.0.1/sandboxy/session/__init__.py +21 -0
sandboxy-0.0.1/sandboxy/session/manager.py +278 -0
sandboxy-0.0.1/sandboxy/tools/__init__.py +34 -0
sandboxy-0.0.1/sandboxy/tools/base.py +127 -0
sandboxy-0.0.1/sandboxy/tools/loader.py +270 -0
sandboxy-0.0.1/sandboxy/tools/yaml_tools.py +708 -0
sandboxy-0.0.1/sandboxy/ui/__init__.py +27 -0
sandboxy-0.0.1/sandboxy/ui/dist/assets/index-CgAkYWrJ.css +1 -0
sandboxy-0.0.1/sandboxy/ui/dist/assets/index-D4zoGFcr.js +347 -0
sandboxy-0.0.1/sandboxy/ui/dist/index.html +14 -0
sandboxy-0.0.1/sandboxy/utils/__init__.py +3 -0
sandboxy-0.0.1/sandboxy/utils/time.py +20 -0
sandboxy-0.0.1/scenarios/customer_service.yml +311 -0
sandboxy-0.0.1/tests/__init__.py +1 -0
sandboxy-0.0.1/tests/conftest.py +349 -0
sandboxy-0.0.1/tests/factories.py +712 -0
sandboxy-0.0.1/tests/integration/__init__.py +0 -0
sandboxy-0.0.1/tests/integration/api/__init__.py +0 -0
sandboxy-0.0.1/tests/mocks/__init__.py +0 -0
sandboxy-0.0.1/tests/mocks/providers.py +465 -0
sandboxy-0.0.1/tests/unit/__init__.py +0 -0
sandboxy-0.0.1/tests/unit/agents/__init__.py +0 -0
sandboxy-0.0.1/tests/unit/agents/test_base.py +181 -0
sandboxy-0.0.1/tests/unit/agents/test_llm_prompt.py +367 -0
sandboxy-0.0.1/tests/unit/agents/test_loader.py +221 -0
sandboxy-0.0.1/tests/unit/core/__init__.py +0 -0
sandboxy-0.0.1/tests/unit/core/test_async_runner.py +959 -0
sandboxy-0.0.1/tests/unit/core/test_mdl_parser.py +567 -0
sandboxy-0.0.1/tests/unit/core/test_runner.py +586 -0
sandboxy-0.0.1/tests/unit/core/test_safe_eval.py +355 -0
sandboxy-0.0.1/tests/unit/core/test_state.py +386 -0
sandboxy-0.0.1/tests/unit/providers/test_openrouter.py +279 -0
sandboxy-0.0.1/tests/unit/tools/__init__.py +0 -0
sandboxy-0.0.1/tests/unit/tools/test_base.py +183 -0
sandboxy-0.0.1/tests/unit/tools/test_loader.py +82 -0
sandboxy-0.0.1/uv.lock +1560 -0

sandboxy-0.0.1/.env.example ADDED Viewed

@@ -0,0 +1,9 @@
+# Sandboxy Environment Variables
+# Copy to .env and add your keys: cp .env.example .env
+# LLM Provider (at least one required)
+OPENROUTER_API_KEY=
+# Optional: Direct provider keys (lower latency)
+# OPENAI_API_KEY=
+# ANTHROPIC_API_KEY=

sandboxy-0.0.1/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,61 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  lint:
+    name: Lint
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+      - name: Set up Python
+        run: uv python install 3.12
+      - name: Install dependencies
+        run: uv sync --dev
+      - name: Ruff check
+        run: uv run ruff check sandboxy tests
+      - name: Ruff format check
+        run: uv run ruff format --check sandboxy tests
+  test:
+    name: Test (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+      - name: Set up Python ${{ matrix.python-version }}
+        run: uv python install ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: uv sync --dev
+      - name: Run tests
+        run: uv run pytest tests/ -v --cov=sandboxy --cov-report=xml
+      - name: Upload coverage
+        uses: codecov/codecov-action@v4
+        with:
+          file: ./coverage.xml
+          fail_ci_if_error: false

sandboxy-0.0.1/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,53 @@
+name: Publish to PyPI
+on:
+  push:
+    tags:
+      - "v*"
+jobs:
+  build:
+    name: Build Package
+    runs-on: ubuntu-latest
+    # Only run for pushed tags whose name starts with "v" (the branch is not checked)
+    if: github.ref_type == 'tag' && startsWith(github.ref_name, 'v')
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+      - name: Set up Python
+        run: uv python install 3.12
+      - name: Build package
+        run: uv build
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+  publish:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    needs: build
+    environment:
+      name: pypi
+      url: https://pypi.org/project/sandboxy/
+    permissions:
+      id-token: write
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

sandboxy-0.0.1/.gitignore ADDED Viewed

@@ -0,0 +1,357 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+downloads/
+eggs/
+.eggs/
+/lib/
+/lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data.
+#  For details, refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+# nyc test coverage
+.nyc_output
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+# Bower dependency directory (https://bower.io/)
+bower_components
+# node-waf configuration
+.lock-wscript
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+# Dependency directories
+node_modules/
+jspm_packages/
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+# TypeScript cache
+*.tsbuildinfo
+# Optional npm cache directory
+.npm
+# Optional eslint cache
+.eslintcache
+# Optional stylelint cache
+.stylelintcache
+# Optional REPL history
+.node_repl_history
+# Output of 'npm pack'
+*.tgz
+# Yarn Integrity file
+.yarn-integrity
+# dotenv environment variable files
+.env
+.env.*
+!.env.example
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+# Next.js build output
+.next
+out
+# Nuxt.js build / generate output
+.nuxt
+dist
+!sandboxy/ui/dist/
+.output
+# Gatsby files
+.cache/
+# Uncomment the "public" line below if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+# vuepress build output
+.vuepress/dist
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+# Sveltekit cache directory
+.svelte-kit/
+# vitepress build output
+**/.vitepress/dist
+# vitepress cache directory
+**/.vitepress/cache
+# Docusaurus cache and generated files
+.docusaurus
+# Serverless directories
+.serverless/
+# FuseBox cache
+.fusebox/
+# DynamoDB Local files
+.dynamodb/
+# Firebase cache directory
+.firebase/
+# TernJS port file
+.tern-port
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+# yarn v3
+.pnp.*
+.yarn/*
+!.yarn/patches
+!.yarn/plugins
+!.yarn/releases
+!.yarn/sdks
+!.yarn/versions
+# Vite files
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
+.vite/
+runs/*
+results.csv
+## Sandboxy Specific
+work/
+docs/*

sandboxy-0.0.1/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,179 @@
+# Contributing to Sandboxy
+Thank you for your interest in contributing to Sandboxy! This document provides guidelines for contributing.
+## Ways to Contribute
+- **Bug Reports** - Found a bug? Open an issue with reproduction steps
+- **Feature Requests** - Have an idea? Open an issue to discuss
+- **Scenarios** - Create new test scenarios and submit a PR
+- **Tool Libraries** - Build YAML tool definitions for new use cases
+- **Documentation** - Improve docs, add examples, fix typos
+- **Code** - Fix bugs, add features, improve performance
+## Development Setup
+### Prerequisites
+- Python 3.11+
+- [uv](https://docs.astral.sh/uv/) (recommended) or pip
+### Setup
+```bash
+# Clone the repo
+git clone https://github.com/sandboxy-ai/sandboxy.git
+cd sandboxy
+# Install dependencies
+uv sync --dev
+# Set up environment
+cp .env.example .env
+# Add your OPENROUTER_API_KEY to .env
+```
+### Running Locally
+```bash
+# Start local dev server with UI
+sandboxy open
+# Or run scenarios directly
+sandboxy run scenarios/customer_service.yml -m openai/gpt-4o
+```
+## Code Style
+### Python
+- Use [ruff](https://github.com/astral-sh/ruff) for linting and formatting
+- Follow PEP 8
+- Add type hints to all functions
+- Write docstrings for public APIs
+```bash
+# Format code
+ruff format .
+# Lint code
+ruff check .
+# Run tests
+pytest
+```
+## Creating Scenarios
+Scenarios live in `scenarios/` as YAML files. See existing scenarios for examples.
+### Scenario Guidelines
+1. **Clear description** - Explain what the scenario tests
+2. **Meaningful goals** - Include checks that measure agent performance
+3. **Good defaults** - Work out of the box without configuration
+4. **Documentation** - Include comments explaining complex parts
+### Example Scenario
+```yaml
+id: my-scenario
+name: "My Test Scenario"
+description: "Tests agent behavior in X situation"
+system_prompt: |
+  You are a helpful assistant.
+user_prompt: |
+  Help me with this task.
+goals:
+  - name: completed_task
+    description: "Agent completed the task"
+    check:
+      type: contains
+      value: "done"
+scoring:
+  max_score: 100
+```
+### Testing Your Scenario
+```bash
+# Run with a model
+sandboxy run scenarios/my_scenario.yml -m openai/gpt-4o
+# Compare models
+sandboxy run scenarios/my_scenario.yml -m openai/gpt-4o -m anthropic/claude-3.5-sonnet
+```
+## Creating Tool Libraries
+Tool libraries are YAML files that define tools agents can use. Place them in your project's `tools/` directory.
+### Tool Library Guidelines
+1. **Clear actions** - Each tool action should have a clear purpose
+2. **Good descriptions** - Help the agent understand what tools do
+3. **Sensible returns** - Return useful information
+4. **Side effects** - Use `side_effects` to update state
+### Example Tool Library
+```yaml
+name: mock_inventory
+description: "Inventory management tools"
+tools:
+  check_stock:
+    description: "Check stock level for an item"
+    params:
+      item_id:
+        type: string
+        required: true
+    returns: "Stock level for {{item_id}}: 50 units"
+  update_stock:
+    description: "Update stock level"
+    params:
+      item_id:
+        type: string
+        required: true
+      quantity:
+        type: number
+        required: true
+    returns: "Updated {{item_id}} to {{quantity}} units"
+    side_effects:
+      - set: "stock_{{item_id}}"
+        value: "{{quantity}}"
+```
+Use in scenarios with:
+```yaml
+tools_from:
+  - mock_inventory
+```
+## Pull Request Process
+1. **Fork** the repository
+2. **Create a branch** for your feature/fix
+3. **Make changes** following the code style guidelines
+4. **Write tests** if applicable
+5. **Update documentation** if needed
+6. **Submit a PR** with a clear description
+### PR Checklist
+- [ ] Code follows style guidelines
+- [ ] Tests pass locally
+- [ ] Documentation updated
+- [ ] No secrets or API keys committed
+## Questions?
+- Open an issue for questions
+Thank you for contributing!