arcade-e2b 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arcade_e2b-2.0.0/.gitignore +175 -0
- arcade_e2b-2.0.0/.pre-commit-config.yaml +18 -0
- arcade_e2b-2.0.0/.ruff.toml +47 -0
- arcade_e2b-2.0.0/LICENSE +21 -0
- arcade_e2b-2.0.0/Makefile +55 -0
- arcade_e2b-2.0.0/PKG-INFO +20 -0
- arcade_e2b-2.0.0/arcade_e2b/__init__.py +3 -0
- arcade_e2b-2.0.0/arcade_e2b/enums.py +10 -0
- arcade_e2b-2.0.0/arcade_e2b/tools/__init__.py +4 -0
- arcade_e2b-2.0.0/arcade_e2b/tools/create_chart.py +31 -0
- arcade_e2b-2.0.0/arcade_e2b/tools/run_code.py +27 -0
- arcade_e2b-2.0.0/evals/eval_e2b.py +120 -0
- arcade_e2b-2.0.0/pyproject.toml +57 -0
- arcade_e2b-2.0.0/tests/__init__.py +0 -0
- arcade_e2b-2.0.0/tests/test_e2b.py +82 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
.DS_Store
|
|
2
|
+
credentials.yaml
|
|
3
|
+
docker/credentials.yaml
|
|
4
|
+
|
|
5
|
+
*.lock
|
|
6
|
+
|
|
7
|
+
# example data
|
|
8
|
+
examples/data
|
|
9
|
+
scratch
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
docs/source
|
|
13
|
+
|
|
14
|
+
# From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
|
|
15
|
+
|
|
16
|
+
# Byte-compiled / optimized / DLL files
|
|
17
|
+
__pycache__/
|
|
18
|
+
*.py[cod]
|
|
19
|
+
*$py.class
|
|
20
|
+
|
|
21
|
+
# C extensions
|
|
22
|
+
*.so
|
|
23
|
+
|
|
24
|
+
# Distribution / packaging
|
|
25
|
+
.Python
|
|
26
|
+
build/
|
|
27
|
+
develop-eggs/
|
|
28
|
+
dist/
|
|
29
|
+
downloads/
|
|
30
|
+
eggs/
|
|
31
|
+
.eggs/
|
|
32
|
+
lib/
|
|
33
|
+
lib64/
|
|
34
|
+
parts/
|
|
35
|
+
sdist/
|
|
36
|
+
var/
|
|
37
|
+
wheels/
|
|
38
|
+
share/python-wheels/
|
|
39
|
+
*.egg-info/
|
|
40
|
+
.installed.cfg
|
|
41
|
+
*.egg
|
|
42
|
+
MANIFEST
|
|
43
|
+
|
|
44
|
+
# PyInstaller
|
|
45
|
+
# Usually these files are written by a python script from a template
|
|
46
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
47
|
+
*.manifest
|
|
48
|
+
*.spec
|
|
49
|
+
|
|
50
|
+
# Installer logs
|
|
51
|
+
pip-log.txt
|
|
52
|
+
pip-delete-this-directory.txt
|
|
53
|
+
|
|
54
|
+
# Unit test / coverage reports
|
|
55
|
+
htmlcov/
|
|
56
|
+
.tox/
|
|
57
|
+
.nox/
|
|
58
|
+
.coverage
|
|
59
|
+
.coverage.*
|
|
60
|
+
.cache
|
|
61
|
+
nosetests.xml
|
|
62
|
+
coverage.xml
|
|
63
|
+
*.cover
|
|
64
|
+
*.py,cover
|
|
65
|
+
.hypothesis/
|
|
66
|
+
.pytest_cache/
|
|
67
|
+
cover/
|
|
68
|
+
|
|
69
|
+
# Translations
|
|
70
|
+
*.mo
|
|
71
|
+
*.pot
|
|
72
|
+
|
|
73
|
+
# Django stuff:
|
|
74
|
+
*.log
|
|
75
|
+
local_settings.py
|
|
76
|
+
db.sqlite3
|
|
77
|
+
db.sqlite3-journal
|
|
78
|
+
|
|
79
|
+
# Flask stuff:
|
|
80
|
+
instance/
|
|
81
|
+
.webassets-cache
|
|
82
|
+
|
|
83
|
+
# Scrapy stuff:
|
|
84
|
+
.scrapy
|
|
85
|
+
|
|
86
|
+
# Sphinx documentation
|
|
87
|
+
docs/_build/
|
|
88
|
+
|
|
89
|
+
# PyBuilder
|
|
90
|
+
.pybuilder/
|
|
91
|
+
target/
|
|
92
|
+
|
|
93
|
+
# Jupyter Notebook
|
|
94
|
+
.ipynb_checkpoints
|
|
95
|
+
|
|
96
|
+
# IPython
|
|
97
|
+
profile_default/
|
|
98
|
+
ipython_config.py
|
|
99
|
+
|
|
100
|
+
# pyenv
|
|
101
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
102
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
103
|
+
# .python-version
|
|
104
|
+
|
|
105
|
+
# pipenv
|
|
106
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
107
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
108
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
109
|
+
# install all needed dependencies.
|
|
110
|
+
#Pipfile.lock
|
|
111
|
+
|
|
112
|
+
# poetry
|
|
113
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
114
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
115
|
+
# commonly ignored for libraries.
|
|
116
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
117
|
+
poetry.lock
|
|
118
|
+
|
|
119
|
+
# pdm
|
|
120
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
121
|
+
#pdm.lock
|
|
122
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
123
|
+
# in version control.
|
|
124
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
125
|
+
.pdm.toml
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.venv
|
|
140
|
+
env/
|
|
141
|
+
venv/
|
|
142
|
+
ENV/
|
|
143
|
+
env.bak/
|
|
144
|
+
venv.bak/
|
|
145
|
+
|
|
146
|
+
# Spyder project settings
|
|
147
|
+
.spyderproject
|
|
148
|
+
.spyproject
|
|
149
|
+
|
|
150
|
+
# Rope project settings
|
|
151
|
+
.ropeproject
|
|
152
|
+
|
|
153
|
+
# mkdocs documentation
|
|
154
|
+
/site
|
|
155
|
+
|
|
156
|
+
# mypy
|
|
157
|
+
.mypy_cache/
|
|
158
|
+
.dmypy.json
|
|
159
|
+
dmypy.json
|
|
160
|
+
|
|
161
|
+
# Pyre type checker
|
|
162
|
+
.pyre/
|
|
163
|
+
|
|
164
|
+
# pytype static type analyzer
|
|
165
|
+
.pytype/
|
|
166
|
+
|
|
167
|
+
# Cython debug symbols
|
|
168
|
+
cython_debug/
|
|
169
|
+
|
|
170
|
+
# PyCharm
|
|
171
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
172
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
173
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
174
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
175
|
+
#.idea/
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
files: ^.*/e2b/.*
|
|
2
|
+
repos:
|
|
3
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
4
|
+
rev: "v4.4.0"
|
|
5
|
+
hooks:
|
|
6
|
+
- id: check-case-conflict
|
|
7
|
+
- id: check-merge-conflict
|
|
8
|
+
- id: check-toml
|
|
9
|
+
- id: check-yaml
|
|
10
|
+
- id: end-of-file-fixer
|
|
11
|
+
- id: trailing-whitespace
|
|
12
|
+
|
|
13
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
14
|
+
rev: v0.6.7
|
|
15
|
+
hooks:
|
|
16
|
+
- id: ruff
|
|
17
|
+
args: [--fix]
|
|
18
|
+
- id: ruff-format
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
target-version = "py310"
|
|
2
|
+
line-length = 100
|
|
3
|
+
fix = true
|
|
4
|
+
|
|
5
|
+
[lint]
|
|
6
|
+
select = [
|
|
7
|
+
# flake8-2020
|
|
8
|
+
"YTT",
|
|
9
|
+
# flake8-bandit
|
|
10
|
+
"S",
|
|
11
|
+
# flake8-bugbear
|
|
12
|
+
"B",
|
|
13
|
+
# flake8-builtins
|
|
14
|
+
"A",
|
|
15
|
+
# flake8-comprehensions
|
|
16
|
+
"C4",
|
|
17
|
+
# flake8-debugger
|
|
18
|
+
"T10",
|
|
19
|
+
# flake8-simplify
|
|
20
|
+
"SIM",
|
|
21
|
+
# isort
|
|
22
|
+
"I",
|
|
23
|
+
# mccabe
|
|
24
|
+
"C90",
|
|
25
|
+
# pycodestyle
|
|
26
|
+
"E", "W",
|
|
27
|
+
# pyflakes
|
|
28
|
+
"F",
|
|
29
|
+
# pygrep-hooks
|
|
30
|
+
"PGH",
|
|
31
|
+
# pyupgrade
|
|
32
|
+
"UP",
|
|
33
|
+
# ruff
|
|
34
|
+
"RUF",
|
|
35
|
+
# tryceratops
|
|
36
|
+
"TRY",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[lint.per-file-ignores]
|
|
40
|
+
"*" = ["TRY003", "B904"]
|
|
41
|
+
"**/tests/*" = ["S101", "E501"]
|
|
42
|
+
"**/evals/*" = ["S101", "E501"]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
[format]
|
|
46
|
+
preview = true
|
|
47
|
+
skip-magic-trailing-comma = false
|
arcade_e2b-2.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, Arcade AI
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
.PHONY: help
|
|
2
|
+
|
|
3
|
+
help:
|
|
4
|
+
@echo "🛠️ e2b Commands:\n"
|
|
5
|
+
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
|
6
|
+
|
|
7
|
+
.PHONY: install
|
|
8
|
+
install: ## Install the uv environment and install all packages with dependencies
|
|
9
|
+
@echo "🚀 Creating virtual environment and installing all packages using uv"
|
|
10
|
+
@uv sync --active --all-extras --no-sources
|
|
11
|
+
@if [ -f .pre-commit-config.yaml ]; then uv run --no-sources pre-commit install; fi
|
|
12
|
+
@echo "✅ All packages and dependencies installed via uv"
|
|
13
|
+
|
|
14
|
+
.PHONY: install-local
|
|
15
|
+
install-local: ## Install the uv environment and install all packages with dependencies with local Arcade sources
|
|
16
|
+
@echo "🚀 Creating virtual environment and installing all packages using uv"
|
|
17
|
+
@uv sync --active --all-extras
|
|
18
|
+
@if [ -f .pre-commit-config.yaml ]; then uv run pre-commit install; fi
|
|
19
|
+
@echo "✅ All packages and dependencies installed via uv"
|
|
20
|
+
|
|
21
|
+
.PHONY: build
|
|
22
|
+
build: clean-build ## Build wheel file using uv
|
|
23
|
+
@echo "🚀 Creating wheel file"
|
|
24
|
+
uv build
|
|
25
|
+
|
|
26
|
+
.PHONY: clean-build
|
|
27
|
+
clean-build: ## clean build artifacts
|
|
28
|
+
@echo "🗑️ Cleaning dist directory"
|
|
29
|
+
rm -rf dist
|
|
30
|
+
|
|
31
|
+
.PHONY: test
|
|
32
|
+
test: ## Test the code with pytest
|
|
33
|
+
@echo "🚀 Testing code: Running pytest"
|
|
34
|
+
@uv run --no-sources pytest -W ignore -v --cov --cov-config=pyproject.toml --cov-report=xml
|
|
35
|
+
|
|
36
|
+
.PHONY: coverage
|
|
37
|
+
coverage: ## Generate coverage report
|
|
38
|
+
@echo "coverage report"
|
|
39
|
+
@uv run --no-sources coverage report
|
|
40
|
+
@echo "Generating coverage report"
|
|
41
|
+
@uv run --no-sources coverage html
|
|
42
|
+
|
|
43
|
+
.PHONY: bump-version
|
|
44
|
+
bump-version: ## Bump the version in the pyproject.toml file by a patch version
|
|
45
|
+
@echo "🚀 Bumping version in pyproject.toml"
|
|
46
|
+
uv version --no-sources --bump patch
|
|
47
|
+
|
|
48
|
+
.PHONY: check
|
|
49
|
+
check: ## Run code quality tools.
|
|
50
|
+
@if [ -f .pre-commit-config.yaml ]; then\
|
|
51
|
+
echo "🚀 Linting code: Running pre-commit";\
|
|
52
|
+
uv run --no-sources pre-commit run -a;\
|
|
53
|
+
fi
|
|
54
|
+
@echo "🚀 Static type checking: Running mypy"
|
|
55
|
+
@uv run --no-sources mypy --config-file=pyproject.toml
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arcade_e2b
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Arcade.dev LLM tools for running code in a sandbox using E2B
|
|
5
|
+
Author-email: Arcade <dev@arcade.dev>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: arcade-tdk<3.0.0,>=2.0.0
|
|
9
|
+
Requires-Dist: e2b-code-interpreter<2.0.0,>=1.0.1
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: arcade-ai[evals]<3.0.0,>=2.0.0; extra == 'dev'
|
|
12
|
+
Requires-Dist: arcade-serve<3.0.0,>=2.0.0; extra == 'dev'
|
|
13
|
+
Requires-Dist: mypy<1.6.0,>=1.5.1; extra == 'dev'
|
|
14
|
+
Requires-Dist: pre-commit<3.5.0,>=3.4.0; extra == 'dev'
|
|
15
|
+
Requires-Dist: pytest-asyncio<0.25.0,>=0.24.0; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest-cov<4.1.0,>=4.0.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: pytest-mock<3.12.0,>=3.11.1; extra == 'dev'
|
|
18
|
+
Requires-Dist: pytest<8.4.0,>=8.3.0; extra == 'dev'
|
|
19
|
+
Requires-Dist: ruff<0.8.0,>=0.7.4; extra == 'dev'
|
|
20
|
+
Requires-Dist: tox<4.12.0,>=4.11.1; extra == 'dev'
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from arcade_tdk import ToolContext, tool
|
|
4
|
+
from e2b_code_interpreter import Sandbox
|
|
5
|
+
|
|
6
|
+
# See https://e2b.dev/docs to learn more about E2B
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Note: Not recommended to use tool_choice='generate' with this tool
|
|
10
|
+
# since its output contains a base64-encoded image.
|
|
11
|
+
@tool(requires_secrets=["E2B_API_KEY"])
def create_static_matplotlib_chart(
    context: ToolContext,
    code: Annotated[str, "The Python code to run"],
) -> Annotated[dict, "A dictionary with the following keys: base64_image, logs, error"]:
    """
    Run the provided Python code to generate a static matplotlib chart.
    The resulting chart is returned as a base64 encoded image.
    """
    # NOTE(review): the docstring above is presumably surfaced by arcade_tdk as
    # the tool description, so its wording is intentionally left unchanged.
    api_key = context.get_secret("E2B_API_KEY")

    # The sandbox is only needed while the code runs; the execution object is
    # read after the context manager has exited.
    with Sandbox(api_key=api_key) as sbx:
        execution = sbx.run_code(code=code)

    # Pick the first result that actually carries PNG data instead of assuming
    # it is results[0]; an earlier result without an image would otherwise hide
    # the chart. Falls back to None when there are no results or no image.
    base64_image = next((item.png for item in execution.results if item.png), None)

    return {
        "base64_image": base64_image,
        "logs": execution.logs.to_json(),
        # `error` is only serialized when the execution reported one.
        "error": execution.error.to_json() if execution.error else None,
    }
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
from arcade_tdk import ToolContext, tool
|
|
4
|
+
from e2b_code_interpreter import Sandbox
|
|
5
|
+
|
|
6
|
+
from arcade_e2b.enums import E2BSupportedLanguage
|
|
7
|
+
|
|
8
|
+
# See https://e2b.dev/docs to learn more about E2B
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@tool(requires_secrets=["E2B_API_KEY"])
def run_code(
    context: ToolContext,
    code: Annotated[str, "The code to run"],
    language: Annotated[
        E2BSupportedLanguage, "The language of the code"
    ] = E2BSupportedLanguage.PYTHON,
) -> Annotated[str, "The sandbox execution as a JSON string"]:
    """
    Run code in a sandbox and return the output.
    """
    # NOTE(review): the docstring above is presumably surfaced by arcade_tdk as
    # the tool description, so its wording is intentionally left unchanged.
    e2b_key = context.get_secret("E2B_API_KEY")

    # Run the snippet inside the sandbox context; the execution object is
    # serialized after the context manager has exited.
    with Sandbox(api_key=e2b_key) as sandbox:
        outcome = sandbox.run_code(code=code, language=language)

    serialized = outcome.to_json()
    return str(serialized)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from arcade_evals import (
|
|
2
|
+
BinaryCritic,
|
|
3
|
+
EvalRubric,
|
|
4
|
+
EvalSuite,
|
|
5
|
+
ExpectedToolCall,
|
|
6
|
+
SimilarityCritic,
|
|
7
|
+
tool_eval,
|
|
8
|
+
)
|
|
9
|
+
from arcade_tdk import ToolCatalog
|
|
10
|
+
|
|
11
|
+
import arcade_e2b
|
|
12
|
+
from arcade_e2b.enums import E2BSupportedLanguage
|
|
13
|
+
from arcade_e2b.tools.create_chart import create_static_matplotlib_chart
|
|
14
|
+
from arcade_e2b.tools.run_code import run_code
|
|
15
|
+
|
|
16
|
+
merge_sort_code = """
|
|
17
|
+
def merge_sort(arr):
|
|
18
|
+
if len(arr) <= 1:
|
|
19
|
+
return arr
|
|
20
|
+
|
|
21
|
+
mid = len(arr) // 2
|
|
22
|
+
left = merge_sort(arr[:mid])
|
|
23
|
+
right = merge_sort(arr[mid:])
|
|
24
|
+
|
|
25
|
+
return merge(left, right)
|
|
26
|
+
|
|
27
|
+
def merge(left, right):
|
|
28
|
+
result = []
|
|
29
|
+
i, j = 0, 0
|
|
30
|
+
|
|
31
|
+
while i < len(left) and j < len(right):
|
|
32
|
+
if left[i] < right[j]:
|
|
33
|
+
result.append(left[i])
|
|
34
|
+
i += 1
|
|
35
|
+
else:
|
|
36
|
+
result.append(right[j])
|
|
37
|
+
j += 1
|
|
38
|
+
|
|
39
|
+
result.extend(left[i:])
|
|
40
|
+
result.extend(right[j:])
|
|
41
|
+
|
|
42
|
+
return result
|
|
43
|
+
|
|
44
|
+
sample_list = ["banana", "apple", "cherry", "date", "elderberry"]
|
|
45
|
+
|
|
46
|
+
sorted_list = merge_sort(sample_list)
|
|
47
|
+
print("Sorted list:", sorted_list)
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
matplotlib_chart_code = """
|
|
51
|
+
import matplotlib.pyplot as plt
|
|
52
|
+
|
|
53
|
+
labels = ['Apples', 'Bananas', 'Cherries', 'Dates']
|
|
54
|
+
sizes = [30, 25, 20, 25]
|
|
55
|
+
colors = ['red', 'yellow', 'purple', 'brown']
|
|
56
|
+
|
|
57
|
+
plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
|
|
58
|
+
|
|
59
|
+
plt.axis('equal')
|
|
60
|
+
|
|
61
|
+
plt.title('Fruit Distribution')
|
|
62
|
+
|
|
63
|
+
plt.savefig('fruit_pie_chart.png')
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
# Evaluation rubric
|
|
67
|
+
# Score thresholds handed to the suite below.
rubric = EvalRubric(fail_threshold=0.85, warn_threshold=0.95)

# Catalog populated from the arcade_e2b package; the suite below is given this catalog.
catalog = ToolCatalog()
catalog.add_module(arcade_e2b)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@tool_eval()
def e2b_eval_suite():
    # Builds the evaluation suite for the two E2B tools: one case expecting a
    # run_code call and one expecting a create_static_matplotlib_chart call.
    e2b_suite = EvalSuite(
        name="E2B Tools Evaluation",
        system_message="You are an AI assistant with access to E2B tools. Use them to help the user with their tasks.",
        catalog=catalog,
        rubric=rubric,
    )

    # Case 1: the merge sort snippet should be forwarded to run_code as Python.
    expected_run_code_call = ExpectedToolCall(
        func=run_code,
        args={
            "code": merge_sort_code,
            "language": E2BSupportedLanguage.PYTHON,
        },
    )
    e2b_suite.add_case(
        name="Run code",
        user_message=f"Can you please run my merge sort algo?\n\n{merge_sort_code}",
        expected_tool_calls=[expected_run_code_call],
        critics=[
            SimilarityCritic(critic_field="code", weight=0.8),
            BinaryCritic(critic_field="language", weight=0.2),
        ],
    )

    # Case 2: the matplotlib snippet should be forwarded to the chart tool.
    expected_chart_call = ExpectedToolCall(
        func=create_static_matplotlib_chart,
        args={"code": matplotlib_chart_code},
    )
    e2b_suite.add_case(
        name="Create static matplotlib chart",
        user_message=f"Run this code:\n\n{matplotlib_chart_code}",
        expected_tool_calls=[expected_chart_call],
        critics=[SimilarityCritic(critic_field="code", weight=1.0)],
    )

    return e2b_suite
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = [ "hatchling",]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "arcade_e2b"
|
|
7
|
+
version = "2.0.0"
|
|
8
|
+
description = "Arcade.dev LLM tools for running code in a sandbox using E2B"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"arcade-tdk>=2.0.0,<3.0.0",
|
|
12
|
+
"e2b-code-interpreter>=1.0.1,<2.0.0",
|
|
13
|
+
]
|
|
14
|
+
[[project.authors]]
|
|
15
|
+
name = "Arcade"
|
|
16
|
+
email = "dev@arcade.dev"
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
dev = [
|
|
20
|
+
"arcade-ai[evals]>=2.0.0,<3.0.0",
|
|
21
|
+
"arcade-serve>=2.0.0,<3.0.0",
|
|
22
|
+
"pytest>=8.3.0,<8.4.0",
|
|
23
|
+
"pytest-cov>=4.0.0,<4.1.0",
|
|
24
|
+
"pytest-asyncio>=0.24.0,<0.25.0",
|
|
25
|
+
"pytest-mock>=3.11.1,<3.12.0",
|
|
26
|
+
"mypy>=1.5.1,<1.6.0",
|
|
27
|
+
"pre-commit>=3.4.0,<3.5.0",
|
|
28
|
+
"tox>=4.11.1,<4.12.0",
|
|
29
|
+
"ruff>=0.7.4,<0.8.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
# Use local path sources for arcade libs when working locally
|
|
33
|
+
[tool.uv.sources]
|
|
34
|
+
arcade-ai = {path = "../../", editable = true}
|
|
35
|
+
arcade-tdk = { path = "../../libs/arcade-tdk/", editable = true }
|
|
36
|
+
arcade-serve = { path = "../../libs/arcade-serve/", editable = true }
|
|
37
|
+
|
|
38
|
+
[tool.mypy]
|
|
39
|
+
files = [ "arcade_e2b/**/*.py",]
|
|
40
|
+
python_version = "3.10"
|
|
41
|
+
disallow_untyped_defs = "True"
|
|
42
|
+
disallow_any_unimported = "True"
|
|
43
|
+
no_implicit_optional = "True"
|
|
44
|
+
check_untyped_defs = "True"
|
|
45
|
+
warn_return_any = "True"
|
|
46
|
+
warn_unused_ignores = "True"
|
|
47
|
+
show_error_codes = "True"
|
|
48
|
+
ignore_missing_imports = "True"
|
|
49
|
+
|
|
50
|
+
[tool.pytest.ini_options]
|
|
51
|
+
testpaths = [ "tests",]
|
|
52
|
+
|
|
53
|
+
[tool.coverage.report]
|
|
54
|
+
skip_empty = true
|
|
55
|
+
|
|
56
|
+
[tool.hatch.build.targets.wheel]
|
|
57
|
+
packages = [ "arcade_e2b",]
|
|
File without changes
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from unittest.mock import MagicMock, patch
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from arcade_tdk import ToolContext, ToolSecretItem
|
|
5
|
+
from arcade_tdk.errors import ToolExecutionError
|
|
6
|
+
|
|
7
|
+
import arcade_e2b.tools.create_chart
|
|
8
|
+
import arcade_e2b.tools.run_code
|
|
9
|
+
from arcade_e2b.enums import E2BSupportedLanguage
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.fixture
def mock_run_code_sandbox():
    """Patch ``Sandbox`` in the run_code module and yield the object its ``with`` block binds."""
    sandbox_patch = patch("arcade_e2b.tools.run_code.Sandbox")
    with sandbox_patch as sandbox_cls:
        # Sandbox(...) -> return_value; entering it -> __enter__.return_value.
        entered_sandbox = sandbox_cls.return_value.__enter__.return_value
        yield entered_sandbox
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@pytest.fixture
def mock_create_chart_sandbox():
    """Patch ``Sandbox`` in the create_chart module and yield the object its ``with`` block binds."""
    sandbox_patch = patch("arcade_e2b.tools.create_chart.Sandbox")
    with sandbox_patch as sandbox_cls:
        # Sandbox(...) -> return_value; entering it -> __enter__.return_value.
        entered_sandbox = sandbox_cls.return_value.__enter__.return_value
        yield entered_sandbox
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
def mock_context():
    """Return a ToolContext carrying a single fake ``e2b_api_key`` secret."""
    fake_secret = ToolSecretItem(key="e2b_api_key", value="fake_api_key")
    return ToolContext(secrets=[fake_secret])
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_run_code_success(mock_run_code_sandbox, mock_context):
    """The tool returns the JSON string produced by the sandbox execution."""
    expected_json = '{"result": "success"}'

    fake_execution = MagicMock()
    fake_execution.to_json.return_value = expected_json
    mock_run_code_sandbox.run_code.return_value = fake_execution

    output = arcade_e2b.tools.run_code(
        mock_context, "print('Hello, World!')", E2BSupportedLanguage.PYTHON
    )

    assert output == expected_json
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_run_code_error(mock_run_code_sandbox, mock_context):
    """A ToolExecutionError raised while serializing the execution propagates to the caller."""
    failing_execution = MagicMock()
    failing_execution.to_json.side_effect = ToolExecutionError("Execution failed")
    mock_run_code_sandbox.run_code.return_value = failing_execution

    snippet = "print('Hello, World!')"
    with pytest.raises(ToolExecutionError, match="Execution failed"):
        arcade_e2b.tools.run_code(mock_context, snippet, E2BSupportedLanguage.PYTHON)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_create_static_matplotlib_chart_success(mock_create_chart_sandbox, mock_context):
    """With one PNG result and no error, the tool returns the image, the logs and ``error=None``."""
    png_result = MagicMock(png="base64encodedimage")

    fake_execution = MagicMock()
    fake_execution.results = [png_result]
    fake_execution.logs.to_json.return_value = '{"logs": "log data"}'
    fake_execution.error = None
    mock_create_chart_sandbox.run_code.return_value = fake_execution

    expected = {
        "base64_image": "base64encodedimage",
        "logs": '{"logs": "log data"}',
        "error": None,
    }

    actual = arcade_e2b.tools.create_chart.create_static_matplotlib_chart(
        mock_context, "import matplotlib.pyplot as plt"
    )

    assert actual == expected
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_create_static_matplotlib_chart_error(mock_create_chart_sandbox, mock_context):
    """With no results and an error present, the tool returns no image plus the serialized error."""
    fake_execution = MagicMock()
    fake_execution.results = []
    fake_execution.logs.to_json.return_value = '{"logs": "log data"}'
    # `error` stays an auto-created MagicMock (truthy), so the tool serializes it.
    fake_execution.error.to_json.return_value = '{"error": "some error"}'
    mock_create_chart_sandbox.run_code.return_value = fake_execution

    expected = {
        "base64_image": None,
        "logs": '{"logs": "log data"}',
        "error": '{"error": "some error"}',
    }

    actual = arcade_e2b.tools.create_chart.create_static_matplotlib_chart(
        mock_context, "import matplotlib.pyplot as plt"
    )

    assert actual == expected
|