aiqtoolkit 1.2.0a20250707__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (197) hide show
  1. aiq/agent/base.py +170 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +112 -111
  4. aiq/agent/react_agent/register.py +31 -14
  5. aiq/agent/rewoo_agent/agent.py +36 -35
  6. aiq/agent/rewoo_agent/register.py +2 -2
  7. aiq/agent/tool_calling_agent/agent.py +3 -7
  8. aiq/authentication/__init__.py +14 -0
  9. aiq/authentication/api_key/__init__.py +14 -0
  10. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  11. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  12. aiq/authentication/api_key/register.py +26 -0
  13. aiq/authentication/exceptions/__init__.py +14 -0
  14. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  15. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  16. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  17. aiq/authentication/exceptions/request_exceptions.py +54 -0
  18. aiq/authentication/http_basic_auth/__init__.py +0 -0
  19. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  20. aiq/authentication/http_basic_auth/register.py +30 -0
  21. aiq/authentication/interfaces.py +93 -0
  22. aiq/authentication/oauth2/__init__.py +14 -0
  23. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  24. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  25. aiq/authentication/oauth2/register.py +25 -0
  26. aiq/authentication/register.py +21 -0
  27. aiq/builder/builder.py +64 -2
  28. aiq/builder/component_utils.py +16 -3
  29. aiq/builder/context.py +26 -0
  30. aiq/builder/eval_builder.py +43 -2
  31. aiq/builder/function.py +32 -4
  32. aiq/builder/function_base.py +1 -1
  33. aiq/builder/intermediate_step_manager.py +6 -8
  34. aiq/builder/user_interaction_manager.py +3 -0
  35. aiq/builder/workflow.py +23 -18
  36. aiq/builder/workflow_builder.py +420 -73
  37. aiq/cli/commands/info/list_mcp.py +103 -16
  38. aiq/cli/commands/sizing/__init__.py +14 -0
  39. aiq/cli/commands/sizing/calc.py +294 -0
  40. aiq/cli/commands/sizing/sizing.py +27 -0
  41. aiq/cli/commands/start.py +1 -0
  42. aiq/cli/entrypoint.py +2 -0
  43. aiq/cli/register_workflow.py +80 -0
  44. aiq/cli/type_registry.py +151 -30
  45. aiq/data_models/api_server.py +123 -11
  46. aiq/data_models/authentication.py +231 -0
  47. aiq/data_models/common.py +35 -7
  48. aiq/data_models/component.py +17 -9
  49. aiq/data_models/component_ref.py +33 -0
  50. aiq/data_models/config.py +60 -3
  51. aiq/data_models/embedder.py +1 -0
  52. aiq/data_models/function_dependencies.py +8 -0
  53. aiq/data_models/interactive.py +10 -1
  54. aiq/data_models/intermediate_step.py +15 -5
  55. aiq/data_models/its_strategy.py +30 -0
  56. aiq/data_models/llm.py +1 -0
  57. aiq/data_models/memory.py +1 -0
  58. aiq/data_models/object_store.py +44 -0
  59. aiq/data_models/retry_mixin.py +35 -0
  60. aiq/data_models/span.py +187 -0
  61. aiq/data_models/telemetry_exporter.py +2 -2
  62. aiq/embedder/nim_embedder.py +2 -1
  63. aiq/embedder/openai_embedder.py +2 -1
  64. aiq/eval/config.py +19 -1
  65. aiq/eval/dataset_handler/dataset_handler.py +75 -1
  66. aiq/eval/evaluate.py +53 -10
  67. aiq/eval/rag_evaluator/evaluate.py +23 -12
  68. aiq/eval/remote_workflow.py +7 -2
  69. aiq/eval/runners/__init__.py +14 -0
  70. aiq/eval/runners/config.py +39 -0
  71. aiq/eval/runners/multi_eval_runner.py +54 -0
  72. aiq/eval/usage_stats.py +6 -0
  73. aiq/eval/utils/weave_eval.py +5 -1
  74. aiq/experimental/__init__.py +0 -0
  75. aiq/experimental/decorators/__init__.py +0 -0
  76. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  77. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  78. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  79. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  80. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  81. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  82. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  83. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  84. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  85. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  86. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  87. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  88. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  89. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  90. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  91. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  92. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  93. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  94. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  95. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  96. aiq/experimental/inference_time_scaling/register.py +36 -0
  97. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  98. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  99. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  100. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  101. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  102. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  103. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  104. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  105. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  106. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  107. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  108. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  109. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  110. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  111. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  112. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  113. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  114. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  115. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  116. aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
  117. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  118. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  119. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
  120. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  121. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  122. aiq/front_ends/fastapi/main.py +2 -0
  123. aiq/front_ends/fastapi/message_handler.py +102 -84
  124. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  125. aiq/llm/aws_bedrock_llm.py +2 -1
  126. aiq/llm/nim_llm.py +2 -1
  127. aiq/llm/openai_llm.py +2 -1
  128. aiq/object_store/__init__.py +20 -0
  129. aiq/object_store/in_memory_object_store.py +74 -0
  130. aiq/object_store/interfaces.py +84 -0
  131. aiq/object_store/models.py +36 -0
  132. aiq/object_store/register.py +20 -0
  133. aiq/observability/__init__.py +14 -0
  134. aiq/observability/exporter/__init__.py +14 -0
  135. aiq/observability/exporter/base_exporter.py +449 -0
  136. aiq/observability/exporter/exporter.py +78 -0
  137. aiq/observability/exporter/file_exporter.py +33 -0
  138. aiq/observability/exporter/processing_exporter.py +269 -0
  139. aiq/observability/exporter/raw_exporter.py +52 -0
  140. aiq/observability/exporter/span_exporter.py +264 -0
  141. aiq/observability/exporter_manager.py +335 -0
  142. aiq/observability/mixin/__init__.py +14 -0
  143. aiq/observability/mixin/batch_config_mixin.py +26 -0
  144. aiq/observability/mixin/collector_config_mixin.py +23 -0
  145. aiq/observability/mixin/file_mixin.py +288 -0
  146. aiq/observability/mixin/file_mode.py +23 -0
  147. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  148. aiq/observability/mixin/serialize_mixin.py +61 -0
  149. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  150. aiq/observability/processor/__init__.py +14 -0
  151. aiq/observability/processor/batching_processor.py +316 -0
  152. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  153. aiq/observability/processor/processor.py +68 -0
  154. aiq/observability/register.py +32 -116
  155. aiq/observability/utils/__init__.py +14 -0
  156. aiq/observability/utils/dict_utils.py +236 -0
  157. aiq/observability/utils/time_utils.py +31 -0
  158. aiq/profiler/calc/__init__.py +14 -0
  159. aiq/profiler/calc/calc_runner.py +623 -0
  160. aiq/profiler/calc/calculations.py +288 -0
  161. aiq/profiler/calc/data_models.py +176 -0
  162. aiq/profiler/calc/plot.py +345 -0
  163. aiq/profiler/data_models.py +2 -0
  164. aiq/profiler/profile_runner.py +16 -13
  165. aiq/runtime/loader.py +8 -2
  166. aiq/runtime/runner.py +23 -9
  167. aiq/runtime/session.py +16 -5
  168. aiq/tool/chat_completion.py +74 -0
  169. aiq/tool/code_execution/README.md +152 -0
  170. aiq/tool/code_execution/code_sandbox.py +151 -72
  171. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  172. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  173. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  174. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  175. aiq/tool/code_execution/register.py +7 -3
  176. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  177. aiq/tool/mcp/exceptions.py +142 -0
  178. aiq/tool/mcp/mcp_client.py +17 -3
  179. aiq/tool/mcp/mcp_tool.py +1 -1
  180. aiq/tool/register.py +1 -0
  181. aiq/tool/server_tools.py +2 -2
  182. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  183. aiq/utils/exception_handlers/mcp.py +211 -0
  184. aiq/utils/io/model_processing.py +28 -0
  185. aiq/utils/log_utils.py +37 -0
  186. aiq/utils/string_utils.py +38 -0
  187. aiq/utils/type_converter.py +18 -2
  188. aiq/utils/type_utils.py +87 -0
  189. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/METADATA +37 -9
  190. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/RECORD +195 -80
  191. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/entry_points.txt +3 -0
  192. aiq/front_ends/fastapi/websocket.py +0 -153
  193. aiq/observability/async_otel_listener.py +0 -470
  194. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/WHEEL +0 -0
  195. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  196. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE.md +0 -0
  197. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/top_level.txt +0 -0
@@ -12,16 +12,59 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from __future__ import annotations
16
+
17
+ import contextlib
15
18
  import logging
16
19
  import multiprocessing
20
+ import os
17
21
  import resource
18
- import sys
22
+ from enum import Enum
19
23
  from io import StringIO
20
24
 
21
25
  from flask import Flask
26
+ from flask import Request
27
+ from flask import Response
22
28
  from flask import request
29
+ from pydantic import BaseModel
30
+ from pydantic import Field
23
31
 
24
32
  app = Flask(__name__)
33
+ logger = logging.getLogger(__name__)
34
+ logger.setLevel(logging.WARNING)
35
+
36
+
37
+ class CodeExecutionStatus(str, Enum):
38
+ """
39
+ Status of code execution.
40
+ """
41
+ COMPLETED = "completed"
42
+ ERROR = "error"
43
+ TIMEOUT = "timeout"
44
+
45
+
46
+ class CodeExecutionResult(BaseModel):
47
+ """
48
+ Result of code execution.
49
+ """
50
+ process_status: CodeExecutionStatus = Field(default=CodeExecutionStatus.COMPLETED,
51
+ description="Status of the process")
52
+ stdout: str = Field(description="Standard output of the process")
53
+ stderr: str = Field(description="Standard error of the process")
54
+
55
+
56
+ class CodeExecutionResponse(Response):
57
+ """
58
+ Response class that returns a JSON response with the given status code and result.
59
+ """
60
+
61
+ def __init__(self, status_code: int, result: CodeExecutionResult):
62
+ super().__init__(status=status_code, mimetype="application/json", response=result.model_dump_json())
63
+
64
+ @classmethod
65
+ def with_error(cls, status_code: int, error_message: str) -> 'CodeExecutionResponse':
66
+ return cls(status_code,
67
+ CodeExecutionResult(process_status=CodeExecutionStatus.ERROR, stdout="", stderr=error_message))
25
68
 
26
69
 
27
70
  @app.after_request
@@ -34,50 +77,122 @@ def add_hsts_header(response):
34
77
  return response
35
78
 
36
79
 
37
- def execute_python(generated_code, timeout):
80
+ def execute_python(generated_code: str, timeout: float) -> CodeExecutionResult:
81
+ """
82
+ Execute Python code in a subprocess.
83
+
84
+ Args:
85
+ generated_code: The code to execute
86
+ timeout: The timeout for the execution
87
+
88
+ Returns:
89
+ CodeExecutionResult object containing the execution result
90
+ """
91
+
38
92
  # running in a separate process to ensure any kind of crashes are properly handled
39
93
  queue = multiprocessing.Queue()
40
94
  process = multiprocessing.Process(target=execute_code_subprocess, args=(generated_code, queue))
95
+
41
96
  process.start()
97
+ # wait until the process finishes or the timeout expires
42
98
  process.join(timeout=timeout)
43
-
44
- if process.is_alive(): # didn't finish successfully
99
+ if process.exitcode is None:
45
100
  process.kill()
46
- return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
101
+ return CodeExecutionResult(process_status=CodeExecutionStatus.TIMEOUT, stdout="", stderr="Timed out\n")
47
102
 
48
103
  return queue.get()
49
104
 
50
105
 
51
106
  # need to memory-limit to avoid common errors of allocating too much
52
107
  # but this has to be done in a subprocess so as not to crash the server itself
53
- def execute_code_subprocess(generated_code, queue):
54
- limit = 1024 * 1024 * 1024 * 10 # 10gb - somehow with a smaller limit the server dies when numpy is used
55
- resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
56
- resource.setrlimit(resource.RLIMIT_DATA, (limit, limit))
108
+ def execute_code_subprocess(generated_code: str, queue):
109
+ """
110
+ Execute code in a subprocess.
111
+
112
+ Args:
113
+ generated_code: The code to execute
114
+ queue: The queue to put the result in
115
+ """
116
+
117
+ logger.debug("execute_code_subprocess started, PID: %s", os.getpid())
57
118
 
58
- # this can be overriden inside generated code, so it's not a guaranteed protection
59
- sys.stdout = StringIO()
60
119
  try:
61
- exec(generated_code, {}) # pylint: disable=W0122
62
- queue.put(sys.stdout.getvalue())
120
+ limit = 1024 * 1024 * 1024 * 10 # 10gb - somehow with a smaller limit the server dies when numpy is used
121
+ resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
122
+ resource.setrlimit(resource.RLIMIT_DATA, (limit, limit))
63
123
  except Exception as e:
64
- print(f"Error: {str(e)}")
65
- queue.put({"process_status": "error", "stdout": "", "stderr": str(e) + "\n"})
124
+ logger.error("Failed to set resource limits, PID: %s, error: %s", os.getpid(), e)
125
+
126
+ stdout_capture = StringIO()
127
+ stderr_capture = StringIO()
128
+ try:
129
+ with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
130
+ exec(generated_code, {}) # pylint: disable=W0122
131
+ logger.debug("execute_code_subprocess finished, PID: %s", os.getpid())
132
+ queue.put(CodeExecutionResult(stdout=stdout_capture.getvalue(), stderr=stderr_capture.getvalue()))
133
+ except Exception as e:
134
+ import traceback
135
+ with contextlib.redirect_stderr(stderr_capture):
136
+ traceback.print_exc()
137
+ logger.debug("execute_code_subprocess failed, PID: %s, error: %s", os.getpid(), e)
138
+ queue.put(
139
+ CodeExecutionResult(process_status=CodeExecutionStatus.ERROR,
140
+ stdout=stdout_capture.getvalue(),
141
+ stderr=stderr_capture.getvalue()))
142
+
143
+
144
+ def do_execute(request: Request) -> CodeExecutionResponse:
145
+ """
146
+ Main function to handle execution requests.
147
+
148
+ Args:
149
+ request: Request object containing the execution request
150
+
151
+ Returns:
152
+ CodeExecutionResponse object containing the execution result
153
+ """
154
+ try:
155
+ # Check if request has JSON data
156
+ if not request.is_json:
157
+ return CodeExecutionResponse.with_error(400, "Request must be JSON")
158
+
159
+ # Get JSON data safely
160
+ json_data = request.get_json(silent=True)
161
+
162
+ if json_data is None:
163
+ return CodeExecutionResponse.with_error(400, "Invalid JSON data")
164
+
165
+ # Check for required fields
166
+ if 'generated_code' not in json_data:
167
+ return CodeExecutionResponse.with_error(400, "Missing required field: generated_code")
168
+
169
+ if 'timeout' not in json_data:
170
+ return CodeExecutionResponse.with_error(400, "Missing required field: timeout")
171
+
172
+ if 'language' not in json_data:
173
+ return CodeExecutionResponse.with_error(400, "Missing required field: language")
174
+
175
+ generated_code: str | None = json_data.get('generated_code', None)
176
+ assert generated_code is not None
177
+ timeout: float | None = json_data.get('timeout', None)
178
+ assert timeout is not None
179
+ language: str | None = json_data.get('language', None)
180
+ assert language is not None
181
+
182
+ if language != 'python':
183
+ return CodeExecutionResponse.with_error(400, "Only python execution is supported")
184
+
185
+ return CodeExecutionResponse(200, execute_python(generated_code, timeout))
186
+
187
+ except Exception as e:
188
+ return CodeExecutionResponse.with_error(500, f"Server error: {str(e)}")
66
189
 
67
190
 
68
191
  # Main Flask endpoint to handle execution requests
69
192
  @app.route("/execute", methods=["POST"])
70
193
  def execute():
71
- generated_code = request.json['generated_code']
72
- timeout = request.json['timeout']
73
- language = request.json.get('language', 'python')
74
-
75
- if language == 'python':
76
- return execute_python(generated_code, timeout)
77
- return {"process_status": "error", "stdout": "", "stderr": "Only python execution is supported"}
194
+ return do_execute(request)
78
195
 
79
196
 
80
197
  if __name__ == '__main__':
81
- log = logging.getLogger('werkzeug')
82
- log.setLevel(logging.WARNING)
83
198
  app.run(port=6000)
@@ -1,4 +1,6 @@
1
1
  numpy
2
2
  pandas
3
3
  scipy
4
- ipython
4
+ ipython
5
+ plotly
6
+ pydantic
@@ -14,12 +14,37 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
+ # Usage: ./start_local_sandbox.sh [SANDBOX_NAME] [OUTPUT_DATA_PATH]
17
18
  # NOTE: needs to run from the root of the repo!
18
19
 
20
+ DOCKER_COMMAND=${DOCKER_COMMAND:-"docker"}
19
21
  SANDBOX_NAME=${1:-'local-sandbox'}
20
22
  NUM_THREADS=10
21
23
 
24
+ # Get the output_data directory path for mounting
25
+ # Priority: command line argument > environment variable > default path (current directory)
26
+ OUTPUT_DATA_PATH=${2:-${OUTPUT_DATA_PATH:-$(pwd)}}
22
27
 
23
- docker build --tag=${SANDBOX_NAME} --build-arg="UWSGI_PROCESSES=$((${NUM_THREADS} * 10))" --build-arg="UWSGI_CHEAPER=${NUM_THREADS}" -f Dockerfile.sandbox .
28
+ echo "Starting sandbox with container name: ${SANDBOX_NAME}"
29
+ echo "Mounting output_data directory: ${OUTPUT_DATA_PATH}"
24
30
 
25
- docker run --network=host --rm --name=local-sandbox ${SANDBOX_NAME}
31
+ # Verify the path exists before mounting, create if it doesn't
32
+ if [ ! -d "${OUTPUT_DATA_PATH}" ]; then
33
+ echo "Output data directory does not exist, creating: ${OUTPUT_DATA_PATH}"
34
+ mkdir -p "${OUTPUT_DATA_PATH}"
35
+ fi
36
+
37
+ # Check if the Docker image already exists
38
+ if ! ${DOCKER_COMMAND} images ${SANDBOX_NAME} | grep -q "${SANDBOX_NAME}"; then
39
+ echo "Docker image not found locally. Building ${SANDBOX_NAME}..."
40
+ ${DOCKER_COMMAND} build --tag=${SANDBOX_NAME} --build-arg="UWSGI_PROCESSES=$((${NUM_THREADS} * 10))" --build-arg="UWSGI_CHEAPER=${NUM_THREADS}" -f Dockerfile.sandbox .
41
+ else
42
+ echo "Using existing Docker image: ${SANDBOX_NAME}"
43
+ fi
44
+
45
+ # Mount the output_data directory directly so files created in container appear in the local directory
46
+ ${DOCKER_COMMAND} run --rm --name=local-sandbox \
47
+ --network=host \
48
+ -v "${OUTPUT_DATA_PATH}:/workspace" \
49
+ -w /workspace \
50
+ ${SANDBOX_NAME}
@@ -46,7 +46,11 @@ async def code_execution_tool(config: CodeExecutionToolConfig, builder: Builder)
46
46
  class CodeExecutionInputSchema(BaseModel):
47
47
  generated_code: str = Field(description="String containing the code to be executed")
48
48
 
49
- sandbox = get_sandbox(sandbox_type=config.sandbox_type, uri=config.uri)
49
+ # Create sandbox without working_directory
50
+ sandbox_kwargs = {"uri": config.uri}
51
+
52
+ sandbox = get_sandbox(sandbox_type=config.sandbox_type, **sandbox_kwargs)
53
+ logger.info(f"[DEBUG] Created sandbox of type: {config.sandbox_type}")
50
54
 
51
55
  async def _execute_code(generated_code: str) -> dict:
52
56
  logger.info("Executing code in the sandbox at %s", config.uri)
@@ -54,12 +58,12 @@ async def code_execution_tool(config: CodeExecutionToolConfig, builder: Builder)
54
58
  output = await sandbox.execute_code(
55
59
  generated_code=generated_code,
56
60
  language="python",
57
- timeout=config.timeout,
61
+ timeout_seconds=config.timeout,
58
62
  max_output_characters=config.max_output_characters,
59
63
  )
60
64
  except Exception as e:
61
65
  logger.exception("Error when executing code in the sandbox, %s", e)
62
- return {"process_status": "error", "stdout": "", "stderr": e}
66
+ return {"process_status": "error", "stdout": "", "stderr": str(e)}
63
67
  return output
64
68
 
65
69
  yield FunctionInfo.from_fn(