aiqtoolkit 1.2.0a20250706__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (197) hide show
  1. aiq/agent/base.py +171 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +113 -113
  4. aiq/agent/react_agent/register.py +31 -14
  5. aiq/agent/rewoo_agent/agent.py +36 -35
  6. aiq/agent/rewoo_agent/register.py +2 -2
  7. aiq/agent/tool_calling_agent/agent.py +3 -7
  8. aiq/authentication/__init__.py +14 -0
  9. aiq/authentication/api_key/__init__.py +14 -0
  10. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  11. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  12. aiq/authentication/api_key/register.py +26 -0
  13. aiq/authentication/exceptions/__init__.py +14 -0
  14. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  15. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  16. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  17. aiq/authentication/exceptions/request_exceptions.py +54 -0
  18. aiq/authentication/http_basic_auth/__init__.py +0 -0
  19. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  20. aiq/authentication/http_basic_auth/register.py +30 -0
  21. aiq/authentication/interfaces.py +93 -0
  22. aiq/authentication/oauth2/__init__.py +14 -0
  23. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  24. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  25. aiq/authentication/oauth2/register.py +25 -0
  26. aiq/authentication/register.py +21 -0
  27. aiq/builder/builder.py +64 -2
  28. aiq/builder/component_utils.py +16 -3
  29. aiq/builder/context.py +26 -0
  30. aiq/builder/eval_builder.py +43 -2
  31. aiq/builder/function.py +32 -4
  32. aiq/builder/function_base.py +1 -1
  33. aiq/builder/intermediate_step_manager.py +6 -8
  34. aiq/builder/user_interaction_manager.py +3 -0
  35. aiq/builder/workflow.py +23 -18
  36. aiq/builder/workflow_builder.py +420 -73
  37. aiq/cli/commands/info/list_mcp.py +103 -16
  38. aiq/cli/commands/sizing/__init__.py +14 -0
  39. aiq/cli/commands/sizing/calc.py +294 -0
  40. aiq/cli/commands/sizing/sizing.py +27 -0
  41. aiq/cli/commands/start.py +1 -0
  42. aiq/cli/entrypoint.py +2 -0
  43. aiq/cli/register_workflow.py +80 -0
  44. aiq/cli/type_registry.py +151 -30
  45. aiq/data_models/api_server.py +117 -11
  46. aiq/data_models/authentication.py +231 -0
  47. aiq/data_models/common.py +35 -7
  48. aiq/data_models/component.py +17 -9
  49. aiq/data_models/component_ref.py +33 -0
  50. aiq/data_models/config.py +60 -3
  51. aiq/data_models/embedder.py +1 -0
  52. aiq/data_models/function_dependencies.py +8 -0
  53. aiq/data_models/interactive.py +10 -1
  54. aiq/data_models/intermediate_step.py +15 -5
  55. aiq/data_models/its_strategy.py +30 -0
  56. aiq/data_models/llm.py +1 -0
  57. aiq/data_models/memory.py +1 -0
  58. aiq/data_models/object_store.py +44 -0
  59. aiq/data_models/retry_mixin.py +35 -0
  60. aiq/data_models/span.py +187 -0
  61. aiq/data_models/telemetry_exporter.py +2 -2
  62. aiq/embedder/nim_embedder.py +2 -1
  63. aiq/embedder/openai_embedder.py +2 -1
  64. aiq/eval/config.py +19 -1
  65. aiq/eval/dataset_handler/dataset_handler.py +75 -1
  66. aiq/eval/evaluate.py +53 -10
  67. aiq/eval/rag_evaluator/evaluate.py +23 -12
  68. aiq/eval/remote_workflow.py +7 -2
  69. aiq/eval/runners/__init__.py +14 -0
  70. aiq/eval/runners/config.py +39 -0
  71. aiq/eval/runners/multi_eval_runner.py +54 -0
  72. aiq/eval/usage_stats.py +6 -0
  73. aiq/eval/utils/weave_eval.py +5 -1
  74. aiq/experimental/__init__.py +0 -0
  75. aiq/experimental/decorators/__init__.py +0 -0
  76. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  77. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  78. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  79. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  80. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  81. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  82. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  83. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  84. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  85. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  86. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  87. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  88. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  89. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  90. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  91. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  92. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  93. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  94. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  95. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  96. aiq/experimental/inference_time_scaling/register.py +36 -0
  97. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  98. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  99. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  100. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  101. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  102. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  103. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  104. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  105. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  106. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  107. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  108. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  109. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  110. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  111. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  112. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  113. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  114. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  115. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  116. aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
  117. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  118. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  119. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
  120. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  121. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  122. aiq/front_ends/fastapi/main.py +2 -0
  123. aiq/front_ends/fastapi/message_handler.py +102 -84
  124. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  125. aiq/llm/aws_bedrock_llm.py +2 -1
  126. aiq/llm/nim_llm.py +2 -1
  127. aiq/llm/openai_llm.py +2 -1
  128. aiq/object_store/__init__.py +20 -0
  129. aiq/object_store/in_memory_object_store.py +74 -0
  130. aiq/object_store/interfaces.py +84 -0
  131. aiq/object_store/models.py +36 -0
  132. aiq/object_store/register.py +20 -0
  133. aiq/observability/__init__.py +14 -0
  134. aiq/observability/exporter/__init__.py +14 -0
  135. aiq/observability/exporter/base_exporter.py +449 -0
  136. aiq/observability/exporter/exporter.py +78 -0
  137. aiq/observability/exporter/file_exporter.py +33 -0
  138. aiq/observability/exporter/processing_exporter.py +269 -0
  139. aiq/observability/exporter/raw_exporter.py +52 -0
  140. aiq/observability/exporter/span_exporter.py +264 -0
  141. aiq/observability/exporter_manager.py +335 -0
  142. aiq/observability/mixin/__init__.py +14 -0
  143. aiq/observability/mixin/batch_config_mixin.py +26 -0
  144. aiq/observability/mixin/collector_config_mixin.py +23 -0
  145. aiq/observability/mixin/file_mixin.py +288 -0
  146. aiq/observability/mixin/file_mode.py +23 -0
  147. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  148. aiq/observability/mixin/serialize_mixin.py +61 -0
  149. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  150. aiq/observability/processor/__init__.py +14 -0
  151. aiq/observability/processor/batching_processor.py +316 -0
  152. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  153. aiq/observability/processor/processor.py +68 -0
  154. aiq/observability/register.py +32 -116
  155. aiq/observability/utils/__init__.py +14 -0
  156. aiq/observability/utils/dict_utils.py +236 -0
  157. aiq/observability/utils/time_utils.py +31 -0
  158. aiq/profiler/calc/__init__.py +14 -0
  159. aiq/profiler/calc/calc_runner.py +623 -0
  160. aiq/profiler/calc/calculations.py +288 -0
  161. aiq/profiler/calc/data_models.py +176 -0
  162. aiq/profiler/calc/plot.py +345 -0
  163. aiq/profiler/data_models.py +2 -0
  164. aiq/profiler/profile_runner.py +16 -13
  165. aiq/runtime/loader.py +8 -2
  166. aiq/runtime/runner.py +23 -9
  167. aiq/runtime/session.py +16 -5
  168. aiq/tool/chat_completion.py +74 -0
  169. aiq/tool/code_execution/README.md +152 -0
  170. aiq/tool/code_execution/code_sandbox.py +151 -72
  171. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  172. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  173. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  174. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  175. aiq/tool/code_execution/register.py +7 -3
  176. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  177. aiq/tool/mcp/exceptions.py +142 -0
  178. aiq/tool/mcp/mcp_client.py +17 -3
  179. aiq/tool/mcp/mcp_tool.py +1 -1
  180. aiq/tool/register.py +1 -0
  181. aiq/tool/server_tools.py +2 -2
  182. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  183. aiq/utils/exception_handlers/mcp.py +211 -0
  184. aiq/utils/io/model_processing.py +28 -0
  185. aiq/utils/log_utils.py +37 -0
  186. aiq/utils/string_utils.py +38 -0
  187. aiq/utils/type_converter.py +18 -2
  188. aiq/utils/type_utils.py +87 -0
  189. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/METADATA +37 -9
  190. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/RECORD +195 -80
  191. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/entry_points.txt +3 -0
  192. aiq/front_ends/fastapi/websocket.py +0 -153
  193. aiq/observability/async_otel_listener.py +0 -470
  194. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/WHEEL +0 -0
  195. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  196. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE.md +0 -0
  197. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,74 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """
16
+ Simple Completion Function for AIQ Toolkit
17
+
18
+ This module provides a simple completion function that can handle
19
+ natural language queries and perform basic text completion tasks.
20
+ """
21
+
22
+ from pydantic import Field
23
+
24
+ from aiq.builder.builder import Builder
25
+ from aiq.builder.framework_enum import LLMFrameworkEnum
26
+ from aiq.cli.register_workflow import register_function
27
+ from aiq.data_models.component_ref import LLMRef
28
+ from aiq.data_models.function import FunctionBaseConfig
29
+
30
+
class ChatCompletionConfig(FunctionBaseConfig, name="chat_completion"):
    """Configuration for the Chat Completion Function."""

    # Instruction text that is placed in front of the user's query when the
    # prompt is assembled. Has a general-purpose assistant default.
    system_prompt: str = Field(
        default=("You are a helpful AI assistant. Provide clear, accurate, and helpful "
                 "responses to user queries. You can give general advice, recommendations, "
                 "tips, and engage in conversation. Be helpful and informative."),
        description="The system prompt to use for chat completion.",
    )

    # Reference to the LLM that produces the responses. No default, so it must
    # be supplied in the configuration.
    llm_name: LLMRef = Field(description="The LLM to use for generating responses.")
40
+
41
+
@register_function(config_type=ChatCompletionConfig)
async def register_chat_completion(config: ChatCompletionConfig, builder: Builder):
    """Registers a chat completion function that can handle natural language queries.

    Args:
        config: Supplies the system prompt and the reference to the LLM to use.
        builder: Builder used to resolve ``config.llm_name`` into an LLM client.

    Yields:
        The ``_chat_completion`` coroutine function.
    """
    # Imported locally so this block does not depend on a module-level logger.
    import logging

    logger = logging.getLogger(__name__)

    # Get the LLM from the builder context using the configured LLM reference.
    # The LangChain wrapper is requested, so ``ainvoke`` is used below.
    llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)

    def _response_to_text(response) -> str:
        """Reduce an LLM response to plain text.

        LangChain chat models return a message object from ``ainvoke`` while
        plain text-completion models return ``str``; the declared return type
        of ``_chat_completion`` is ``str``, so normalise both shapes here.
        """
        if isinstance(response, str):
            return response
        content = getattr(response, "content", response)
        return content if isinstance(content, str) else str(content)

    async def _chat_completion(query: str) -> str:
        """A simple chat completion function that responds to natural language queries.

        Args:
            query: The user's natural language query

        Returns:
            A helpful response to the query
        """
        # Create a simple prompt with the system message and user query
        prompt = f"{config.system_prompt}\n\nUser: {query}\n\nAssistant:"

        try:
            # Generate response using the LLM
            response = await llm.ainvoke(prompt)
            return _response_to_text(response)
        except Exception as e:
            # Best-effort fallback: the caller still gets a textual answer, but
            # the failure is logged so it is not lost.
            logger.exception("Chat completion LLM call failed")
            return (f"I apologize, but I encountered an error while processing your "
                    f"query: '{query}'. Please try rephrasing your question or try "
                    f"again later. Error: {str(e)}")

    yield _chat_completion
@@ -0,0 +1,152 @@
1
+ <!--
2
+ SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ SPDX-License-Identifier: Apache-2.0
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ -->
17
+
18
+ # Code Execution Sandbox
19
+
20
+ A secure, containerized environment for executing Python code safely, with comprehensive error handling and debugging capabilities.
21
+
22
+ ## Overview
23
+
24
+ The Code Execution Sandbox provides:
25
+ - **Secure code execution** in isolated Docker containers
26
+ - **Comprehensive error handling** with detailed stdout/stderr capture
27
+ - **Multiple input formats** including raw code, dictionary format, and markdown
28
+ - **Dependency management** with pre-installed libraries
29
+ - **Flexible configuration** with customizable timeouts and output limits
30
+ - **Robust debugging** with extensive logging and error reporting
31
+
32
+ ## Quick Start
33
+
34
+ ### Step 1: Start the Sandbox Server
35
+
36
+ Navigate to the local sandbox directory and start the server:
37
+
38
+ ```bash
39
+ cd src/aiq/tool/code_execution/local_sandbox
40
+ ./start_local_sandbox.sh
41
+ ```
42
+
43
+ The script will:
44
+ - Build the Docker image if it doesn't exist
45
+ - Start the sandbox server on port 6000
46
+ - Mount your working directory for file operations
47
+
48
+ #### Advanced Usage
49
+ ```bash
50
+ # Custom container name
51
+ ./start_local_sandbox.sh my-sandbox
52
+
53
+ # Custom output directory
54
+ ./start_local_sandbox.sh my-sandbox /path/to/output
55
+
56
+ # Using environment variable
57
+ export OUTPUT_DATA_PATH=/path/to/output
58
+ ./start_local_sandbox.sh
59
+ ```
60
+
61
+ ### Step 2: Test the Installation
62
+
63
+ Run the comprehensive test suite to verify everything is working:
64
+
65
+ ```bash
66
+ cd src/aiq/tool/code_execution
67
+ pytest test_code_execution_sandbox.py
68
+ ```
69
+
70
+ Note: The tests require a running local sandbox instance (see Step 1).
71
+
72
+ ## Using the Code Execution Tool
73
+
74
+ ### Basic Usage
75
+
76
+ The sandbox accepts HTTP POST requests to `http://localhost:6000/execute` with JSON payloads:
77
+
78
+ ```bash
79
+ curl -X POST \
80
+ -H "Content-Type: application/json" \
81
+ -d '{
82
+ "generated_code": "print(\"Hello, World!\")",
83
+ "timeout": 30,
84
+ "language": "python"
85
+ }' \
86
+ http://localhost:6000/execute
87
+ ```
88
+
89
+ ### Supported Input Formats
90
+
91
+ #### 1. Raw Python Code
92
+ ```json
93
+ {
94
+ "generated_code": "import numpy as np\nprint(np.array([1, 2, 3]))",
95
+ "timeout": 30,
96
+ "language": "python"
97
+ }
98
+ ```
99
+
100
+ #### 2. Dictionary Format
101
+ ```json
102
+ {
103
+ "generated_code": "{'generated_code': 'print(\"Hello from dict format\")'}",
104
+ "timeout": 30,
105
+ "language": "python"
106
+ }
107
+ ```
108
+
109
+ #### 3. Markdown Code Blocks
110
+ ```json
111
+ {
112
+ "generated_code": "```python\nprint('Hello from markdown')\n```",
113
+ "timeout": 30,
114
+ "language": "python"
115
+ }
116
+ ```
117
+
118
+ ### Response Format
119
+
120
+ The sandbox returns JSON responses with the following structure:
121
+
122
+ ```json
123
+ {
124
+ "process_status": "completed|error|timeout",
125
+ "stdout": "Standard output content",
126
+ "stderr": "Standard error content"
127
+ }
128
+ ```
129
+
130
+ ## Configuration Options
131
+
132
+ ### Sandbox Configuration
133
+
134
+ - **URI**: Default `http://127.0.0.1:6000`
135
+ - **Timeout**: Default 10 seconds (configurable)
136
+ - **Max Output Characters**: Default 1000 characters
137
+ - **Memory Limit**: 10GB (configurable in Docker)
138
+ - **Working Directory**: Mounted volume for file operations
139
+
140
+ ### Environment Variables
141
+
142
+ - `OUTPUT_DATA_PATH`: Custom path for file operations
143
+ - `SANDBOX_HOST`: Custom sandbox host
144
+ - `SANDBOX_PORT`: Custom sandbox port
145
+
146
+ ## Security Considerations
147
+
148
+ - **Isolated execution**: All code runs in Docker containers
149
+ - **Resource limits**: Memory and CPU limits prevent resource exhaustion
150
+ - **Network isolation**: Containers have limited network access
151
+ - **File system isolation**: Mounted volumes provide controlled file access
152
+ - **Process isolation**: Each execution runs in a separate process
@@ -15,11 +15,16 @@
15
15
  import abc
16
16
  import json
17
17
  import logging
18
+ import textwrap
19
+ from typing import Any
18
20
  from urllib.parse import urljoin
19
21
 
20
22
  import requests
23
+ import requests.adapters
21
24
  from pydantic import HttpUrl
22
25
 
26
+ from aiq.utils.type_utils import override
27
+
23
28
  logger = logging.getLogger(__file__)
24
29
 
25
30
 
@@ -43,18 +48,18 @@ class Sandbox(abc.ABC):
43
48
  *,
44
49
  uri: HttpUrl,
45
50
  ):
46
- self.url = self._get_execute_url(uri)
51
+ self.url: str = self._get_execute_url(uri)
47
52
  session = requests.Session()
48
53
  adapter = requests.adapters.HTTPAdapter(pool_maxsize=1500, pool_connections=1500, max_retries=3)
49
54
  session.mount('http://', adapter)
50
55
  session.mount('https://', adapter)
51
- self.http_session = session
56
+ self.http_session: requests.Session = session
52
57
 
53
- def _send_request(self, request, timeout):
58
+ def _send_request(self, request: dict[str, Any], timeout_seconds: float) -> dict[str, str]:
54
59
  output = self.http_session.post(
55
60
  url=self.url,
56
61
  data=json.dumps(request),
57
- timeout=timeout,
62
+ timeout=timeout_seconds,
58
63
  headers={"Content-Type": "application/json"},
59
64
  )
60
65
  # retrying 502 errors
@@ -64,104 +69,180 @@ class Sandbox(abc.ABC):
64
69
  return self._parse_request_output(output)
65
70
 
@abc.abstractmethod
def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
    """Turn the HTTP response received from the sandbox into a result dict.

    Subclasses implement this; ``_send_request`` returns whatever it produces.
    """
69
74
 
@abc.abstractmethod
def _get_execute_url(self, uri: HttpUrl) -> str:
    """Return the URL that execution requests are posted to for ``uri``.

    Subclasses implement this; the constructor stores the result as ``self.url``.
    """
73
78
 
74
79
  @abc.abstractmethod
75
- def _prepare_request(self, generated_code, timeout):
80
+ def _prepare_request(self, generated_code: str, timeout_seconds: float) -> dict[str, Any]:
76
81
  pass
77
82
 
async def execute_code(
    self,
    generated_code: str,
    timeout_seconds: float = 10.0,
    language: str = "python",
    max_output_characters: int = 1000,
) -> dict[str, str]:
    """Wrap ``generated_code`` in a capture harness and submit it to the sandbox.

    The harness is a standalone Python script that runs the code with
    ``exec`` while redirecting stdout/stderr, records a ``completed`` or
    ``error`` status, truncates each stream to ``max_output_characters``
    (appending ``<output cut>``) and prints the result as one JSON object.

    Args:
        generated_code: Code to run. Surrounding whitespace and backticks are removed.
        timeout_seconds: Passed to ``_prepare_request`` and used as the HTTP timeout.
        language: Only ``"python"`` is accepted.
        max_output_characters: Per-stream truncation limit baked into the harness.

    Returns:
        The result of ``_send_request``; on an HTTP timeout, a dict with
        ``process_status`` set to ``"timeout"``.

    Raises:
        ValueError: If ``language`` is not ``"python"``.
    """
    if language != "python":
        raise ValueError(f"Language {language} not supported")

    generated_code = generated_code.strip().strip("`")

    preamble = textwrap.dedent("""
        import traceback
        import json
        import os
        import warnings
        import contextlib
        import io
        warnings.filterwarnings('ignore')
        os.environ['OPENBLAS_NUM_THREADS'] = '16'
    """).strip()

    # repr() always yields a Python literal that evaluates back to the exact
    # same string. json.dumps() does not: it escapes non-BMP characters as
    # surrogate pairs, which Python reads as two lone surrogates that cannot
    # be compiled or printed.
    code_literal = repr(generated_code)
    harness = textwrap.dedent(f"""
        generated_code = {code_literal}

        stdout = io.StringIO()
        stderr = io.StringIO()

        with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
            try:
                exec(generated_code)
                status = "completed"
            except Exception:
                status = "error"
                stderr.write(traceback.format_exc())
        stdout = stdout.getvalue()
        stderr = stderr.getvalue()
        if len(stdout) > {max_output_characters}:
            stdout = stdout[:{max_output_characters}] + "<output cut>"
        if len(stderr) > {max_output_characters}:
            stderr = stderr[:{max_output_characters}] + "<output cut>"
        if stdout:
            stdout += "\\n"
        if stderr:
            stderr += "\\n"
        output = {{"process_status": status, "stdout": stdout, "stderr": stderr}}
        print(json.dumps(output))
    """).strip()

    # Both parts are stripped, so they must be joined with an explicit line
    # break; plain concatenation would glue the last preamble statement to
    # the first harness statement and produce a syntax error.
    code_to_execute = f"{preamble}\n\n{harness}"

    request = self._prepare_request(code_to_execute, timeout_seconds)
    try:
        return self._send_request(request, timeout_seconds)
    except requests.exceptions.Timeout:
        return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
129
140
 
130
141
 
class LocalSandbox(Sandbox):
    """Locally hosted sandbox.

    Unlike the base class, ``execute_code`` sends the user's code to the
    server as-is instead of wrapping it in a capture harness.
    """

    # A first line consisting only of one of these is treated as a leftover
    # markdown fence language tag and dropped.
    _FENCE_LANGUAGE_TAGS = ("python", "python3", "py")

    def __init__(self, *, uri: HttpUrl):
        super().__init__(uri=uri)

    @override
    def _get_execute_url(self, uri: HttpUrl) -> str:
        """Return the ``execute`` endpoint resolved against ``uri``."""
        return urljoin(str(uri), "execute")

    @override
    def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
        """Decode the server's JSON reply, mapping malformed replies to an error result."""
        try:
            output_json = output.json()
        except ValueError as e:
            # ValueError covers json.JSONDecodeError as well as the
            # simplejson-based error that requests raises when simplejson is
            # installed.
            logger.exception("Error parsing output: %s. %s", output.text, e)
            return {'process_status': 'error', 'stdout': '', 'stderr': f'Unknown error: {e} "{output.text}"'}

        if not isinstance(output_json, dict):
            # Explicit check rather than ``assert``, which is stripped under -O.
            logger.error("Unexpected sandbox response (not a JSON object): %s", output.text)
            return {
                'process_status': 'error',
                'stdout': '',
                'stderr': f'Unknown error: unexpected response "{output.text}"',
            }
        return output_json

    @override
    def _prepare_request(self,
                         generated_code: str,
                         timeout_seconds: float,
                         language: str = "python",
                         **kwargs) -> dict[str, Any]:
        """Build the JSON payload for the local server. Extra ``kwargs`` are ignored."""
        return {
            "generated_code": generated_code,
            "timeout": timeout_seconds,
            "language": language,
        }

    @staticmethod
    def _unwrap_dict_payload(generated_code: str) -> str:
        """Return the code inside a ``{'generated_code': ...}`` string, else the input unchanged."""
        import ast

        try:
            parsed = ast.literal_eval(generated_code)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # literal_eval can raise any of these on input that is ordinary
            # code rather than a literal; that is the normal case here.
            logger.debug("Failed to parse, using as-is: %s...", generated_code[:100])
            return generated_code

        if isinstance(parsed, dict):
            inner = parsed.get('generated_code')
            if isinstance(inner, str):
                logger.debug("Extracted code from dict: %s...", inner[:100])
                return inner

        # Not a dict, no usable key, or a non-string payload: use the input as-is.
        logger.debug("Using code as-is: %s...", generated_code[:100])
        return generated_code

    @classmethod
    def _strip_markdown_fence(cls, code: str) -> str:
        """Remove surrounding whitespace/backticks and a leading language-tag line."""
        code = code.strip().strip("`")
        first_line, _, remainder = code.partition("\n")
        # Only drop the tag when it is the whole first line. A plain prefix
        # test would also cut into real code such as ``python_version = 3``.
        if first_line.strip().lower() in cls._FENCE_LANGUAGE_TAGS:
            return remainder
        return code

    @override
    async def execute_code(
        self,
        generated_code: str,
        timeout_seconds: float = 10.0,
        language: str = "python",
        max_output_characters: int = 1000,
    ) -> dict[str, str]:
        """Override execute_code to bypass the wrapper logic and send user code directly to our server.

        Args:
            generated_code: Raw code, a markdown-fenced block, or the string
                form of a dict with a ``generated_code`` key.
            timeout_seconds: Sent in the payload and used as the HTTP timeout.
            language: Forwarded to the server in the payload.
            max_output_characters: Accepted for interface compatibility; this
                override does not use it.

        Returns:
            The parsed server response; on an HTTP timeout, a dict with
            ``process_status`` set to ``"timeout"``.
        """
        logger.debug("Raw input generated_code: %s", generated_code)

        actual_code = self._strip_markdown_fence(self._unwrap_dict_payload(generated_code))

        # No harness is added here, unlike the base implementation.
        request = self._prepare_request(actual_code, timeout_seconds, language)
        try:
            return self._send_request(request, timeout_seconds)
        except requests.exceptions.Timeout:
            return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
150
224
 
151
225
 
152
226
  class PistonSandbox(Sandbox):
153
227
  """Piston sandbox (https://github.com/engineer-man/piston)"""
154
228
 
@override
def _get_execute_url(self, uri: HttpUrl) -> str:
    """Return the ``execute`` endpoint resolved against ``uri``."""
    base_url = str(uri)
    return urljoin(base_url, "execute")
157
232
 
@override
def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
    """Map the sandbox's JSON reply onto the common result dictionary.

    The reply is expected to be a JSON object with a ``run`` object holding
    ``code``, ``stdout`` and ``stderr``. A ``code`` of ``0`` is reported as
    ``completed``; anything else, including a missing code, as ``error``.
    A reply without a usable ``run`` object is reported as an error result
    rather than raising.
    """
    output_json = output.json()
    run_json = output_json.get('run') if isinstance(output_json, dict) else None
    if not isinstance(run_json, dict):
        # Explicit check rather than ``assert``, which is stripped under -O.
        return {
            'process_status': "error",
            'stdout': "",
            'stderr': f"Unexpected response from sandbox: {output.text}",
        }

    status = "completed" if run_json.get("code") == 0 else "error"
    return {
        'process_status': status,
        'stdout': run_json.get('stdout') or "",
        'stderr': run_json.get('stderr') or "",
    }
163
243
 
164
- def _prepare_request(self, generated_code: str, timeout, **kwargs):
244
+ @override
245
+ def _prepare_request(self, generated_code: str, timeout_seconds: float, **kwargs) -> dict[str, Any]:
165
246
  return {
166
247
  "language": "py",
167
248
  "version": "3.10.0",
@@ -170,19 +251,17 @@ class PistonSandbox(Sandbox):
170
251
  }],
171
252
  "stdin": "",
172
253
  "args": [],
173
- "run_timeout": timeout * 1000.0, # milliseconds
254
+ "run_timeout": timeout_seconds * 1000.0, # milliseconds
174
255
  "compile_memory_limit": -1,
175
256
  "run_memory_limit": -1,
176
257
  }
177
258
 
178
259
 
179
- sandboxes = {
180
- 'local': LocalSandbox,
181
- 'piston': PistonSandbox,
182
- }
183
-
184
-
def get_sandbox(sandbox_type: str = "local", **kwargs):
    """Instantiate the sandbox class registered under ``sandbox_type``.

    The lookup is case-insensitive; ``kwargs`` are forwarded to the selected
    class's constructor. An unknown type raises ``KeyError``.
    """
    registry = {'local': LocalSandbox, 'piston': PistonSandbox}
    chosen_class = registry[sandbox_type.lower()]
    return chosen_class(**kwargs)
@@ -0,0 +1 @@
1
+ persistence_test.*