aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc2__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. More details are available via the link on the original page.

Files changed (220)
  1. aiq/agent/base.py +170 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +146 -112
  4. aiq/agent/react_agent/prompt.py +1 -6
  5. aiq/agent/react_agent/register.py +36 -35
  6. aiq/agent/rewoo_agent/agent.py +36 -35
  7. aiq/agent/rewoo_agent/register.py +2 -2
  8. aiq/agent/tool_calling_agent/agent.py +3 -7
  9. aiq/agent/tool_calling_agent/register.py +1 -1
  10. aiq/authentication/__init__.py +14 -0
  11. aiq/authentication/api_key/__init__.py +14 -0
  12. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  13. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  14. aiq/authentication/api_key/register.py +26 -0
  15. aiq/authentication/exceptions/__init__.py +14 -0
  16. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  17. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  18. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  19. aiq/authentication/exceptions/request_exceptions.py +54 -0
  20. aiq/authentication/http_basic_auth/__init__.py +0 -0
  21. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  22. aiq/authentication/http_basic_auth/register.py +30 -0
  23. aiq/authentication/interfaces.py +93 -0
  24. aiq/authentication/oauth2/__init__.py +14 -0
  25. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  26. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  27. aiq/authentication/oauth2/register.py +25 -0
  28. aiq/authentication/register.py +21 -0
  29. aiq/builder/builder.py +64 -2
  30. aiq/builder/component_utils.py +16 -3
  31. aiq/builder/context.py +37 -0
  32. aiq/builder/eval_builder.py +43 -2
  33. aiq/builder/function.py +44 -12
  34. aiq/builder/function_base.py +1 -1
  35. aiq/builder/intermediate_step_manager.py +6 -8
  36. aiq/builder/user_interaction_manager.py +3 -0
  37. aiq/builder/workflow.py +23 -18
  38. aiq/builder/workflow_builder.py +421 -61
  39. aiq/cli/commands/info/list_mcp.py +103 -16
  40. aiq/cli/commands/sizing/__init__.py +14 -0
  41. aiq/cli/commands/sizing/calc.py +294 -0
  42. aiq/cli/commands/sizing/sizing.py +27 -0
  43. aiq/cli/commands/start.py +2 -1
  44. aiq/cli/entrypoint.py +2 -0
  45. aiq/cli/register_workflow.py +80 -0
  46. aiq/cli/type_registry.py +151 -30
  47. aiq/data_models/api_server.py +124 -12
  48. aiq/data_models/authentication.py +231 -0
  49. aiq/data_models/common.py +35 -7
  50. aiq/data_models/component.py +17 -9
  51. aiq/data_models/component_ref.py +33 -0
  52. aiq/data_models/config.py +60 -3
  53. aiq/data_models/dataset_handler.py +2 -1
  54. aiq/data_models/embedder.py +1 -0
  55. aiq/data_models/evaluate.py +23 -0
  56. aiq/data_models/function_dependencies.py +8 -0
  57. aiq/data_models/interactive.py +10 -1
  58. aiq/data_models/intermediate_step.py +38 -5
  59. aiq/data_models/its_strategy.py +30 -0
  60. aiq/data_models/llm.py +1 -0
  61. aiq/data_models/memory.py +1 -0
  62. aiq/data_models/object_store.py +44 -0
  63. aiq/data_models/profiler.py +1 -0
  64. aiq/data_models/retry_mixin.py +35 -0
  65. aiq/data_models/span.py +187 -0
  66. aiq/data_models/telemetry_exporter.py +2 -2
  67. aiq/embedder/nim_embedder.py +2 -1
  68. aiq/embedder/openai_embedder.py +2 -1
  69. aiq/eval/config.py +19 -1
  70. aiq/eval/dataset_handler/dataset_handler.py +87 -2
  71. aiq/eval/evaluate.py +208 -27
  72. aiq/eval/evaluator/base_evaluator.py +73 -0
  73. aiq/eval/evaluator/evaluator_model.py +1 -0
  74. aiq/eval/intermediate_step_adapter.py +11 -5
  75. aiq/eval/rag_evaluator/evaluate.py +55 -15
  76. aiq/eval/rag_evaluator/register.py +6 -1
  77. aiq/eval/remote_workflow.py +7 -2
  78. aiq/eval/runners/__init__.py +14 -0
  79. aiq/eval/runners/config.py +39 -0
  80. aiq/eval/runners/multi_eval_runner.py +54 -0
  81. aiq/eval/trajectory_evaluator/evaluate.py +22 -65
  82. aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
  83. aiq/eval/tunable_rag_evaluator/register.py +2 -0
  84. aiq/eval/usage_stats.py +41 -0
  85. aiq/eval/utils/output_uploader.py +10 -1
  86. aiq/eval/utils/weave_eval.py +184 -0
  87. aiq/experimental/__init__.py +0 -0
  88. aiq/experimental/decorators/__init__.py +0 -0
  89. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  90. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  91. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  92. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  93. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  94. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  95. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  96. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  97. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  98. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  99. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  100. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  101. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  102. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  103. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  104. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  105. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  106. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  107. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  108. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  109. aiq/experimental/inference_time_scaling/register.py +36 -0
  110. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  111. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  112. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  113. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  114. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  115. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  116. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  117. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  118. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  119. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  120. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  121. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  122. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  123. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  124. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  125. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  126. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  127. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  128. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  129. aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
  130. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  131. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  132. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
  133. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  134. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  135. aiq/front_ends/fastapi/job_store.py +47 -25
  136. aiq/front_ends/fastapi/main.py +2 -0
  137. aiq/front_ends/fastapi/message_handler.py +108 -89
  138. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  139. aiq/llm/aws_bedrock_llm.py +57 -0
  140. aiq/llm/nim_llm.py +2 -1
  141. aiq/llm/openai_llm.py +3 -2
  142. aiq/llm/register.py +1 -0
  143. aiq/meta/pypi.md +12 -12
  144. aiq/object_store/__init__.py +20 -0
  145. aiq/object_store/in_memory_object_store.py +74 -0
  146. aiq/object_store/interfaces.py +84 -0
  147. aiq/object_store/models.py +36 -0
  148. aiq/object_store/register.py +20 -0
  149. aiq/observability/__init__.py +14 -0
  150. aiq/observability/exporter/__init__.py +14 -0
  151. aiq/observability/exporter/base_exporter.py +449 -0
  152. aiq/observability/exporter/exporter.py +78 -0
  153. aiq/observability/exporter/file_exporter.py +33 -0
  154. aiq/observability/exporter/processing_exporter.py +269 -0
  155. aiq/observability/exporter/raw_exporter.py +52 -0
  156. aiq/observability/exporter/span_exporter.py +264 -0
  157. aiq/observability/exporter_manager.py +335 -0
  158. aiq/observability/mixin/__init__.py +14 -0
  159. aiq/observability/mixin/batch_config_mixin.py +26 -0
  160. aiq/observability/mixin/collector_config_mixin.py +23 -0
  161. aiq/observability/mixin/file_mixin.py +288 -0
  162. aiq/observability/mixin/file_mode.py +23 -0
  163. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  164. aiq/observability/mixin/serialize_mixin.py +61 -0
  165. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  166. aiq/observability/processor/__init__.py +14 -0
  167. aiq/observability/processor/batching_processor.py +316 -0
  168. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  169. aiq/observability/processor/processor.py +68 -0
  170. aiq/observability/register.py +36 -39
  171. aiq/observability/utils/__init__.py +14 -0
  172. aiq/observability/utils/dict_utils.py +236 -0
  173. aiq/observability/utils/time_utils.py +31 -0
  174. aiq/profiler/calc/__init__.py +14 -0
  175. aiq/profiler/calc/calc_runner.py +623 -0
  176. aiq/profiler/calc/calculations.py +288 -0
  177. aiq/profiler/calc/data_models.py +176 -0
  178. aiq/profiler/calc/plot.py +345 -0
  179. aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
  180. aiq/profiler/data_models.py +24 -0
  181. aiq/profiler/inference_metrics_model.py +3 -0
  182. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
  183. aiq/profiler/inference_optimization/data_models.py +2 -2
  184. aiq/profiler/inference_optimization/llm_metrics.py +2 -2
  185. aiq/profiler/profile_runner.py +61 -21
  186. aiq/runtime/loader.py +9 -3
  187. aiq/runtime/runner.py +23 -9
  188. aiq/runtime/session.py +25 -7
  189. aiq/runtime/user_metadata.py +2 -3
  190. aiq/tool/chat_completion.py +74 -0
  191. aiq/tool/code_execution/README.md +152 -0
  192. aiq/tool/code_execution/code_sandbox.py +151 -72
  193. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  194. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  195. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  196. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  197. aiq/tool/code_execution/register.py +7 -3
  198. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  199. aiq/tool/mcp/exceptions.py +142 -0
  200. aiq/tool/mcp/mcp_client.py +41 -6
  201. aiq/tool/mcp/mcp_tool.py +3 -2
  202. aiq/tool/register.py +1 -0
  203. aiq/tool/server_tools.py +6 -3
  204. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  205. aiq/utils/exception_handlers/mcp.py +211 -0
  206. aiq/utils/io/model_processing.py +28 -0
  207. aiq/utils/log_utils.py +37 -0
  208. aiq/utils/string_utils.py +38 -0
  209. aiq/utils/type_converter.py +18 -2
  210. aiq/utils/type_utils.py +87 -0
  211. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/METADATA +53 -21
  212. aiqtoolkit-1.2.0rc2.dist-info/RECORD +436 -0
  213. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/WHEEL +1 -1
  214. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/entry_points.txt +3 -0
  215. aiq/front_ends/fastapi/websocket.py +0 -148
  216. aiq/observability/async_otel_listener.py +0 -429
  217. aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
  218. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  219. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/licenses/LICENSE.md +0 -0
  220. {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/top_level.txt +0 -0
aiq/tool/code_execution/code_sandbox.py +151 -72
@@ -15,11 +15,16 @@
15
15
  import abc
16
16
  import json
17
17
  import logging
18
+ import textwrap
19
+ from typing import Any
18
20
  from urllib.parse import urljoin
19
21
 
20
22
  import requests
23
+ import requests.adapters
21
24
  from pydantic import HttpUrl
22
25
 
26
+ from aiq.utils.type_utils import override
27
+
23
28
  logger = logging.getLogger(__file__)
24
29
 
25
30
 
@@ -43,18 +48,18 @@ class Sandbox(abc.ABC):
43
48
  *,
44
49
  uri: HttpUrl,
45
50
  ):
46
- self.url = self._get_execute_url(uri)
51
+ self.url: str = self._get_execute_url(uri)
47
52
  session = requests.Session()
48
53
  adapter = requests.adapters.HTTPAdapter(pool_maxsize=1500, pool_connections=1500, max_retries=3)
49
54
  session.mount('http://', adapter)
50
55
  session.mount('https://', adapter)
51
- self.http_session = session
56
+ self.http_session: requests.Session = session
52
57
 
53
- def _send_request(self, request, timeout):
58
+ def _send_request(self, request: dict[str, Any], timeout_seconds: float) -> dict[str, str]:
54
59
  output = self.http_session.post(
55
60
  url=self.url,
56
61
  data=json.dumps(request),
57
- timeout=timeout,
62
+ timeout=timeout_seconds,
58
63
  headers={"Content-Type": "application/json"},
59
64
  )
60
65
  # retrying 502 errors
@@ -64,104 +69,180 @@ class Sandbox(abc.ABC):
64
69
  return self._parse_request_output(output)
65
70
 
66
71
  @abc.abstractmethod
67
- def _parse_request_output(self, output):
72
+ def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
68
73
  pass
69
74
 
70
75
  @abc.abstractmethod
71
- def _get_execute_url(self, uri):
76
+ def _get_execute_url(self, uri: HttpUrl) -> str:
72
77
  pass
73
78
 
74
79
  @abc.abstractmethod
75
- def _prepare_request(self, generated_code, timeout):
80
+ def _prepare_request(self, generated_code: str, timeout_seconds: float) -> dict[str, Any]:
76
81
  pass
77
82
 
78
83
  async def execute_code(
79
84
  self,
80
85
  generated_code: str,
81
- timeout: float = 10.0,
86
+ timeout_seconds: float = 10.0,
82
87
  language: str = "python",
83
88
  max_output_characters: int = 1000,
84
- ) -> tuple[dict, str]:
89
+ ) -> dict[str, str]:
85
90
 
86
- generated_code = generated_code.lstrip().rstrip().lstrip("`").rstrip("`")
87
- code_to_execute = """
88
- import traceback
89
- import json
90
- import os
91
- import warnings
92
- import contextlib
93
- import io
94
- warnings.filterwarnings('ignore')
95
- os.environ['OPENBLAS_NUM_THREADS'] = '16'
96
- """
97
-
98
- code_to_execute += f"""
99
- \ngenerated_code = {repr(generated_code)}\n
100
- stdout = io.StringIO()
101
- stderr = io.StringIO()
102
-
103
- with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
104
- try:
105
- exec(generated_code)
106
- status = "completed"
107
- except Exception:
108
- status = "error"
109
- stderr.write(traceback.format_exc())
110
- stdout = stdout.getvalue()
111
- stderr = stderr.getvalue()
112
- if len(stdout) > {max_output_characters}:
113
- stdout = stdout[:{max_output_characters}] + "<output cut>"
114
- if len(stderr) > {max_output_characters}:
115
- stderr = stderr[:{max_output_characters}] + "<output cut>"
116
- if stdout:
117
- stdout += "\\n"
118
- if stderr:
119
- stderr += "\\n"
120
- output = {{"process_status": status, "stdout": stdout, "stderr": stderr}}
121
- print(json.dumps(output))
122
- """
123
- request = self._prepare_request(code_to_execute, timeout)
91
+ if language != "python":
92
+ raise ValueError(f"Language {language} not supported")
93
+
94
+ generated_code = generated_code.strip().strip("`")
95
+ code_to_execute = textwrap.dedent("""
96
+ import traceback
97
+ import json
98
+ import os
99
+ import warnings
100
+ import contextlib
101
+ import io
102
+ warnings.filterwarnings('ignore')
103
+ os.environ['OPENBLAS_NUM_THREADS'] = '16'
104
+ """).strip()
105
+
106
+ # Use json.dumps to properly escape the generated_code instead of repr()
107
+ escaped_code = json.dumps(generated_code)
108
+ code_to_execute += textwrap.dedent(f"""
109
+
110
+ generated_code = {escaped_code}
111
+
112
+ stdout = io.StringIO()
113
+ stderr = io.StringIO()
114
+
115
+ with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
116
+ try:
117
+ exec(generated_code)
118
+ status = "completed"
119
+ except Exception:
120
+ status = "error"
121
+ stderr.write(traceback.format_exc())
122
+ stdout = stdout.getvalue()
123
+ stderr = stderr.getvalue()
124
+ if len(stdout) > {max_output_characters}:
125
+ stdout = stdout[:{max_output_characters}] + "<output cut>"
126
+ if len(stderr) > {max_output_characters}:
127
+ stderr = stderr[:{max_output_characters}] + "<output cut>"
128
+ if stdout:
129
+ stdout += "\\n"
130
+ if stderr:
131
+ stderr += "\\n"
132
+ output = {{"process_status": status, "stdout": stdout, "stderr": stderr}}
133
+ print(json.dumps(output))
134
+ """).strip()
135
+ request = self._prepare_request(code_to_execute, timeout_seconds)
124
136
  try:
125
- output = self._send_request(request, timeout)
137
+ return self._send_request(request, timeout_seconds)
126
138
  except requests.exceptions.Timeout:
127
- output = {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
128
- return output
139
+ return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
129
140
 
130
141
 
131
142
  class LocalSandbox(Sandbox):
132
143
  """Locally hosted sandbox."""
133
144
 
134
- def _get_execute_url(self, uri):
145
+ def __init__(self, *, uri: HttpUrl):
146
+ super().__init__(uri=uri)
147
+
148
+ @override
149
+ def _get_execute_url(self, uri: HttpUrl) -> str:
135
150
  return urljoin(str(uri), "execute")
136
151
 
137
- def _parse_request_output(self, output):
152
+ @override
153
+ def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
138
154
  try:
139
- return output.json()
155
+ output_json = output.json()
156
+ assert isinstance(output_json, dict)
157
+ return output_json
140
158
  except json.JSONDecodeError as e:
141
- logger.exception("Error parsing output: %s. %s", output.text, e)
142
- return {'process_status': 'error', 'stdout': '', 'stderr': 'Unknown error'}
159
+ logger.exception("Error parsing output: %s. %s", output.text, e)
160
+ return {'process_status': 'error', 'stdout': '', 'stderr': f'Unknown error: {e} \"{output.text}\"'}
143
161
 
144
- def _prepare_request(self, generated_code, timeout, language='python', **kwargs):
145
- return {
162
+ @override
163
+ def _prepare_request(self,
164
+ generated_code: str,
165
+ timeout_seconds: float,
166
+ language: str = "python",
167
+ **kwargs) -> dict[str, Any]:
168
+ request = {
146
169
  "generated_code": generated_code,
147
- "timeout": timeout,
170
+ "timeout": timeout_seconds,
148
171
  "language": language,
149
172
  }
173
+ return request
174
+
175
+ @override
176
+ async def execute_code(
177
+ self,
178
+ generated_code: str,
179
+ timeout_seconds: float = 10.0,
180
+ language: str = "python",
181
+ max_output_characters: int = 1000,
182
+ ) -> dict[str, str]:
183
+ """Override execute_code to bypass the wrapper logic and send user code directly to our server."""
184
+
185
+ logger.debug("Raw input generated_code: %s", generated_code)
186
+
187
+ # The input appears to be a string representation of a dictionary
188
+ # We need to parse it and extract the actual code
189
+ try:
190
+ # Try to evaluate the string as a Python literal (dictionary)
191
+ import ast
192
+ parsed_dict = ast.literal_eval(generated_code)
193
+ if isinstance(parsed_dict, dict) and 'generated_code' in parsed_dict:
194
+ actual_code = parsed_dict['generated_code']
195
+ assert isinstance(actual_code, str)
196
+ logger.debug("Extracted code from dict: %s...", actual_code[:100])
197
+ else:
198
+ # If it's not a dict or doesn't have the expected key, use as-is
199
+ actual_code = generated_code
200
+ logger.debug("Using code as-is: %s...", actual_code[:100])
201
+ except (ValueError, SyntaxError):
202
+ # If parsing fails, use the input as-is
203
+ actual_code = generated_code
204
+ logger.debug("Failed to parse, using as-is: %s...", actual_code[:100])
205
+
206
+ # Clean the actual code more carefully to avoid removing backticks that are part of Python code
207
+ # remove all leading/trailing whitespace -- strip()
208
+ # remove all leading/trailing backticks -- strip("`")
209
+ # may potentially start with python, so just trim from the front.
210
+ POTENTIAL_PREFIXES = ["python"]
211
+ actual_code = actual_code.strip().strip("`")
212
+ for prefix in POTENTIAL_PREFIXES:
213
+ if actual_code.startswith(prefix):
214
+ actual_code = actual_code[len(prefix):]
215
+ break
216
+
217
+ # Send the user's code directly to our server without any wrapper logic
218
+ # Our server already handles stdout/stderr capture and error handling
219
+ request = self._prepare_request(actual_code, timeout_seconds, language)
220
+ try:
221
+ return self._send_request(request, timeout_seconds)
222
+ except requests.exceptions.Timeout:
223
+ return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
150
224
 
151
225
 
152
226
  class PistonSandbox(Sandbox):
153
227
  """Piston sandbox (https://github.com/engineer-man/piston)"""
154
228
 
155
- def _get_execute_url(self, uri):
229
+ @override
230
+ def _get_execute_url(self, uri: HttpUrl) -> str:
156
231
  return urljoin(str(uri), "execute")
157
232
 
158
- def _parse_request_output(self, output):
159
- output = output.json()
160
- if output['run']['signal'] == "SIGKILL":
161
- return {'result': None, 'error_message': 'Unknown error: SIGKILL'}
162
- return json.loads(output['run']['output'])
233
+ @override
234
+ def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
235
+ output_json = output.json()
236
+ assert isinstance(output_json, dict)
237
+ assert 'run' in output_json
238
+ run_json = output_json['run']
239
+ assert isinstance(run_json, dict)
240
+ if run_json["code"] != 0:
241
+ return {'process_status': "error", 'stdout': run_json['stdout'], 'stderr': run_json['stderr']}
242
+ return {'process_status': "completed", 'stdout': run_json['stdout'], 'stderr': run_json['stderr']}
163
243
 
164
- def _prepare_request(self, generated_code: str, timeout, **kwargs):
244
+ @override
245
+ def _prepare_request(self, generated_code: str, timeout_seconds: float, **kwargs) -> dict[str, Any]:
165
246
  return {
166
247
  "language": "py",
167
248
  "version": "3.10.0",
@@ -170,19 +251,17 @@ class PistonSandbox(Sandbox):
170
251
  }],
171
252
  "stdin": "",
172
253
  "args": [],
173
- "run_timeout": timeout * 1000.0, # milliseconds
254
+ "run_timeout": timeout_seconds * 1000.0, # milliseconds
174
255
  "compile_memory_limit": -1,
175
256
  "run_memory_limit": -1,
176
257
  }
177
258
 
178
259
 
179
- sandboxes = {
180
- 'local': LocalSandbox,
181
- 'piston': PistonSandbox,
182
- }
183
-
184
-
185
260
  def get_sandbox(sandbox_type: str = "local", **kwargs):
186
261
  """A helper function to make it easier to set sandbox through cmd."""
262
+ sandboxes = {
263
+ 'local': LocalSandbox,
264
+ 'piston': PistonSandbox,
265
+ }
187
266
  sandbox_class = sandboxes[sandbox_type.lower()]
188
267
  return sandbox_class(**kwargs)
aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
@@ -0,0 +1 @@
1
+ persistence_test.*
aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
@@ -12,16 +12,59 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from __future__ import annotations
16
+
17
+ import contextlib
15
18
  import logging
16
19
  import multiprocessing
20
+ import os
17
21
  import resource
18
- import sys
22
+ from enum import Enum
19
23
  from io import StringIO
20
24
 
21
25
  from flask import Flask
26
+ from flask import Request
27
+ from flask import Response
22
28
  from flask import request
29
+ from pydantic import BaseModel
30
+ from pydantic import Field
23
31
 
24
32
  app = Flask(__name__)
33
+ logger = logging.getLogger(__name__)
34
+ logger.setLevel(logging.WARNING)
35
+
36
+
37
+ class CodeExecutionStatus(str, Enum):
38
+ """
39
+ Status of code execution.
40
+ """
41
+ COMPLETED = "completed"
42
+ ERROR = "error"
43
+ TIMEOUT = "timeout"
44
+
45
+
46
+ class CodeExecutionResult(BaseModel):
47
+ """
48
+ Result of code execution.
49
+ """
50
+ process_status: CodeExecutionStatus = Field(default=CodeExecutionStatus.COMPLETED,
51
+ description="Status of the process")
52
+ stdout: str = Field(description="Standard output of the process")
53
+ stderr: str = Field(description="Standard error of the process")
54
+
55
+
56
+ class CodeExecutionResponse(Response):
57
+ """
58
+ Response class that returns a JSON response with the given status code and result.
59
+ """
60
+
61
+ def __init__(self, status_code: int, result: CodeExecutionResult):
62
+ super().__init__(status=status_code, mimetype="application/json", response=result.model_dump_json())
63
+
64
+ @classmethod
65
+ def with_error(cls, status_code: int, error_message: str) -> 'CodeExecutionResponse':
66
+ return cls(status_code,
67
+ CodeExecutionResult(process_status=CodeExecutionStatus.ERROR, stdout="", stderr=error_message))
25
68
 
26
69
 
27
70
  @app.after_request
@@ -34,50 +77,122 @@ def add_hsts_header(response):
34
77
  return response
35
78
 
36
79
 
37
- def execute_python(generated_code, timeout):
80
+ def execute_python(generated_code: str, timeout: float) -> CodeExecutionResult:
81
+ """
82
+ Execute Python code in a subprocess.
83
+
84
+ Args:
85
+ generated_code: The code to execute
86
+ timeout: The timeout for the execution
87
+
88
+ Returns:
89
+ CodeExecutionResult object containing the execution result
90
+ """
91
+
38
92
  # running in a separate process to ensure any kind of crashes are properly handled
39
93
  queue = multiprocessing.Queue()
40
94
  process = multiprocessing.Process(target=execute_code_subprocess, args=(generated_code, queue))
95
+
41
96
  process.start()
97
+ # wait until the process finishes or the timeout expires
42
98
  process.join(timeout=timeout)
43
-
44
- if process.is_alive(): # didn't finish successfully
99
+ if process.exitcode is None:
45
100
  process.kill()
46
- return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
101
+ return CodeExecutionResult(process_status=CodeExecutionStatus.TIMEOUT, stdout="", stderr="Timed out\n")
47
102
 
48
103
  return queue.get()
49
104
 
50
105
 
51
106
  # need to memory-limit to avoid common errors of allocating too much
52
107
  # but this has to be done in a subprocess to not crush server itself
53
- def execute_code_subprocess(generated_code, queue):
54
- limit = 1024 * 1024 * 1024 * 10 # 10gb - somehow with a smaller limit the server dies when numpy is used
55
- resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
56
- resource.setrlimit(resource.RLIMIT_DATA, (limit, limit))
108
+ def execute_code_subprocess(generated_code: str, queue):
109
+ """
110
+ Execute code in a subprocess.
111
+
112
+ Args:
113
+ generated_code: The code to execute
114
+ queue: The queue to put the result in
115
+ """
116
+
117
+ logger.debug("execute_code_subprocess started, PID: %s", os.getpid())
57
118
 
58
- # this can be overriden inside generated code, so it's not a guaranteed protection
59
- sys.stdout = StringIO()
60
119
  try:
61
- exec(generated_code, {}) # pylint: disable=W0122
62
- queue.put(sys.stdout.getvalue())
120
+ limit = 1024 * 1024 * 1024 * 10 # 10gb - somehow with a smaller limit the server dies when numpy is used
121
+ resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
122
+ resource.setrlimit(resource.RLIMIT_DATA, (limit, limit))
63
123
  except Exception as e:
64
- print(f"Error: {str(e)}")
65
- queue.put({"process_status": "error", "stdout": "", "stderr": str(e) + "\n"})
124
+ logger.error("Failed to set resource limits, PID: %s, error: %s", os.getpid(), e)
125
+
126
+ stdout_capture = StringIO()
127
+ stderr_capture = StringIO()
128
+ try:
129
+ with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
130
+ exec(generated_code, {}) # pylint: disable=W0122
131
+ logger.debug("execute_code_subprocess finished, PID: %s", os.getpid())
132
+ queue.put(CodeExecutionResult(stdout=stdout_capture.getvalue(), stderr=stderr_capture.getvalue()))
133
+ except Exception as e:
134
+ import traceback
135
+ with contextlib.redirect_stderr(stderr_capture):
136
+ traceback.print_exc()
137
+ logger.debug("execute_code_subprocess failed, PID: %s, error: %s", os.getpid(), e)
138
+ queue.put(
139
+ CodeExecutionResult(process_status=CodeExecutionStatus.ERROR,
140
+ stdout=stdout_capture.getvalue(),
141
+ stderr=stderr_capture.getvalue()))
142
+
143
+
144
+ def do_execute(request: Request) -> CodeExecutionResponse:
145
+ """
146
+ Main function to handle execution requests.
147
+
148
+ Args:
149
+ request: Request object containing the execution request
150
+
151
+ Returns:
152
+ CodeExecutionResponse object containing the execution result
153
+ """
154
+ try:
155
+ # Check if request has JSON data
156
+ if not request.is_json:
157
+ return CodeExecutionResponse.with_error(400, "Request must be JSON")
158
+
159
+ # Get JSON data safely
160
+ json_data = request.get_json(silent=True)
161
+
162
+ if json_data is None:
163
+ return CodeExecutionResponse.with_error(400, "Invalid JSON data")
164
+
165
+ # Check for required fields
166
+ if 'generated_code' not in json_data:
167
+ return CodeExecutionResponse.with_error(400, "Missing required field: generated_code")
168
+
169
+ if 'timeout' not in json_data:
170
+ return CodeExecutionResponse.with_error(400, "Missing required field: timeout")
171
+
172
+ if 'language' not in json_data:
173
+ return CodeExecutionResponse.with_error(400, "Missing required field: language")
174
+
175
+ generated_code: str | None = json_data.get('generated_code', None)
176
+ assert generated_code is not None
177
+ timeout: float | None = json_data.get('timeout', None)
178
+ assert timeout is not None
179
+ language: str | None = json_data.get('language', None)
180
+ assert language is not None
181
+
182
+ if language != 'python':
183
+ return CodeExecutionResponse.with_error(400, "Only python execution is supported")
184
+
185
+ return CodeExecutionResponse(200, execute_python(generated_code, timeout))
186
+
187
+ except Exception as e:
188
+ return CodeExecutionResponse.with_error(500, f"Server error: {str(e)}")
66
189
 
67
190
 
68
191
  # Main Flask endpoint to handle execution requests
69
192
  @app.route("/execute", methods=["POST"])
70
193
  def execute():
71
- generated_code = request.json['generated_code']
72
- timeout = request.json['timeout']
73
- language = request.json.get('language', 'python')
74
-
75
- if language == 'python':
76
- return execute_python(generated_code, timeout)
77
- return {"process_status": "error", "stdout": "", "stderr": "Only python execution is supported"}
194
+ return do_execute(request)
78
195
 
79
196
 
80
197
  if __name__ == '__main__':
81
- log = logging.getLogger('werkzeug')
82
- log.setLevel(logging.WARNING)
83
198
  app.run(port=6000)
aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
@@ -1,4 +1,6 @@
1
1
  numpy
2
2
  pandas
3
3
  scipy
4
- ipython
4
+ ipython
5
+ plotly
6
+ pydantic
aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
@@ -14,12 +14,37 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
+ # Usage: ./start_local_sandbox.sh [SANDBOX_NAME] [OUTPUT_DATA_PATH]
17
18
  # NOTE: needs to run from the root of the repo!
18
19
 
20
+ DOCKER_COMMAND=${DOCKER_COMMAND:-"docker"}
19
21
  SANDBOX_NAME=${1:-'local-sandbox'}
20
22
  NUM_THREADS=10
21
23
 
24
+ # Get the output_data directory path for mounting
25
+ # Priority: command line argument > environment variable > default path (current directory)
26
+ OUTPUT_DATA_PATH=${2:-${OUTPUT_DATA_PATH:-$(pwd)}}
22
27
 
23
- docker build --tag=${SANDBOX_NAME} --build-arg="UWSGI_PROCESSES=$((${NUM_THREADS} * 10))" --build-arg="UWSGI_CHEAPER=${NUM_THREADS}" -f Dockerfile.sandbox .
28
+ echo "Starting sandbox with container name: ${SANDBOX_NAME}"
29
+ echo "Mounting output_data directory: ${OUTPUT_DATA_PATH}"
24
30
 
25
- docker run --network=host --rm --name=local-sandbox ${SANDBOX_NAME}
31
+ # Verify the path exists before mounting, create if it doesn't
32
+ if [ ! -d "${OUTPUT_DATA_PATH}" ]; then
33
+ echo "Output data directory does not exist, creating: ${OUTPUT_DATA_PATH}"
34
+ mkdir -p "${OUTPUT_DATA_PATH}"
35
+ fi
36
+
37
+ # Check if the Docker image already exists
38
+ if ! ${DOCKER_COMMAND} images ${SANDBOX_NAME} | grep -q "${SANDBOX_NAME}"; then
39
+ echo "Docker image not found locally. Building ${SANDBOX_NAME}..."
40
+ ${DOCKER_COMMAND} build --tag=${SANDBOX_NAME} --build-arg="UWSGI_PROCESSES=$((${NUM_THREADS} * 10))" --build-arg="UWSGI_CHEAPER=${NUM_THREADS}" -f Dockerfile.sandbox .
41
+ else
42
+ echo "Using existing Docker image: ${SANDBOX_NAME}"
43
+ fi
44
+
45
+ # Mount the output_data directory directly so files created in container appear in the local directory
46
+ ${DOCKER_COMMAND} run --rm --name=local-sandbox \
47
+ --network=host \
48
+ -v "${OUTPUT_DATA_PATH}:/workspace" \
49
+ -w /workspace \
50
+ ${SANDBOX_NAME}
aiq/tool/code_execution/register.py +7 -3
@@ -46,7 +46,11 @@ async def code_execution_tool(config: CodeExecutionToolConfig, builder: Builder)
46
46
  class CodeExecutionInputSchema(BaseModel):
47
47
  generated_code: str = Field(description="String containing the code to be executed")
48
48
 
49
- sandbox = get_sandbox(sandbox_type=config.sandbox_type, uri=config.uri)
49
+ # Create sandbox without working_directory
50
+ sandbox_kwargs = {"uri": config.uri}
51
+
52
+ sandbox = get_sandbox(sandbox_type=config.sandbox_type, **sandbox_kwargs)
53
+ logger.info(f"[DEBUG] Created sandbox of type: {config.sandbox_type}")
50
54
 
51
55
  async def _execute_code(generated_code: str) -> dict:
52
56
  logger.info("Executing code in the sandbox at %s", config.uri)
@@ -54,12 +58,12 @@ async def code_execution_tool(config: CodeExecutionToolConfig, builder: Builder)
54
58
  output = await sandbox.execute_code(
55
59
  generated_code=generated_code,
56
60
  language="python",
57
- timeout=config.timeout,
61
+ timeout_seconds=config.timeout,
58
62
  max_output_characters=config.max_output_characters,
59
63
  )
60
64
  except Exception as e:
61
65
  logger.exception("Error when executing code in the sandbox, %s", e)
62
- return {"process_status": "error", "stdout": "", "stderr": e}
66
+ return {"process_status": "error", "stdout": "", "stderr": str(e)}
63
67
  return output
64
68
 
65
69
  yield FunctionInfo.from_fn(