PyPI - aiqtoolkit - Versions diffs - 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl - Mend

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (220) hide show

aiq/agent/base.py +170 -8
aiq/agent/dual_node.py +1 -1
aiq/agent/react_agent/agent.py +146 -112
aiq/agent/react_agent/prompt.py +1 -6
aiq/agent/react_agent/register.py +36 -35
aiq/agent/rewoo_agent/agent.py +36 -35
aiq/agent/rewoo_agent/register.py +2 -2
aiq/agent/tool_calling_agent/agent.py +3 -7
aiq/agent/tool_calling_agent/register.py +1 -1
aiq/authentication/__init__.py +14 -0
aiq/authentication/api_key/__init__.py +14 -0
aiq/authentication/api_key/api_key_auth_provider.py +92 -0
aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
aiq/authentication/api_key/register.py +26 -0
aiq/authentication/exceptions/__init__.py +14 -0
aiq/authentication/exceptions/api_key_exceptions.py +38 -0
aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
aiq/authentication/exceptions/call_back_exceptions.py +38 -0
aiq/authentication/exceptions/request_exceptions.py +54 -0
aiq/authentication/http_basic_auth/__init__.py +0 -0
aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
aiq/authentication/http_basic_auth/register.py +30 -0
aiq/authentication/interfaces.py +93 -0
aiq/authentication/oauth2/__init__.py +14 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
aiq/authentication/oauth2/register.py +25 -0
aiq/authentication/register.py +21 -0
aiq/builder/builder.py +64 -2
aiq/builder/component_utils.py +16 -3
aiq/builder/context.py +37 -0
aiq/builder/eval_builder.py +43 -2
aiq/builder/function.py +44 -12
aiq/builder/function_base.py +1 -1
aiq/builder/intermediate_step_manager.py +6 -8
aiq/builder/user_interaction_manager.py +3 -0
aiq/builder/workflow.py +23 -18
aiq/builder/workflow_builder.py +421 -61
aiq/cli/commands/info/list_mcp.py +103 -16
aiq/cli/commands/sizing/__init__.py +14 -0
aiq/cli/commands/sizing/calc.py +294 -0
aiq/cli/commands/sizing/sizing.py +27 -0
aiq/cli/commands/start.py +2 -1
aiq/cli/entrypoint.py +2 -0
aiq/cli/register_workflow.py +80 -0
aiq/cli/type_registry.py +151 -30
aiq/data_models/api_server.py +124 -12
aiq/data_models/authentication.py +231 -0
aiq/data_models/common.py +35 -7
aiq/data_models/component.py +17 -9
aiq/data_models/component_ref.py +33 -0
aiq/data_models/config.py +60 -3
aiq/data_models/dataset_handler.py +2 -1
aiq/data_models/embedder.py +1 -0
aiq/data_models/evaluate.py +23 -0
aiq/data_models/function_dependencies.py +8 -0
aiq/data_models/interactive.py +10 -1
aiq/data_models/intermediate_step.py +38 -5
aiq/data_models/its_strategy.py +30 -0
aiq/data_models/llm.py +1 -0
aiq/data_models/memory.py +1 -0
aiq/data_models/object_store.py +44 -0
aiq/data_models/profiler.py +1 -0
aiq/data_models/retry_mixin.py +35 -0
aiq/data_models/span.py +187 -0
aiq/data_models/telemetry_exporter.py +2 -2
aiq/embedder/nim_embedder.py +2 -1
aiq/embedder/openai_embedder.py +2 -1
aiq/eval/config.py +19 -1
aiq/eval/dataset_handler/dataset_handler.py +87 -2
aiq/eval/evaluate.py +208 -27
aiq/eval/evaluator/base_evaluator.py +73 -0
aiq/eval/evaluator/evaluator_model.py +1 -0
aiq/eval/intermediate_step_adapter.py +11 -5
aiq/eval/rag_evaluator/evaluate.py +55 -15
aiq/eval/rag_evaluator/register.py +6 -1
aiq/eval/remote_workflow.py +7 -2
aiq/eval/runners/__init__.py +14 -0
aiq/eval/runners/config.py +39 -0
aiq/eval/runners/multi_eval_runner.py +54 -0
aiq/eval/trajectory_evaluator/evaluate.py +22 -65
aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
aiq/eval/tunable_rag_evaluator/register.py +2 -0
aiq/eval/usage_stats.py +41 -0
aiq/eval/utils/output_uploader.py +10 -1
aiq/eval/utils/weave_eval.py +184 -0
aiq/experimental/__init__.py +0 -0
aiq/experimental/decorators/__init__.py +0 -0
aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
aiq/experimental/inference_time_scaling/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
aiq/experimental/inference_time_scaling/register.py +36 -0
aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
aiq/front_ends/console/authentication_flow_handler.py +233 -0
aiq/front_ends/console/console_front_end_plugin.py +11 -2
aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
aiq/front_ends/fastapi/job_store.py +47 -25
aiq/front_ends/fastapi/main.py +2 -0
aiq/front_ends/fastapi/message_handler.py +108 -89
aiq/front_ends/fastapi/step_adaptor.py +2 -1
aiq/llm/aws_bedrock_llm.py +57 -0
aiq/llm/nim_llm.py +2 -1
aiq/llm/openai_llm.py +3 -2
aiq/llm/register.py +1 -0
aiq/meta/pypi.md +12 -12
aiq/object_store/__init__.py +20 -0
aiq/object_store/in_memory_object_store.py +74 -0
aiq/object_store/interfaces.py +84 -0
aiq/object_store/models.py +36 -0
aiq/object_store/register.py +20 -0
aiq/observability/__init__.py +14 -0
aiq/observability/exporter/__init__.py +14 -0
aiq/observability/exporter/base_exporter.py +449 -0
aiq/observability/exporter/exporter.py +78 -0
aiq/observability/exporter/file_exporter.py +33 -0
aiq/observability/exporter/processing_exporter.py +269 -0
aiq/observability/exporter/raw_exporter.py +52 -0
aiq/observability/exporter/span_exporter.py +264 -0
aiq/observability/exporter_manager.py +335 -0
aiq/observability/mixin/__init__.py +14 -0
aiq/observability/mixin/batch_config_mixin.py +26 -0
aiq/observability/mixin/collector_config_mixin.py +23 -0
aiq/observability/mixin/file_mixin.py +288 -0
aiq/observability/mixin/file_mode.py +23 -0
aiq/observability/mixin/resource_conflict_mixin.py +134 -0
aiq/observability/mixin/serialize_mixin.py +61 -0
aiq/observability/mixin/type_introspection_mixin.py +183 -0
aiq/observability/processor/__init__.py +14 -0
aiq/observability/processor/batching_processor.py +316 -0
aiq/observability/processor/intermediate_step_serializer.py +28 -0
aiq/observability/processor/processor.py +68 -0
aiq/observability/register.py +36 -39
aiq/observability/utils/__init__.py +14 -0
aiq/observability/utils/dict_utils.py +236 -0
aiq/observability/utils/time_utils.py +31 -0
aiq/profiler/calc/__init__.py +14 -0
aiq/profiler/calc/calc_runner.py +623 -0
aiq/profiler/calc/calculations.py +288 -0
aiq/profiler/calc/data_models.py +176 -0
aiq/profiler/calc/plot.py +345 -0
aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
aiq/profiler/data_models.py +24 -0
aiq/profiler/inference_metrics_model.py +3 -0
aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
aiq/profiler/inference_optimization/data_models.py +2 -2
aiq/profiler/inference_optimization/llm_metrics.py +2 -2
aiq/profiler/profile_runner.py +61 -21
aiq/runtime/loader.py +9 -3
aiq/runtime/runner.py +23 -9
aiq/runtime/session.py +25 -7
aiq/runtime/user_metadata.py +2 -3
aiq/tool/chat_completion.py +74 -0
aiq/tool/code_execution/README.md +152 -0
aiq/tool/code_execution/code_sandbox.py +151 -72
aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
aiq/tool/code_execution/register.py +7 -3
aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
aiq/tool/mcp/exceptions.py +142 -0
aiq/tool/mcp/mcp_client.py +41 -6
aiq/tool/mcp/mcp_tool.py +3 -2
aiq/tool/register.py +1 -0
aiq/tool/server_tools.py +6 -3
aiq/utils/exception_handlers/automatic_retries.py +289 -0
aiq/utils/exception_handlers/mcp.py +211 -0
aiq/utils/io/model_processing.py +28 -0
aiq/utils/log_utils.py +37 -0
aiq/utils/string_utils.py +38 -0
aiq/utils/type_converter.py +18 -2
aiq/utils/type_utils.py +87 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/METADATA +53 -21
aiqtoolkit-1.2.0rc1.dist-info/RECORD +436 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/WHEEL +1 -1
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/entry_points.txt +3 -0
aiq/front_ends/fastapi/websocket.py +0 -148
aiq/observability/async_otel_listener.py +0 -429
aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE.md +0 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/top_level.txt +0 -0

aiq/tool/code_execution/code_sandbox.py CHANGED Viewed

@@ -15,11 +15,16 @@
 import abc
 import json
 import logging
+import textwrap
+from typing import Any
 from urllib.parse import urljoin
 import requests
+import requests.adapters
 from pydantic import HttpUrl
+from aiq.utils.type_utils import override
 logger = logging.getLogger(__file__)
@@ -43,18 +48,18 @@ class Sandbox(abc.ABC):
         *,
         uri: HttpUrl,
     ):
-        self.url = self._get_execute_url(uri)
+        self.url: str = self._get_execute_url(uri)
         session = requests.Session()
         adapter = requests.adapters.HTTPAdapter(pool_maxsize=1500, pool_connections=1500, max_retries=3)
         session.mount('http://', adapter)
         session.mount('https://', adapter)
-        self.http_session = session
+        self.http_session: requests.Session = session
-    def _send_request(self, request, timeout):
+    def _send_request(self, request: dict[str, Any], timeout_seconds: float) -> dict[str, str]:
         output = self.http_session.post(
             url=self.url,
             data=json.dumps(request),
-            timeout=timeout,
+            timeout=timeout_seconds,
             headers={"Content-Type": "application/json"},
         )
         # retrying 502 errors
@@ -64,104 +69,180 @@ class Sandbox(abc.ABC):
         return self._parse_request_output(output)
     @abc.abstractmethod
-    def _parse_request_output(self, output):
+    def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
         pass
     @abc.abstractmethod
-    def _get_execute_url(self, uri):
+    def _get_execute_url(self, uri: HttpUrl) -> str:
         pass
     @abc.abstractmethod
-    def _prepare_request(self, generated_code, timeout):
+    def _prepare_request(self, generated_code: str, timeout_seconds: float) -> dict[str, Any]:
         pass
     async def execute_code(
         self,
         generated_code: str,
-        timeout: float = 10.0,
+        timeout_seconds: float = 10.0,
         language: str = "python",
         max_output_characters: int = 1000,
-    ) -> tuple[dict, str]:
+    ) -> dict[str, str]:
-        generated_code = generated_code.lstrip().rstrip().lstrip("`").rstrip("`")
-        code_to_execute = """
-import traceback
-import json
-import os
-import warnings
-import contextlib
-import io
-warnings.filterwarnings('ignore')
-os.environ['OPENBLAS_NUM_THREADS'] = '16'
-"""
-        code_to_execute += f"""
-\ngenerated_code = {repr(generated_code)}\n
-stdout = io.StringIO()
-stderr = io.StringIO()
-with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
-    try:
-        exec(generated_code)
-        status = "completed"
-    except Exception:
-        status = "error"
-        stderr.write(traceback.format_exc())
-stdout = stdout.getvalue()
-stderr = stderr.getvalue()
-if len(stdout) > {max_output_characters}:
-    stdout = stdout[:{max_output_characters}] + "<output cut>"
-if len(stderr) > {max_output_characters}:
-    stderr = stderr[:{max_output_characters}] + "<output cut>"
-if stdout:
-    stdout += "\\n"
-if stderr:
-    stderr += "\\n"
-output = {{"process_status": status, "stdout": stdout, "stderr": stderr}}
-print(json.dumps(output))
-"""
-        request = self._prepare_request(code_to_execute, timeout)
+        if language != "python":
+            raise ValueError(f"Language {language} not supported")
+        generated_code = generated_code.strip().strip("`")
+        code_to_execute = textwrap.dedent("""
+            import traceback
+            import json
+            import os
+            import warnings
+            import contextlib
+            import io
+            warnings.filterwarnings('ignore')
+            os.environ['OPENBLAS_NUM_THREADS'] = '16'
+        """).strip()
+        # Use json.dumps to properly escape the generated_code instead of repr()
+        escaped_code = json.dumps(generated_code)
+        code_to_execute += textwrap.dedent(f"""
+            generated_code = {escaped_code}
+            stdout = io.StringIO()
+            stderr = io.StringIO()
+            with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
+                try:
+                    exec(generated_code)
+                    status = "completed"
+                except Exception:
+                    status = "error"
+                    stderr.write(traceback.format_exc())
+            stdout = stdout.getvalue()
+            stderr = stderr.getvalue()
+            if len(stdout) > {max_output_characters}:
+                stdout = stdout[:{max_output_characters}] + "<output cut>"
+            if len(stderr) > {max_output_characters}:
+                stderr = stderr[:{max_output_characters}] + "<output cut>"
+            if stdout:
+                stdout += "\\n"
+            if stderr:
+                stderr += "\\n"
+            output = {{"process_status": status, "stdout": stdout, "stderr": stderr}}
+            print(json.dumps(output))
+        """).strip()
+        request = self._prepare_request(code_to_execute, timeout_seconds)
         try:
-            output = self._send_request(request, timeout)
+            return self._send_request(request, timeout_seconds)
         except requests.exceptions.Timeout:
-            output = {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
-        return output
+            return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
 class LocalSandbox(Sandbox):
     """Locally hosted sandbox."""
-    def _get_execute_url(self, uri):
+    def __init__(self, *, uri: HttpUrl):
+        super().__init__(uri=uri)
+    @override
+    def _get_execute_url(self, uri: HttpUrl) -> str:
         return urljoin(str(uri), "execute")
-    def _parse_request_output(self, output):
+    @override
+    def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
         try:
-            return output.json()
+            output_json = output.json()
+            assert isinstance(output_json, dict)
+            return output_json
         except json.JSONDecodeError as e:
-            logger.exception("Error  parsing output: %s. %s", output.text, e)
-            return {'process_status': 'error', 'stdout': '', 'stderr': 'Unknown error'}
+            logger.exception("Error parsing output: %s. %s", output.text, e)
+            return {'process_status': 'error', 'stdout': '', 'stderr': f'Unknown error: {e} \"{output.text}\"'}
-    def _prepare_request(self, generated_code, timeout, language='python', **kwargs):
-        return {
+    @override
+    def _prepare_request(self,
+                         generated_code: str,
+                         timeout_seconds: float,
+                         language: str = "python",
+                         **kwargs) -> dict[str, Any]:
+        request = {
             "generated_code": generated_code,
-            "timeout": timeout,
+            "timeout": timeout_seconds,
             "language": language,
         }
+        return request
+    @override
+    async def execute_code(
+        self,
+        generated_code: str,
+        timeout_seconds: float = 10.0,
+        language: str = "python",
+        max_output_characters: int = 1000,
+    ) -> dict[str, str]:
+        """Override execute_code to bypass the wrapper logic and send user code directly to our server."""
+        logger.debug("Raw input generated_code: %s", generated_code)
+        # The input appears to be a string representation of a dictionary
+        # We need to parse it and extract the actual code
+        try:
+            # Try to evaluate the string as a Python literal (dictionary)
+            import ast
+            parsed_dict = ast.literal_eval(generated_code)
+            if isinstance(parsed_dict, dict) and 'generated_code' in parsed_dict:
+                actual_code = parsed_dict['generated_code']
+                assert isinstance(actual_code, str)
+                logger.debug("Extracted code from dict: %s...", actual_code[:100])
+            else:
+                # If it's not a dict or doesn't have the expected key, use as-is
+                actual_code = generated_code
+                logger.debug("Using code as-is: %s...", actual_code[:100])
+        except (ValueError, SyntaxError):
+            # If parsing fails, use the input as-is
+            actual_code = generated_code
+            logger.debug("Failed to parse, using as-is: %s...", actual_code[:100])
+        # Clean the actual code more carefully to avoid removing backticks that are part of Python code
+        # remove all leading/trailing whitespace -- strip()
+        # remove all leading/trailing backticks -- strip("`")
+        # may potentially start with python, so just trim from the front.
+        POTENTIAL_PREFIXES = ["python"]
+        actual_code = actual_code.strip().strip("`")
+        for prefix in POTENTIAL_PREFIXES:
+            if actual_code.startswith(prefix):
+                actual_code = actual_code[len(prefix):]
+                break
+        # Send the user's code directly to our server without any wrapper logic
+        # Our server already handles stdout/stderr capture and error handling
+        request = self._prepare_request(actual_code, timeout_seconds, language)
+        try:
+            return self._send_request(request, timeout_seconds)
+        except requests.exceptions.Timeout:
+            return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
 class PistonSandbox(Sandbox):
     """Piston sandbox (https://github.com/engineer-man/piston)"""
-    def _get_execute_url(self, uri):
+    @override
+    def _get_execute_url(self, uri: HttpUrl) -> str:
         return urljoin(str(uri), "execute")
-    def _parse_request_output(self, output):
-        output = output.json()
-        if output['run']['signal'] == "SIGKILL":
-            return {'result': None, 'error_message': 'Unknown error: SIGKILL'}
-        return json.loads(output['run']['output'])
+    @override
+    def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
+        output_json = output.json()
+        assert isinstance(output_json, dict)
+        assert 'run' in output_json
+        run_json = output_json['run']
+        assert isinstance(run_json, dict)
+        if run_json["code"] != 0:
+            return {'process_status': "error", 'stdout': run_json['stdout'], 'stderr': run_json['stderr']}
+        return {'process_status': "completed", 'stdout': run_json['stdout'], 'stderr': run_json['stderr']}
-    def _prepare_request(self, generated_code: str, timeout, **kwargs):
+    @override
+    def _prepare_request(self, generated_code: str, timeout_seconds: float, **kwargs) -> dict[str, Any]:
         return {
             "language": "py",
             "version": "3.10.0",
@@ -170,19 +251,17 @@ class PistonSandbox(Sandbox):
             }],
             "stdin": "",
             "args": [],
-            "run_timeout": timeout * 1000.0,  # milliseconds
+            "run_timeout": timeout_seconds * 1000.0,  # milliseconds
             "compile_memory_limit": -1,
             "run_memory_limit": -1,
         }
-sandboxes = {
-    'local': LocalSandbox,
-    'piston': PistonSandbox,
-}
 def get_sandbox(sandbox_type: str = "local", **kwargs):
     """A helper function to make it easier to set sandbox through cmd."""
+    sandboxes = {
+        'local': LocalSandbox,
+        'piston': PistonSandbox,
+    }
     sandbox_class = sandboxes[sandbox_type.lower()]
     return sandbox_class(**kwargs)

aiq/tool/code_execution/local_sandbox/.gitignore ADDED Viewed

	@@ -0,0 +1 @@
1	+ persistence_test.*

aiq/tool/code_execution/local_sandbox/local_sandbox_server.py CHANGED Viewed

@@ -12,16 +12,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
+import contextlib
 import logging
 import multiprocessing
+import os
 import resource
-import sys
+from enum import Enum
 from io import StringIO
 from flask import Flask
+from flask import Request
+from flask import Response
 from flask import request
+from pydantic import BaseModel
+from pydantic import Field
 app = Flask(__name__)
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+class CodeExecutionStatus(str, Enum):
+    """
+    Status of code execution.
+    """
+    COMPLETED = "completed"
+    ERROR = "error"
+    TIMEOUT = "timeout"
+class CodeExecutionResult(BaseModel):
+    """
+    Result of code execution.
+    """
+    process_status: CodeExecutionStatus = Field(default=CodeExecutionStatus.COMPLETED,
+                                                description="Status of the process")
+    stdout: str = Field(description="Standard output of the process")
+    stderr: str = Field(description="Standard error of the process")
+class CodeExecutionResponse(Response):
+    """
+    Response class that returns a JSON response with the given status code and result.
+    """
+    def __init__(self, status_code: int, result: CodeExecutionResult):
+        super().__init__(status=status_code, mimetype="application/json", response=result.model_dump_json())
+    @classmethod
+    def with_error(cls, status_code: int, error_message: str) -> 'CodeExecutionResponse':
+        return cls(status_code,
+                   CodeExecutionResult(process_status=CodeExecutionStatus.ERROR, stdout="", stderr=error_message))
 @app.after_request
@@ -34,50 +77,122 @@ def add_hsts_header(response):
     return response
-def execute_python(generated_code, timeout):
+def execute_python(generated_code: str, timeout: float) -> CodeExecutionResult:
+    """
+    Execute Python code in a subprocess.
+    Args:
+        generated_code: The code to execute
+        timeout: The timeout for the execution
+    Returns:
+        CodeExecutionResult object containing the execution result
+    """
     # running in a separate process to ensure any kind of crashes are properly handled
     queue = multiprocessing.Queue()
     process = multiprocessing.Process(target=execute_code_subprocess, args=(generated_code, queue))
     process.start()
+    # wait until the process finishes or the timeout expires
     process.join(timeout=timeout)
-    if process.is_alive():  # didn't finish successfully
+    if process.exitcode is None:
         process.kill()
-        return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
+        return CodeExecutionResult(process_status=CodeExecutionStatus.TIMEOUT, stdout="", stderr="Timed out\n")
     return queue.get()
 # need to memory-limit to avoid common errors of allocating too much
 # but this has to be done in a subprocess to not crush server itself
-def execute_code_subprocess(generated_code, queue):
-    limit = 1024 * 1024 * 1024 * 10  # 10gb - somehow with a smaller limit the server dies when numpy is used
-    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
-    resource.setrlimit(resource.RLIMIT_DATA, (limit, limit))
+def execute_code_subprocess(generated_code: str, queue):
+    """
+    Execute code in a subprocess.
+    Args:
+        generated_code: The code to execute
+        queue: The queue to put the result in
+    """
+    logger.debug("execute_code_subprocess started, PID: %s", os.getpid())
-    # this can be overriden inside generated code, so it's not a guaranteed protection
-    sys.stdout = StringIO()
     try:
-        exec(generated_code, {})  # pylint: disable=W0122
-        queue.put(sys.stdout.getvalue())
+        limit = 1024 * 1024 * 1024 * 10  # 10gb - somehow with a smaller limit the server dies when numpy is used
+        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
+        resource.setrlimit(resource.RLIMIT_DATA, (limit, limit))
     except Exception as e:
-        print(f"Error: {str(e)}")
-        queue.put({"process_status": "error", "stdout": "", "stderr": str(e) + "\n"})
+        logger.error("Failed to set resource limits, PID: %s, error: %s", os.getpid(), e)
+    stdout_capture = StringIO()
+    stderr_capture = StringIO()
+    try:
+        with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
+            exec(generated_code, {})  # pylint: disable=W0122
+        logger.debug("execute_code_subprocess finished, PID: %s", os.getpid())
+        queue.put(CodeExecutionResult(stdout=stdout_capture.getvalue(), stderr=stderr_capture.getvalue()))
+    except Exception as e:
+        import traceback
+        with contextlib.redirect_stderr(stderr_capture):
+            traceback.print_exc()
+        logger.debug("execute_code_subprocess failed, PID: %s, error: %s", os.getpid(), e)
+        queue.put(
+            CodeExecutionResult(process_status=CodeExecutionStatus.ERROR,
+                                stdout=stdout_capture.getvalue(),
+                                stderr=stderr_capture.getvalue()))
+def do_execute(request: Request) -> CodeExecutionResponse:
+    """
+    Main function to handle execution requests.
+    Args:
+        request: Request object containing the execution request
+    Returns:
+        CodeExecutionResponse object containing the execution result
+    """
+    try:
+        # Check if request has JSON data
+        if not request.is_json:
+            return CodeExecutionResponse.with_error(400, "Request must be JSON")
+        # Get JSON data safely
+        json_data = request.get_json(silent=True)
+        if json_data is None:
+            return CodeExecutionResponse.with_error(400, "Invalid JSON data")
+        # Check for required fields
+        if 'generated_code' not in json_data:
+            return CodeExecutionResponse.with_error(400, "Missing required field: generated_code")
+        if 'timeout' not in json_data:
+            return CodeExecutionResponse.with_error(400, "Missing required field: timeout")
+        if 'language' not in json_data:
+            return CodeExecutionResponse.with_error(400, "Missing required field: language")
+        generated_code: str | None = json_data.get('generated_code', None)
+        assert generated_code is not None
+        timeout: float | None = json_data.get('timeout', None)
+        assert timeout is not None
+        language: str | None = json_data.get('language', None)
+        assert language is not None
+        if language != 'python':
+            return CodeExecutionResponse.with_error(400, "Only python execution is supported")
+        return CodeExecutionResponse(200, execute_python(generated_code, timeout))
+    except Exception as e:
+        return CodeExecutionResponse.with_error(500, f"Server error: {str(e)}")
 # Main Flask endpoint to handle execution requests
 @app.route("/execute", methods=["POST"])
 def execute():
-    generated_code = request.json['generated_code']
-    timeout = request.json['timeout']
-    language = request.json.get('language', 'python')
-    if language == 'python':
-        return execute_python(generated_code, timeout)
-    return {"process_status": "error", "stdout": "", "stderr": "Only python execution is supported"}
+    return do_execute(request)
 if __name__ == '__main__':
-    log = logging.getLogger('werkzeug')
-    log.setLevel(logging.WARNING)
     app.run(port=6000)

aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 numpy
 pandas
 scipy
-ipython
+ipython
+plotly
+pydantic

aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh CHANGED Viewed

@@ -14,12 +14,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# Usage: ./start_local_sandbox.sh [SANDBOX_NAME] [OUTPUT_DATA_PATH]
 # NOTE: needs to run from the root of the repo!
+DOCKER_COMMAND=${DOCKER_COMMAND:-"docker"}
 SANDBOX_NAME=${1:-'local-sandbox'}
 NUM_THREADS=10
+# Get the output_data directory path for mounting
+# Priority: command line argument > environment variable > default path (current directory)
+OUTPUT_DATA_PATH=${2:-${OUTPUT_DATA_PATH:-$(pwd)}}
-docker build --tag=${SANDBOX_NAME} --build-arg="UWSGI_PROCESSES=$((${NUM_THREADS} * 10))" --build-arg="UWSGI_CHEAPER=${NUM_THREADS}" -f Dockerfile.sandbox .
+echo "Starting sandbox with container name: ${SANDBOX_NAME}"
+echo "Mounting output_data directory: ${OUTPUT_DATA_PATH}"
-docker run --network=host --rm --name=local-sandbox ${SANDBOX_NAME}
+# Verify the path exists before mounting, create if it doesn't
+if [ ! -d "${OUTPUT_DATA_PATH}" ]; then
+    echo "Output data directory does not exist, creating: ${OUTPUT_DATA_PATH}"
+    mkdir -p "${OUTPUT_DATA_PATH}"
+fi
+# Check if the Docker image already exists
+if ! ${DOCKER_COMMAND} images ${SANDBOX_NAME} | grep -q "${SANDBOX_NAME}"; then
+    echo "Docker image not found locally. Building ${SANDBOX_NAME}..."
+    ${DOCKER_COMMAND} build --tag=${SANDBOX_NAME} --build-arg="UWSGI_PROCESSES=$((${NUM_THREADS} * 10))" --build-arg="UWSGI_CHEAPER=${NUM_THREADS}" -f Dockerfile.sandbox .
+else
+    echo "Using existing Docker image: ${SANDBOX_NAME}"
+fi
+# Mount the output_data directory directly so files created in container appear in the local directory
+${DOCKER_COMMAND} run --rm --name=local-sandbox \
+  --network=host \
+  -v "${OUTPUT_DATA_PATH}:/workspace" \
+  -w /workspace \
+  ${SANDBOX_NAME}

aiq/tool/code_execution/register.py CHANGED Viewed

@@ -46,7 +46,11 @@ async def code_execution_tool(config: CodeExecutionToolConfig, builder: Builder)
     class CodeExecutionInputSchema(BaseModel):
         generated_code: str = Field(description="String containing the code to be executed")
-    sandbox = get_sandbox(sandbox_type=config.sandbox_type, uri=config.uri)
+    # Create sandbox without working_directory
+    sandbox_kwargs = {"uri": config.uri}
+    sandbox = get_sandbox(sandbox_type=config.sandbox_type, **sandbox_kwargs)
+    logger.info(f"[DEBUG] Created sandbox of type: {config.sandbox_type}")
     async def _execute_code(generated_code: str) -> dict:
         logger.info("Executing code in the sandbox at %s", config.uri)
@@ -54,12 +58,12 @@ async def code_execution_tool(config: CodeExecutionToolConfig, builder: Builder)
             output = await sandbox.execute_code(
                 generated_code=generated_code,
                 language="python",
-                timeout=config.timeout,
+                timeout_seconds=config.timeout,
                 max_output_characters=config.max_output_characters,
             )
         except Exception as e:
             logger.exception("Error when executing code in the sandbox, %s", e)
-            return {"process_status": "error", "stdout": "", "stderr": e}
+            return {"process_status": "error", "stdout": "", "stderr": str(e)}
         return output
     yield FunctionInfo.from_fn(

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl

Potentially problematic release.

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl