aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiqtoolkit might be problematic. Click here for more details.
- aiq/agent/base.py +170 -8
- aiq/agent/dual_node.py +1 -1
- aiq/agent/react_agent/agent.py +146 -112
- aiq/agent/react_agent/prompt.py +1 -6
- aiq/agent/react_agent/register.py +36 -35
- aiq/agent/rewoo_agent/agent.py +36 -35
- aiq/agent/rewoo_agent/register.py +2 -2
- aiq/agent/tool_calling_agent/agent.py +3 -7
- aiq/agent/tool_calling_agent/register.py +1 -1
- aiq/authentication/__init__.py +14 -0
- aiq/authentication/api_key/__init__.py +14 -0
- aiq/authentication/api_key/api_key_auth_provider.py +92 -0
- aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
- aiq/authentication/api_key/register.py +26 -0
- aiq/authentication/exceptions/__init__.py +14 -0
- aiq/authentication/exceptions/api_key_exceptions.py +38 -0
- aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
- aiq/authentication/exceptions/call_back_exceptions.py +38 -0
- aiq/authentication/exceptions/request_exceptions.py +54 -0
- aiq/authentication/http_basic_auth/__init__.py +0 -0
- aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- aiq/authentication/http_basic_auth/register.py +30 -0
- aiq/authentication/interfaces.py +93 -0
- aiq/authentication/oauth2/__init__.py +14 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- aiq/authentication/oauth2/register.py +25 -0
- aiq/authentication/register.py +21 -0
- aiq/builder/builder.py +64 -2
- aiq/builder/component_utils.py +16 -3
- aiq/builder/context.py +37 -0
- aiq/builder/eval_builder.py +43 -2
- aiq/builder/function.py +44 -12
- aiq/builder/function_base.py +1 -1
- aiq/builder/intermediate_step_manager.py +6 -8
- aiq/builder/user_interaction_manager.py +3 -0
- aiq/builder/workflow.py +23 -18
- aiq/builder/workflow_builder.py +421 -61
- aiq/cli/commands/info/list_mcp.py +103 -16
- aiq/cli/commands/sizing/__init__.py +14 -0
- aiq/cli/commands/sizing/calc.py +294 -0
- aiq/cli/commands/sizing/sizing.py +27 -0
- aiq/cli/commands/start.py +2 -1
- aiq/cli/entrypoint.py +2 -0
- aiq/cli/register_workflow.py +80 -0
- aiq/cli/type_registry.py +151 -30
- aiq/data_models/api_server.py +124 -12
- aiq/data_models/authentication.py +231 -0
- aiq/data_models/common.py +35 -7
- aiq/data_models/component.py +17 -9
- aiq/data_models/component_ref.py +33 -0
- aiq/data_models/config.py +60 -3
- aiq/data_models/dataset_handler.py +2 -1
- aiq/data_models/embedder.py +1 -0
- aiq/data_models/evaluate.py +23 -0
- aiq/data_models/function_dependencies.py +8 -0
- aiq/data_models/interactive.py +10 -1
- aiq/data_models/intermediate_step.py +38 -5
- aiq/data_models/its_strategy.py +30 -0
- aiq/data_models/llm.py +1 -0
- aiq/data_models/memory.py +1 -0
- aiq/data_models/object_store.py +44 -0
- aiq/data_models/profiler.py +1 -0
- aiq/data_models/retry_mixin.py +35 -0
- aiq/data_models/span.py +187 -0
- aiq/data_models/telemetry_exporter.py +2 -2
- aiq/embedder/nim_embedder.py +2 -1
- aiq/embedder/openai_embedder.py +2 -1
- aiq/eval/config.py +19 -1
- aiq/eval/dataset_handler/dataset_handler.py +87 -2
- aiq/eval/evaluate.py +208 -27
- aiq/eval/evaluator/base_evaluator.py +73 -0
- aiq/eval/evaluator/evaluator_model.py +1 -0
- aiq/eval/intermediate_step_adapter.py +11 -5
- aiq/eval/rag_evaluator/evaluate.py +55 -15
- aiq/eval/rag_evaluator/register.py +6 -1
- aiq/eval/remote_workflow.py +7 -2
- aiq/eval/runners/__init__.py +14 -0
- aiq/eval/runners/config.py +39 -0
- aiq/eval/runners/multi_eval_runner.py +54 -0
- aiq/eval/trajectory_evaluator/evaluate.py +22 -65
- aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
- aiq/eval/tunable_rag_evaluator/register.py +2 -0
- aiq/eval/usage_stats.py +41 -0
- aiq/eval/utils/output_uploader.py +10 -1
- aiq/eval/utils/weave_eval.py +184 -0
- aiq/experimental/__init__.py +0 -0
- aiq/experimental/decorators/__init__.py +0 -0
- aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
- aiq/experimental/inference_time_scaling/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
- aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
- aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
- aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
- aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
- aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
- aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
- aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
- aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
- aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
- aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
- aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
- aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
- aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
- aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
- aiq/experimental/inference_time_scaling/register.py +36 -0
- aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
- aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
- aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
- aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
- aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
- aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
- aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
- aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
- aiq/front_ends/console/authentication_flow_handler.py +233 -0
- aiq/front_ends/console/console_front_end_plugin.py +11 -2
- aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
- aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
- aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
- aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
- aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- aiq/front_ends/fastapi/job_store.py +47 -25
- aiq/front_ends/fastapi/main.py +2 -0
- aiq/front_ends/fastapi/message_handler.py +108 -89
- aiq/front_ends/fastapi/step_adaptor.py +2 -1
- aiq/llm/aws_bedrock_llm.py +57 -0
- aiq/llm/nim_llm.py +2 -1
- aiq/llm/openai_llm.py +3 -2
- aiq/llm/register.py +1 -0
- aiq/meta/pypi.md +12 -12
- aiq/object_store/__init__.py +20 -0
- aiq/object_store/in_memory_object_store.py +74 -0
- aiq/object_store/interfaces.py +84 -0
- aiq/object_store/models.py +36 -0
- aiq/object_store/register.py +20 -0
- aiq/observability/__init__.py +14 -0
- aiq/observability/exporter/__init__.py +14 -0
- aiq/observability/exporter/base_exporter.py +449 -0
- aiq/observability/exporter/exporter.py +78 -0
- aiq/observability/exporter/file_exporter.py +33 -0
- aiq/observability/exporter/processing_exporter.py +269 -0
- aiq/observability/exporter/raw_exporter.py +52 -0
- aiq/observability/exporter/span_exporter.py +264 -0
- aiq/observability/exporter_manager.py +335 -0
- aiq/observability/mixin/__init__.py +14 -0
- aiq/observability/mixin/batch_config_mixin.py +26 -0
- aiq/observability/mixin/collector_config_mixin.py +23 -0
- aiq/observability/mixin/file_mixin.py +288 -0
- aiq/observability/mixin/file_mode.py +23 -0
- aiq/observability/mixin/resource_conflict_mixin.py +134 -0
- aiq/observability/mixin/serialize_mixin.py +61 -0
- aiq/observability/mixin/type_introspection_mixin.py +183 -0
- aiq/observability/processor/__init__.py +14 -0
- aiq/observability/processor/batching_processor.py +316 -0
- aiq/observability/processor/intermediate_step_serializer.py +28 -0
- aiq/observability/processor/processor.py +68 -0
- aiq/observability/register.py +36 -39
- aiq/observability/utils/__init__.py +14 -0
- aiq/observability/utils/dict_utils.py +236 -0
- aiq/observability/utils/time_utils.py +31 -0
- aiq/profiler/calc/__init__.py +14 -0
- aiq/profiler/calc/calc_runner.py +623 -0
- aiq/profiler/calc/calculations.py +288 -0
- aiq/profiler/calc/data_models.py +176 -0
- aiq/profiler/calc/plot.py +345 -0
- aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
- aiq/profiler/data_models.py +24 -0
- aiq/profiler/inference_metrics_model.py +3 -0
- aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
- aiq/profiler/inference_optimization/data_models.py +2 -2
- aiq/profiler/inference_optimization/llm_metrics.py +2 -2
- aiq/profiler/profile_runner.py +61 -21
- aiq/runtime/loader.py +9 -3
- aiq/runtime/runner.py +23 -9
- aiq/runtime/session.py +25 -7
- aiq/runtime/user_metadata.py +2 -3
- aiq/tool/chat_completion.py +74 -0
- aiq/tool/code_execution/README.md +152 -0
- aiq/tool/code_execution/code_sandbox.py +151 -72
- aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
- aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
- aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
- aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
- aiq/tool/code_execution/register.py +7 -3
- aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
- aiq/tool/mcp/exceptions.py +142 -0
- aiq/tool/mcp/mcp_client.py +41 -6
- aiq/tool/mcp/mcp_tool.py +3 -2
- aiq/tool/register.py +1 -0
- aiq/tool/server_tools.py +6 -3
- aiq/utils/exception_handlers/automatic_retries.py +289 -0
- aiq/utils/exception_handlers/mcp.py +211 -0
- aiq/utils/io/model_processing.py +28 -0
- aiq/utils/log_utils.py +37 -0
- aiq/utils/string_utils.py +38 -0
- aiq/utils/type_converter.py +18 -2
- aiq/utils/type_utils.py +87 -0
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/METADATA +53 -21
- aiqtoolkit-1.2.0rc1.dist-info/RECORD +436 -0
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/WHEEL +1 -1
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/entry_points.txt +3 -0
- aiq/front_ends/fastapi/websocket.py +0 -148
- aiq/observability/async_otel_listener.py +0 -429
- aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -15,11 +15,16 @@
|
|
|
15
15
|
import abc
|
|
16
16
|
import json
|
|
17
17
|
import logging
|
|
18
|
+
import textwrap
|
|
19
|
+
from typing import Any
|
|
18
20
|
from urllib.parse import urljoin
|
|
19
21
|
|
|
20
22
|
import requests
|
|
23
|
+
import requests.adapters
|
|
21
24
|
from pydantic import HttpUrl
|
|
22
25
|
|
|
26
|
+
from aiq.utils.type_utils import override
|
|
27
|
+
|
|
23
28
|
logger = logging.getLogger(__file__)
|
|
24
29
|
|
|
25
30
|
|
|
@@ -43,18 +48,18 @@ class Sandbox(abc.ABC):
|
|
|
43
48
|
*,
|
|
44
49
|
uri: HttpUrl,
|
|
45
50
|
):
|
|
46
|
-
self.url = self._get_execute_url(uri)
|
|
51
|
+
self.url: str = self._get_execute_url(uri)
|
|
47
52
|
session = requests.Session()
|
|
48
53
|
adapter = requests.adapters.HTTPAdapter(pool_maxsize=1500, pool_connections=1500, max_retries=3)
|
|
49
54
|
session.mount('http://', adapter)
|
|
50
55
|
session.mount('https://', adapter)
|
|
51
|
-
self.http_session = session
|
|
56
|
+
self.http_session: requests.Session = session
|
|
52
57
|
|
|
53
|
-
def _send_request(self, request,
|
|
58
|
+
def _send_request(self, request: dict[str, Any], timeout_seconds: float) -> dict[str, str]:
|
|
54
59
|
output = self.http_session.post(
|
|
55
60
|
url=self.url,
|
|
56
61
|
data=json.dumps(request),
|
|
57
|
-
timeout=
|
|
62
|
+
timeout=timeout_seconds,
|
|
58
63
|
headers={"Content-Type": "application/json"},
|
|
59
64
|
)
|
|
60
65
|
# retrying 502 errors
|
|
@@ -64,104 +69,180 @@ class Sandbox(abc.ABC):
|
|
|
64
69
|
return self._parse_request_output(output)
|
|
65
70
|
|
|
66
71
|
@abc.abstractmethod
|
|
67
|
-
def _parse_request_output(self, output):
|
|
72
|
+
def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
|
|
68
73
|
pass
|
|
69
74
|
|
|
70
75
|
@abc.abstractmethod
|
|
71
|
-
def _get_execute_url(self, uri):
|
|
76
|
+
def _get_execute_url(self, uri: HttpUrl) -> str:
|
|
72
77
|
pass
|
|
73
78
|
|
|
74
79
|
@abc.abstractmethod
|
|
75
|
-
def _prepare_request(self, generated_code,
|
|
80
|
+
def _prepare_request(self, generated_code: str, timeout_seconds: float) -> dict[str, Any]:
|
|
76
81
|
pass
|
|
77
82
|
|
|
78
83
|
async def execute_code(
|
|
79
84
|
self,
|
|
80
85
|
generated_code: str,
|
|
81
|
-
|
|
86
|
+
timeout_seconds: float = 10.0,
|
|
82
87
|
language: str = "python",
|
|
83
88
|
max_output_characters: int = 1000,
|
|
84
|
-
) ->
|
|
89
|
+
) -> dict[str, str]:
|
|
85
90
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
import
|
|
92
|
-
import
|
|
93
|
-
import
|
|
94
|
-
warnings
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
stdout
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
""
|
|
123
|
-
|
|
91
|
+
if language != "python":
|
|
92
|
+
raise ValueError(f"Language {language} not supported")
|
|
93
|
+
|
|
94
|
+
generated_code = generated_code.strip().strip("`")
|
|
95
|
+
code_to_execute = textwrap.dedent("""
|
|
96
|
+
import traceback
|
|
97
|
+
import json
|
|
98
|
+
import os
|
|
99
|
+
import warnings
|
|
100
|
+
import contextlib
|
|
101
|
+
import io
|
|
102
|
+
warnings.filterwarnings('ignore')
|
|
103
|
+
os.environ['OPENBLAS_NUM_THREADS'] = '16'
|
|
104
|
+
""").strip()
|
|
105
|
+
|
|
106
|
+
# Use json.dumps to properly escape the generated_code instead of repr()
|
|
107
|
+
escaped_code = json.dumps(generated_code)
|
|
108
|
+
code_to_execute += textwrap.dedent(f"""
|
|
109
|
+
|
|
110
|
+
generated_code = {escaped_code}
|
|
111
|
+
|
|
112
|
+
stdout = io.StringIO()
|
|
113
|
+
stderr = io.StringIO()
|
|
114
|
+
|
|
115
|
+
with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
|
|
116
|
+
try:
|
|
117
|
+
exec(generated_code)
|
|
118
|
+
status = "completed"
|
|
119
|
+
except Exception:
|
|
120
|
+
status = "error"
|
|
121
|
+
stderr.write(traceback.format_exc())
|
|
122
|
+
stdout = stdout.getvalue()
|
|
123
|
+
stderr = stderr.getvalue()
|
|
124
|
+
if len(stdout) > {max_output_characters}:
|
|
125
|
+
stdout = stdout[:{max_output_characters}] + "<output cut>"
|
|
126
|
+
if len(stderr) > {max_output_characters}:
|
|
127
|
+
stderr = stderr[:{max_output_characters}] + "<output cut>"
|
|
128
|
+
if stdout:
|
|
129
|
+
stdout += "\\n"
|
|
130
|
+
if stderr:
|
|
131
|
+
stderr += "\\n"
|
|
132
|
+
output = {{"process_status": status, "stdout": stdout, "stderr": stderr}}
|
|
133
|
+
print(json.dumps(output))
|
|
134
|
+
""").strip()
|
|
135
|
+
request = self._prepare_request(code_to_execute, timeout_seconds)
|
|
124
136
|
try:
|
|
125
|
-
|
|
137
|
+
return self._send_request(request, timeout_seconds)
|
|
126
138
|
except requests.exceptions.Timeout:
|
|
127
|
-
|
|
128
|
-
return output
|
|
139
|
+
return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
|
|
129
140
|
|
|
130
141
|
|
|
131
142
|
class LocalSandbox(Sandbox):
|
|
132
143
|
"""Locally hosted sandbox."""
|
|
133
144
|
|
|
134
|
-
def
|
|
145
|
+
def __init__(self, *, uri: HttpUrl):
|
|
146
|
+
super().__init__(uri=uri)
|
|
147
|
+
|
|
148
|
+
@override
|
|
149
|
+
def _get_execute_url(self, uri: HttpUrl) -> str:
|
|
135
150
|
return urljoin(str(uri), "execute")
|
|
136
151
|
|
|
137
|
-
|
|
152
|
+
@override
|
|
153
|
+
def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
|
|
138
154
|
try:
|
|
139
|
-
|
|
155
|
+
output_json = output.json()
|
|
156
|
+
assert isinstance(output_json, dict)
|
|
157
|
+
return output_json
|
|
140
158
|
except json.JSONDecodeError as e:
|
|
141
|
-
logger.exception("Error
|
|
142
|
-
return {'process_status': 'error', 'stdout': '', 'stderr': 'Unknown error'}
|
|
159
|
+
logger.exception("Error parsing output: %s. %s", output.text, e)
|
|
160
|
+
return {'process_status': 'error', 'stdout': '', 'stderr': f'Unknown error: {e} \"{output.text}\"'}
|
|
143
161
|
|
|
144
|
-
|
|
145
|
-
|
|
162
|
+
@override
|
|
163
|
+
def _prepare_request(self,
|
|
164
|
+
generated_code: str,
|
|
165
|
+
timeout_seconds: float,
|
|
166
|
+
language: str = "python",
|
|
167
|
+
**kwargs) -> dict[str, Any]:
|
|
168
|
+
request = {
|
|
146
169
|
"generated_code": generated_code,
|
|
147
|
-
"timeout":
|
|
170
|
+
"timeout": timeout_seconds,
|
|
148
171
|
"language": language,
|
|
149
172
|
}
|
|
173
|
+
return request
|
|
174
|
+
|
|
175
|
+
@override
|
|
176
|
+
async def execute_code(
|
|
177
|
+
self,
|
|
178
|
+
generated_code: str,
|
|
179
|
+
timeout_seconds: float = 10.0,
|
|
180
|
+
language: str = "python",
|
|
181
|
+
max_output_characters: int = 1000,
|
|
182
|
+
) -> dict[str, str]:
|
|
183
|
+
"""Override execute_code to bypass the wrapper logic and send user code directly to our server."""
|
|
184
|
+
|
|
185
|
+
logger.debug("Raw input generated_code: %s", generated_code)
|
|
186
|
+
|
|
187
|
+
# The input appears to be a string representation of a dictionary
|
|
188
|
+
# We need to parse it and extract the actual code
|
|
189
|
+
try:
|
|
190
|
+
# Try to evaluate the string as a Python literal (dictionary)
|
|
191
|
+
import ast
|
|
192
|
+
parsed_dict = ast.literal_eval(generated_code)
|
|
193
|
+
if isinstance(parsed_dict, dict) and 'generated_code' in parsed_dict:
|
|
194
|
+
actual_code = parsed_dict['generated_code']
|
|
195
|
+
assert isinstance(actual_code, str)
|
|
196
|
+
logger.debug("Extracted code from dict: %s...", actual_code[:100])
|
|
197
|
+
else:
|
|
198
|
+
# If it's not a dict or doesn't have the expected key, use as-is
|
|
199
|
+
actual_code = generated_code
|
|
200
|
+
logger.debug("Using code as-is: %s...", actual_code[:100])
|
|
201
|
+
except (ValueError, SyntaxError):
|
|
202
|
+
# If parsing fails, use the input as-is
|
|
203
|
+
actual_code = generated_code
|
|
204
|
+
logger.debug("Failed to parse, using as-is: %s...", actual_code[:100])
|
|
205
|
+
|
|
206
|
+
# Clean the actual code more carefully to avoid removing backticks that are part of Python code
|
|
207
|
+
# remove all leading/trailing whitespace -- strip()
|
|
208
|
+
# remove all leading/trailing backticks -- strip("`")
|
|
209
|
+
# may potentially start with python, so just trim from the front.
|
|
210
|
+
POTENTIAL_PREFIXES = ["python"]
|
|
211
|
+
actual_code = actual_code.strip().strip("`")
|
|
212
|
+
for prefix in POTENTIAL_PREFIXES:
|
|
213
|
+
if actual_code.startswith(prefix):
|
|
214
|
+
actual_code = actual_code[len(prefix):]
|
|
215
|
+
break
|
|
216
|
+
|
|
217
|
+
# Send the user's code directly to our server without any wrapper logic
|
|
218
|
+
# Our server already handles stdout/stderr capture and error handling
|
|
219
|
+
request = self._prepare_request(actual_code, timeout_seconds, language)
|
|
220
|
+
try:
|
|
221
|
+
return self._send_request(request, timeout_seconds)
|
|
222
|
+
except requests.exceptions.Timeout:
|
|
223
|
+
return {"process_status": "timeout", "stdout": "", "stderr": "Timed out\n"}
|
|
150
224
|
|
|
151
225
|
|
|
152
226
|
class PistonSandbox(Sandbox):
|
|
153
227
|
"""Piston sandbox (https://github.com/engineer-man/piston)"""
|
|
154
228
|
|
|
155
|
-
|
|
229
|
+
@override
|
|
230
|
+
def _get_execute_url(self, uri: HttpUrl) -> str:
|
|
156
231
|
return urljoin(str(uri), "execute")
|
|
157
232
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
233
|
+
@override
|
|
234
|
+
def _parse_request_output(self, output: requests.Response) -> dict[str, str]:
|
|
235
|
+
output_json = output.json()
|
|
236
|
+
assert isinstance(output_json, dict)
|
|
237
|
+
assert 'run' in output_json
|
|
238
|
+
run_json = output_json['run']
|
|
239
|
+
assert isinstance(run_json, dict)
|
|
240
|
+
if run_json["code"] != 0:
|
|
241
|
+
return {'process_status': "error", 'stdout': run_json['stdout'], 'stderr': run_json['stderr']}
|
|
242
|
+
return {'process_status': "completed", 'stdout': run_json['stdout'], 'stderr': run_json['stderr']}
|
|
163
243
|
|
|
164
|
-
|
|
244
|
+
@override
|
|
245
|
+
def _prepare_request(self, generated_code: str, timeout_seconds: float, **kwargs) -> dict[str, Any]:
|
|
165
246
|
return {
|
|
166
247
|
"language": "py",
|
|
167
248
|
"version": "3.10.0",
|
|
@@ -170,19 +251,17 @@ class PistonSandbox(Sandbox):
|
|
|
170
251
|
}],
|
|
171
252
|
"stdin": "",
|
|
172
253
|
"args": [],
|
|
173
|
-
"run_timeout":
|
|
254
|
+
"run_timeout": timeout_seconds * 1000.0, # milliseconds
|
|
174
255
|
"compile_memory_limit": -1,
|
|
175
256
|
"run_memory_limit": -1,
|
|
176
257
|
}
|
|
177
258
|
|
|
178
259
|
|
|
179
|
-
sandboxes = {
|
|
180
|
-
'local': LocalSandbox,
|
|
181
|
-
'piston': PistonSandbox,
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
|
|
185
260
|
def get_sandbox(sandbox_type: str = "local", **kwargs):
|
|
186
261
|
"""A helper function to make it easier to set sandbox through cmd."""
|
|
262
|
+
sandboxes = {
|
|
263
|
+
'local': LocalSandbox,
|
|
264
|
+
'piston': PistonSandbox,
|
|
265
|
+
}
|
|
187
266
|
sandbox_class = sandboxes[sandbox_type.lower()]
|
|
188
267
|
return sandbox_class(**kwargs)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
persistence_test.*
|
|
@@ -12,16 +12,59 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import contextlib
|
|
15
18
|
import logging
|
|
16
19
|
import multiprocessing
|
|
20
|
+
import os
|
|
17
21
|
import resource
|
|
18
|
-
import
|
|
22
|
+
from enum import Enum
|
|
19
23
|
from io import StringIO
|
|
20
24
|
|
|
21
25
|
from flask import Flask
|
|
26
|
+
from flask import Request
|
|
27
|
+
from flask import Response
|
|
22
28
|
from flask import request
|
|
29
|
+
from pydantic import BaseModel
|
|
30
|
+
from pydantic import Field
|
|
23
31
|
|
|
24
32
|
app = Flask(__name__)
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
logger.setLevel(logging.WARNING)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CodeExecutionStatus(str, Enum):
|
|
38
|
+
"""
|
|
39
|
+
Status of code execution.
|
|
40
|
+
"""
|
|
41
|
+
COMPLETED = "completed"
|
|
42
|
+
ERROR = "error"
|
|
43
|
+
TIMEOUT = "timeout"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class CodeExecutionResult(BaseModel):
|
|
47
|
+
"""
|
|
48
|
+
Result of code execution.
|
|
49
|
+
"""
|
|
50
|
+
process_status: CodeExecutionStatus = Field(default=CodeExecutionStatus.COMPLETED,
|
|
51
|
+
description="Status of the process")
|
|
52
|
+
stdout: str = Field(description="Standard output of the process")
|
|
53
|
+
stderr: str = Field(description="Standard error of the process")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class CodeExecutionResponse(Response):
|
|
57
|
+
"""
|
|
58
|
+
Response class that returns a JSON response with the given status code and result.
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
def __init__(self, status_code: int, result: CodeExecutionResult):
|
|
62
|
+
super().__init__(status=status_code, mimetype="application/json", response=result.model_dump_json())
|
|
63
|
+
|
|
64
|
+
@classmethod
|
|
65
|
+
def with_error(cls, status_code: int, error_message: str) -> 'CodeExecutionResponse':
|
|
66
|
+
return cls(status_code,
|
|
67
|
+
CodeExecutionResult(process_status=CodeExecutionStatus.ERROR, stdout="", stderr=error_message))
|
|
25
68
|
|
|
26
69
|
|
|
27
70
|
@app.after_request
|
|
@@ -34,50 +77,122 @@ def add_hsts_header(response):
|
|
|
34
77
|
return response
|
|
35
78
|
|
|
36
79
|
|
|
37
|
-
def execute_python(generated_code, timeout):
|
|
80
|
+
def execute_python(generated_code: str, timeout: float) -> CodeExecutionResult:
|
|
81
|
+
"""
|
|
82
|
+
Execute Python code in a subprocess.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
generated_code: The code to execute
|
|
86
|
+
timeout: The timeout for the execution
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
CodeExecutionResult object containing the execution result
|
|
90
|
+
"""
|
|
91
|
+
|
|
38
92
|
# running in a separate process to ensure any kind of crashes are properly handled
|
|
39
93
|
queue = multiprocessing.Queue()
|
|
40
94
|
process = multiprocessing.Process(target=execute_code_subprocess, args=(generated_code, queue))
|
|
95
|
+
|
|
41
96
|
process.start()
|
|
97
|
+
# wait until the process finishes or the timeout expires
|
|
42
98
|
process.join(timeout=timeout)
|
|
43
|
-
|
|
44
|
-
if process.is_alive(): # didn't finish successfully
|
|
99
|
+
if process.exitcode is None:
|
|
45
100
|
process.kill()
|
|
46
|
-
return
|
|
101
|
+
return CodeExecutionResult(process_status=CodeExecutionStatus.TIMEOUT, stdout="", stderr="Timed out\n")
|
|
47
102
|
|
|
48
103
|
return queue.get()
|
|
49
104
|
|
|
50
105
|
|
|
51
106
|
# need to memory-limit to avoid common errors of allocating too much
|
|
52
107
|
# but this has to be done in a subprocess to not crush server itself
|
|
53
|
-
def execute_code_subprocess(generated_code, queue):
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
108
|
+
def execute_code_subprocess(generated_code: str, queue):
|
|
109
|
+
"""
|
|
110
|
+
Execute code in a subprocess.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
generated_code: The code to execute
|
|
114
|
+
queue: The queue to put the result in
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
logger.debug("execute_code_subprocess started, PID: %s", os.getpid())
|
|
57
118
|
|
|
58
|
-
# this can be overriden inside generated code, so it's not a guaranteed protection
|
|
59
|
-
sys.stdout = StringIO()
|
|
60
119
|
try:
|
|
61
|
-
|
|
62
|
-
|
|
120
|
+
limit = 1024 * 1024 * 1024 * 10 # 10gb - somehow with a smaller limit the server dies when numpy is used
|
|
121
|
+
resource.setrlimit(resource.RLIMIT_AS, (limit, limit))
|
|
122
|
+
resource.setrlimit(resource.RLIMIT_DATA, (limit, limit))
|
|
63
123
|
except Exception as e:
|
|
64
|
-
|
|
65
|
-
|
|
124
|
+
logger.error("Failed to set resource limits, PID: %s, error: %s", os.getpid(), e)
|
|
125
|
+
|
|
126
|
+
stdout_capture = StringIO()
|
|
127
|
+
stderr_capture = StringIO()
|
|
128
|
+
try:
|
|
129
|
+
with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(stderr_capture):
|
|
130
|
+
exec(generated_code, {}) # pylint: disable=W0122
|
|
131
|
+
logger.debug("execute_code_subprocess finished, PID: %s", os.getpid())
|
|
132
|
+
queue.put(CodeExecutionResult(stdout=stdout_capture.getvalue(), stderr=stderr_capture.getvalue()))
|
|
133
|
+
except Exception as e:
|
|
134
|
+
import traceback
|
|
135
|
+
with contextlib.redirect_stderr(stderr_capture):
|
|
136
|
+
traceback.print_exc()
|
|
137
|
+
logger.debug("execute_code_subprocess failed, PID: %s, error: %s", os.getpid(), e)
|
|
138
|
+
queue.put(
|
|
139
|
+
CodeExecutionResult(process_status=CodeExecutionStatus.ERROR,
|
|
140
|
+
stdout=stdout_capture.getvalue(),
|
|
141
|
+
stderr=stderr_capture.getvalue()))
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def do_execute(request: Request) -> CodeExecutionResponse:
|
|
145
|
+
"""
|
|
146
|
+
Main function to handle execution requests.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
request: Request object containing the execution request
|
|
150
|
+
|
|
151
|
+
Returns:
|
|
152
|
+
CodeExecutionResponse object containing the execution result
|
|
153
|
+
"""
|
|
154
|
+
try:
|
|
155
|
+
# Check if request has JSON data
|
|
156
|
+
if not request.is_json:
|
|
157
|
+
return CodeExecutionResponse.with_error(400, "Request must be JSON")
|
|
158
|
+
|
|
159
|
+
# Get JSON data safely
|
|
160
|
+
json_data = request.get_json(silent=True)
|
|
161
|
+
|
|
162
|
+
if json_data is None:
|
|
163
|
+
return CodeExecutionResponse.with_error(400, "Invalid JSON data")
|
|
164
|
+
|
|
165
|
+
# Check for required fields
|
|
166
|
+
if 'generated_code' not in json_data:
|
|
167
|
+
return CodeExecutionResponse.with_error(400, "Missing required field: generated_code")
|
|
168
|
+
|
|
169
|
+
if 'timeout' not in json_data:
|
|
170
|
+
return CodeExecutionResponse.with_error(400, "Missing required field: timeout")
|
|
171
|
+
|
|
172
|
+
if 'language' not in json_data:
|
|
173
|
+
return CodeExecutionResponse.with_error(400, "Missing required field: language")
|
|
174
|
+
|
|
175
|
+
generated_code: str | None = json_data.get('generated_code', None)
|
|
176
|
+
assert generated_code is not None
|
|
177
|
+
timeout: float | None = json_data.get('timeout', None)
|
|
178
|
+
assert timeout is not None
|
|
179
|
+
language: str | None = json_data.get('language', None)
|
|
180
|
+
assert language is not None
|
|
181
|
+
|
|
182
|
+
if language != 'python':
|
|
183
|
+
return CodeExecutionResponse.with_error(400, "Only python execution is supported")
|
|
184
|
+
|
|
185
|
+
return CodeExecutionResponse(200, execute_python(generated_code, timeout))
|
|
186
|
+
|
|
187
|
+
except Exception as e:
|
|
188
|
+
return CodeExecutionResponse.with_error(500, f"Server error: {str(e)}")
|
|
66
189
|
|
|
67
190
|
|
|
68
191
|
# Main Flask endpoint to handle execution requests
|
|
69
192
|
@app.route("/execute", methods=["POST"])
|
|
70
193
|
def execute():
|
|
71
|
-
|
|
72
|
-
timeout = request.json['timeout']
|
|
73
|
-
language = request.json.get('language', 'python')
|
|
74
|
-
|
|
75
|
-
if language == 'python':
|
|
76
|
-
return execute_python(generated_code, timeout)
|
|
77
|
-
return {"process_status": "error", "stdout": "", "stderr": "Only python execution is supported"}
|
|
194
|
+
return do_execute(request)
|
|
78
195
|
|
|
79
196
|
|
|
80
197
|
if __name__ == '__main__':
|
|
81
|
-
log = logging.getLogger('werkzeug')
|
|
82
|
-
log.setLevel(logging.WARNING)
|
|
83
198
|
app.run(port=6000)
|
|
@@ -14,12 +14,37 @@
|
|
|
14
14
|
# See the License for the specific language governing permissions and
|
|
15
15
|
# limitations under the License.
|
|
16
16
|
|
|
17
|
+
# Usage: ./start_local_sandbox.sh [SANDBOX_NAME] [OUTPUT_DATA_PATH]
|
|
17
18
|
# NOTE: needs to run from the root of the repo!
|
|
18
19
|
|
|
20
|
+
DOCKER_COMMAND=${DOCKER_COMMAND:-"docker"}
|
|
19
21
|
SANDBOX_NAME=${1:-'local-sandbox'}
|
|
20
22
|
NUM_THREADS=10
|
|
21
23
|
|
|
24
|
+
# Get the output_data directory path for mounting
|
|
25
|
+
# Priority: command line argument > environment variable > default path (current directory)
|
|
26
|
+
OUTPUT_DATA_PATH=${2:-${OUTPUT_DATA_PATH:-$(pwd)}}
|
|
22
27
|
|
|
23
|
-
|
|
28
|
+
echo "Starting sandbox with container name: ${SANDBOX_NAME}"
|
|
29
|
+
echo "Mounting output_data directory: ${OUTPUT_DATA_PATH}"
|
|
24
30
|
|
|
25
|
-
|
|
31
|
+
# Verify the path exists before mounting, create if it doesn't
|
|
32
|
+
if [ ! -d "${OUTPUT_DATA_PATH}" ]; then
|
|
33
|
+
echo "Output data directory does not exist, creating: ${OUTPUT_DATA_PATH}"
|
|
34
|
+
mkdir -p "${OUTPUT_DATA_PATH}"
|
|
35
|
+
fi
|
|
36
|
+
|
|
37
|
+
# Check if the Docker image already exists
|
|
38
|
+
if ! ${DOCKER_COMMAND} images ${SANDBOX_NAME} | grep -q "${SANDBOX_NAME}"; then
|
|
39
|
+
echo "Docker image not found locally. Building ${SANDBOX_NAME}..."
|
|
40
|
+
${DOCKER_COMMAND} build --tag=${SANDBOX_NAME} --build-arg="UWSGI_PROCESSES=$((${NUM_THREADS} * 10))" --build-arg="UWSGI_CHEAPER=${NUM_THREADS}" -f Dockerfile.sandbox .
|
|
41
|
+
else
|
|
42
|
+
echo "Using existing Docker image: ${SANDBOX_NAME}"
|
|
43
|
+
fi
|
|
44
|
+
|
|
45
|
+
# Mount the output_data directory directly so files created in container appear in the local directory
|
|
46
|
+
${DOCKER_COMMAND} run --rm --name=local-sandbox \
|
|
47
|
+
--network=host \
|
|
48
|
+
-v "${OUTPUT_DATA_PATH}:/workspace" \
|
|
49
|
+
-w /workspace \
|
|
50
|
+
${SANDBOX_NAME}
|
|
@@ -46,7 +46,11 @@ async def code_execution_tool(config: CodeExecutionToolConfig, builder: Builder)
|
|
|
46
46
|
class CodeExecutionInputSchema(BaseModel):
|
|
47
47
|
generated_code: str = Field(description="String containing the code to be executed")
|
|
48
48
|
|
|
49
|
-
sandbox
|
|
49
|
+
# Create sandbox without working_directory
|
|
50
|
+
sandbox_kwargs = {"uri": config.uri}
|
|
51
|
+
|
|
52
|
+
sandbox = get_sandbox(sandbox_type=config.sandbox_type, **sandbox_kwargs)
|
|
53
|
+
logger.info(f"[DEBUG] Created sandbox of type: {config.sandbox_type}")
|
|
50
54
|
|
|
51
55
|
async def _execute_code(generated_code: str) -> dict:
|
|
52
56
|
logger.info("Executing code in the sandbox at %s", config.uri)
|
|
@@ -54,12 +58,12 @@ async def code_execution_tool(config: CodeExecutionToolConfig, builder: Builder)
|
|
|
54
58
|
output = await sandbox.execute_code(
|
|
55
59
|
generated_code=generated_code,
|
|
56
60
|
language="python",
|
|
57
|
-
|
|
61
|
+
timeout_seconds=config.timeout,
|
|
58
62
|
max_output_characters=config.max_output_characters,
|
|
59
63
|
)
|
|
60
64
|
except Exception as e:
|
|
61
65
|
logger.exception("Error when executing code in the sandbox, %s", e)
|
|
62
|
-
return {"process_status": "error", "stdout": "", "stderr": e}
|
|
66
|
+
return {"process_status": "error", "stdout": "", "stderr": str(e)}
|
|
63
67
|
return output
|
|
64
68
|
|
|
65
69
|
yield FunctionInfo.from_fn(
|