rlm-code 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rlm_code-0.1.5 → rlm_code-0.1.6}/CHANGELOG.md +25 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/PKG-INFO +42 -16
- {rlm_code-0.1.5 → rlm_code-0.1.6}/README.md +41 -15
- {rlm_code-0.1.5 → rlm_code-0.1.6}/pyproject.toml +1 -1
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/__init__.py +1 -1
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/slash_commands.py +84 -7
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/core/config.py +1 -1
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/harness/registry.py +306 -5
- rlm_code-0.1.6/rlm_code/harness/runner.py +710 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/__init__.py +1 -1
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/server/tools.py +1 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/benchmark_manager.py +112 -23
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/benchmarks.py +40 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/runner.py +2 -0
- rlm_code-0.1.6/rlm_code/sandbox/runtimes/monty_runtime.py +72 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/registry.py +27 -1
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_phase3.py +25 -2
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_phase4.py +2 -1
- rlm_code-0.1.6/tests/test_harness_registry.py +176 -0
- rlm_code-0.1.6/tests/test_harness_runner.py +182 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_provider_registry.py +6 -1
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_rlm_runner.py +97 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_sandbox_runtimes.py +46 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_slash_harness_command.py +41 -5
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_slash_rlm_command.py +62 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_slash_sandbox_command.py +23 -0
- rlm_code-0.1.5/rlm_code/harness/runner.py +0 -288
- rlm_code-0.1.5/tests/test_harness_registry.py +0 -46
- rlm_code-0.1.5/tests/test_harness_runner.py +0 -64
- {rlm_code-0.1.5 → rlm_code-0.1.6}/.gitignore +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/LICENSE +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/NOTICE +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/agent.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/agents/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/agents/rlm_agent.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/callbacks/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/callbacks/code_execution.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/cli.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/code_executor.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/events.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/lazy.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/loader.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/parsers/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/parsers/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/parsers/pdf.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/parsers/text.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/sources/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/sources/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/sources/gcs.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/files/sources/local.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/llm.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/logging/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/logging/rlm_logger.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/logging/verbose.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/main.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/prompts.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/repl/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/repl/local_repl.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/repl/safe_builtins.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/templates/index.html +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/tools/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/types.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/usage.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/adk_rlm/web.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/eval/packs/README.md +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/__main__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/config_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/create_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/demo_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/export_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/init_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/interactive_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/mcp_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/models_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/nl_command_router.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/optimize_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/commands/run_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/core/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/core/debug_logger.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/core/directory_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/core/exceptions.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/core/logging.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/core/venv_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/core/version_checker.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/examples/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/examples/phase2_demo.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/examples/phase3_demo.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/examples/phase4_demo.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/examples/pure_rlm_demo.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/execution/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/execution/engine.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/execution/sandbox.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/export/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/export/handler.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/export/package_builder.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/generators/evaluation_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/generators/gepa_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/harness/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/main.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/client_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/config.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/exceptions.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/retry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/server/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/server/rlm_server.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/session_wrapper.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/transports/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/transports/factory.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/transports/sse_transport.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/transports/stdio_transport.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/transports/websocket_transport.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/mcp/utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/cache.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/code_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/dspy_reference_loader.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/llm_connector.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/model_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/providers/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/providers/acp_discovery.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/providers/local_discovery.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/providers/model_catalog.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/providers/registry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/streaming.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/models/task_collector.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/optimization/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/optimization/data_collector.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/optimization/executor.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/optimization/workflow_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/project/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/project/context_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/project/dspy_md_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/project/initializer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/project/scanner.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/py.typed +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/action_planner.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/approval/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/approval/audit.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/approval/gate.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/approval/handlers.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/approval/policy.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/chat_session.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/code_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/comparison.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/config_schema.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/context_store.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/delegation.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/docker_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/environments.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/events.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/frameworks/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/frameworks/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/frameworks/registry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/leaderboard.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/memory_compaction.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/mock_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/monty_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/observability.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/observability_sinks.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/policies/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/policies/action_policies.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/policies/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/policies/compaction_policies.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/policies/registry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/policies/reward_policies.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/policies/termination_policies.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/pure_rlm_environment.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/repl_types.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/research_tui/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/research_tui/theme.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/session_replay.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/task_signature.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/termination.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/trajectory.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/rlm/visualizer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/sandbox/superbox.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/session/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/session/state_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/.env.example +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/adapters.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/async_streaming.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/complete_programs.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/dspy_config_example.yaml +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/evaluation.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/industry_templates.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/optimizers.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/retrievers.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/tests/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/tests/rlm/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/tests/rlm/test_phase2.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/agent_collab_view.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/animations.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/conversation.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/design_system.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/diff_viewer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/notifications.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/persistent_shell.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/prompt_widget.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/prompts.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/pty_terminal.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/resizable_divider.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/thinking_display.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/tui_app.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/tui_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/ui/welcome.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/anti_patterns.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/auto_fixer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/best_practices.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/code_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/config_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/exceptions.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/input_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/learning_integration.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/models.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/module_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/predictor_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/quality_scorer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/report_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/security.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/security_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/signature_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/rlm_code/validation/validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/conftest.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_code_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_deepagents_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_extract_fallback.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_framework_registry_coverage.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_google_adk_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_leaderboard.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_mock_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_monty_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_observability_sinks.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_p0_features.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_repl_history.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_security_hardening.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_session_replay.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_submit.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_task_signature.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/rlm/test_user_tools.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_anti_patterns.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_auto_fixer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_cache.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_execution_engine.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_export_import.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_init_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_integration.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_learning_integration.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_mcp_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_module_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_optimization_workflow.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_persistent_shell.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_predictor_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_project_scanner.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_prompt_widget.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_property_validators.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_provider_discovery.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_quality_scorer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_report_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_retry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_rlm_config.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_rlm_dspy_environment.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_rlm_observability.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_security_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_session_management.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_signature_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_streaming.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_superbox.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_tui_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.6}/tests/test_validation.py +0 -0
|
@@ -5,6 +5,30 @@ All notable changes to this project are documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.6] - 2026-02-20
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Harness strategy selector with `tool_call` (default) and opt-in `codemode`.
|
|
12
|
+
- CodeMode execution flow in harness: MCP tool discovery (`search_tools`), typed tool surface prompt, single-program generation, guardrail validation, and MCP chain execution (`call_tool_chain`).
|
|
13
|
+
- Benchmark support for harness strategy comparison with CodeMode telemetry fields (`harness_strategy`, `codemode_chain_calls`, `codemode_search_calls`, `codemode_discovery_calls`, `codemode_guardrail_blocked`).
|
|
14
|
+
- New top-level CodeMode docs section with dedicated pages for quickstart, architecture, guardrails, and evaluation.
|
|
15
|
+
- Release documentation set for CodeMode:
|
|
16
|
+
- quickstart and operator workflow
|
|
17
|
+
- integration architecture and runtime controls
|
|
18
|
+
- provider/bridge separation model (Cloudflare-based, UTCP, custom)
|
|
19
|
+
- CodeMode sandbox responsibility and deployment matrix
|
|
20
|
+
- guardrail policy and safety runbook
|
|
21
|
+
- benchmark evaluation and promotion-gate criteria
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
- `/harness run` supports `strategy=tool_call|codemode` and `mcp_server=<name>`.
|
|
25
|
+
- `/rlm bench` in `mode=harness` supports `strategy=tool_call|codemode`.
|
|
26
|
+
- Harness and benchmark command handling now auto-enables MCP when `strategy=codemode` is selected.
|
|
27
|
+
|
|
28
|
+
### Security
|
|
29
|
+
- Added explicit CodeMode guardrail policy documentation with blocked API classes and runtime limit defaults.
|
|
30
|
+
- CodeMode path remains opt-in; default harness behavior remains strict baseline `strategy=tool_call`.
|
|
31
|
+
|
|
8
32
|
## [0.1.5] - 2026-02-15
|
|
9
33
|
|
|
10
34
|
Initial public release of **RLM Code**.
|
|
@@ -31,3 +55,4 @@ Initial public release of **RLM Code**.
|
|
|
31
55
|
- Unsafe local `exec` usage preserved only as an explicit, opt-in path for advanced development scenarios.
|
|
32
56
|
|
|
33
57
|
[0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
|
|
58
|
+
[0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlm-code
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
|
|
5
5
|
Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
|
|
6
6
|
Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
|
|
@@ -99,20 +99,18 @@ Description-Content-Type: text/markdown
|
|
|
99
99
|
</a>
|
|
100
100
|
</p>
|
|
101
101
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
<a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
|
|
115
|
-
</p>
|
|
102
|
+
[](https://pypi.org/project/rlm-code/)
|
|
103
|
+
[](https://pypi.org/project/rlm-code/)
|
|
104
|
+
[](https://pypi.org/project/rlm-code/)
|
|
105
|
+
[](https://pypi.org/project/rlm-code/)
|
|
106
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
|
|
107
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
|
|
108
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
|
|
109
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
|
|
110
|
+
[](https://superagenticai.github.io/rlm-code/)
|
|
111
|
+
[](https://github.com/SuperagenticAI/rlm-code/stargazers)
|
|
112
|
+
[](https://github.com/SuperagenticAI/rlm-code/issues)
|
|
113
|
+
[](https://github.com/SuperagenticAI/rlm-code/pulls)
|
|
116
114
|
|
|
117
115
|
**Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
|
|
118
116
|
|
|
@@ -120,6 +118,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
120
118
|
|
|
121
119
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
122
120
|
|
|
121
|
+
## Release v0.1.6
|
|
122
|
+
|
|
123
|
+
This release adds the new CodeMode path as an opt-in harness strategy.
|
|
124
|
+
|
|
125
|
+
- New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
|
|
126
|
+
- MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
|
|
127
|
+
- Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
|
|
128
|
+
- Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
|
|
129
|
+
- Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
|
|
130
|
+
|
|
131
|
+
Example:
|
|
132
|
+
|
|
133
|
+
```text
|
|
134
|
+
/harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Documentation
|
|
138
|
+
|
|
139
|
+
<p align="center">
|
|
140
|
+
<a href="https://superagenticai.github.io/rlm-code/">
|
|
141
|
+
<img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
|
|
142
|
+
</a>
|
|
143
|
+
</p>
|
|
144
|
+
|
|
145
|
+
<p align="center">
|
|
146
|
+
<a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
|
|
147
|
+
</p>
|
|
148
|
+
|
|
123
149
|
## Install
|
|
124
150
|
|
|
125
151
|
```bash
|
|
@@ -399,7 +425,7 @@ rlm_code/
|
|
|
399
425
|
harness/ # Tool-using coding harness (/harness)
|
|
400
426
|
```
|
|
401
427
|
|
|
402
|
-
##
|
|
428
|
+
## Resources
|
|
403
429
|
|
|
404
430
|
Full docs: https://superagenticai.github.io/rlm-code/
|
|
405
431
|
|
|
@@ -6,20 +6,18 @@
|
|
|
6
6
|
</a>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
<a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
|
|
22
|
-
</p>
|
|
9
|
+
[](https://pypi.org/project/rlm-code/)
|
|
10
|
+
[](https://pypi.org/project/rlm-code/)
|
|
11
|
+
[](https://pypi.org/project/rlm-code/)
|
|
12
|
+
[](https://pypi.org/project/rlm-code/)
|
|
13
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
|
|
14
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
|
|
15
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
|
|
16
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
|
|
17
|
+
[](https://superagenticai.github.io/rlm-code/)
|
|
18
|
+
[](https://github.com/SuperagenticAI/rlm-code/stargazers)
|
|
19
|
+
[](https://github.com/SuperagenticAI/rlm-code/issues)
|
|
20
|
+
[](https://github.com/SuperagenticAI/rlm-code/pulls)
|
|
23
21
|
|
|
24
22
|
**Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
|
|
25
23
|
|
|
@@ -27,6 +25,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
27
25
|
|
|
28
26
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
29
27
|
|
|
28
|
+
## Release v0.1.6
|
|
29
|
+
|
|
30
|
+
This release adds the new CodeMode path as an opt-in harness strategy.
|
|
31
|
+
|
|
32
|
+
- New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
|
|
33
|
+
- MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
|
|
34
|
+
- Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
|
|
35
|
+
- Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
|
|
36
|
+
- Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
|
|
37
|
+
|
|
38
|
+
Example:
|
|
39
|
+
|
|
40
|
+
```text
|
|
41
|
+
/harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Documentation
|
|
45
|
+
|
|
46
|
+
<p align="center">
|
|
47
|
+
<a href="https://superagenticai.github.io/rlm-code/">
|
|
48
|
+
<img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
|
|
49
|
+
</a>
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
<p align="center">
|
|
53
|
+
<a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
|
|
54
|
+
</p>
|
|
55
|
+
|
|
30
56
|
## Install
|
|
31
57
|
|
|
32
58
|
```bash
|
|
@@ -306,7 +332,7 @@ rlm_code/
|
|
|
306
332
|
harness/ # Tool-using coding harness (/harness)
|
|
307
333
|
```
|
|
308
334
|
|
|
309
|
-
##
|
|
335
|
+
## Resources
|
|
310
336
|
|
|
311
337
|
Full docs: https://superagenticai.github.io/rlm-code/
|
|
312
338
|
|
|
@@ -112,6 +112,7 @@ class SlashCommandHandler:
|
|
|
112
112
|
self.rlm_runner = RLMRunner(
|
|
113
113
|
llm_connector=self.llm_connector,
|
|
114
114
|
execution_engine=self.execution_engine,
|
|
115
|
+
mcp_manager=self.mcp_manager,
|
|
115
116
|
reward_profile=reward_profile,
|
|
116
117
|
benchmark_pack_paths=benchmark_pack_paths,
|
|
117
118
|
)
|
|
@@ -1442,7 +1443,7 @@ class SlashCommandHandler:
|
|
|
1442
1443
|
Usage:
|
|
1443
1444
|
/harness tools [mcp=on|off]
|
|
1444
1445
|
/harness doctor
|
|
1445
|
-
/harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]
|
|
1446
|
+
/harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]]
|
|
1446
1447
|
"""
|
|
1447
1448
|
if not args or args[0].lower() in {"help", "--help"}:
|
|
1448
1449
|
console.print()
|
|
@@ -1450,7 +1451,8 @@ class SlashCommandHandler:
|
|
|
1450
1451
|
console.print(" [yellow]/harness tools [mcp=on|off][/yellow]")
|
|
1451
1452
|
console.print(" [yellow]/harness doctor[/yellow]")
|
|
1452
1453
|
console.print(
|
|
1453
|
-
" [yellow]/harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]][/yellow]"
|
|
1454
|
+
" [yellow]/harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
|
|
1455
|
+
"[strategy=tool_call|codemode] [tools=name[,name2]][/yellow]"
|
|
1454
1456
|
)
|
|
1455
1457
|
console.print()
|
|
1456
1458
|
return
|
|
@@ -1555,6 +1557,8 @@ class SlashCommandHandler:
|
|
|
1555
1557
|
include_mcp = True
|
|
1556
1558
|
max_steps = 10
|
|
1557
1559
|
allowlist: list[str] | None = None
|
|
1560
|
+
strategy = "tool_call"
|
|
1561
|
+
mcp_server: str | None = None
|
|
1558
1562
|
task_tokens: list[str] = []
|
|
1559
1563
|
|
|
1560
1564
|
for token in args[1:]:
|
|
@@ -1568,6 +1572,16 @@ class SlashCommandHandler:
|
|
|
1568
1572
|
elif lowered.startswith("mcp="):
|
|
1569
1573
|
value = token.split("=", 1)[1].strip().lower()
|
|
1570
1574
|
include_mcp = value not in {"off", "false", "0", "no"}
|
|
1575
|
+
elif lowered.startswith("mcp_server="):
|
|
1576
|
+
mcp_server = token.split("=", 1)[1].strip() or None
|
|
1577
|
+
elif lowered.startswith("strategy="):
|
|
1578
|
+
raw_strategy = token.split("=", 1)[1].strip().lower().replace("-", "_")
|
|
1579
|
+
if raw_strategy not in {"tool_call", "codemode"}:
|
|
1580
|
+
show_error_message(
|
|
1581
|
+
"Invalid strategy value. Use strategy=tool_call|codemode."
|
|
1582
|
+
)
|
|
1583
|
+
return
|
|
1584
|
+
strategy = raw_strategy
|
|
1571
1585
|
elif lowered.startswith("tools="):
|
|
1572
1586
|
raw = token.split("=", 1)[1].strip()
|
|
1573
1587
|
parsed = [part.strip() for part in raw.split(",") if part.strip()]
|
|
@@ -1578,15 +1592,25 @@ class SlashCommandHandler:
|
|
|
1578
1592
|
task = " ".join(task_tokens).strip()
|
|
1579
1593
|
if not task:
|
|
1580
1594
|
show_error_message(
|
|
1581
|
-
"Usage: /harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]"
|
|
1595
|
+
"Usage: /harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
|
|
1596
|
+
"[strategy=tool_call|codemode] [tools=name[,name2]]"
|
|
1582
1597
|
)
|
|
1583
1598
|
return
|
|
1599
|
+
if strategy == "codemode" and not include_mcp:
|
|
1600
|
+
show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
|
|
1601
|
+
include_mcp = True
|
|
1602
|
+
if strategy == "codemode" and allowlist:
|
|
1603
|
+
show_warning_message("tools=... allowlist is ignored for strategy=codemode.")
|
|
1604
|
+
allowlist = None
|
|
1584
1605
|
|
|
1585
1606
|
console.print()
|
|
1586
1607
|
console.print("[bold cyan]🛠 Running Harness[/bold cyan]")
|
|
1587
1608
|
console.print(f" Task: [cyan]{task}[/cyan]")
|
|
1588
1609
|
console.print(f" Max steps: [cyan]{max_steps}[/cyan]")
|
|
1589
1610
|
console.print(f" MCP tools: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
|
|
1611
|
+
console.print(f" Strategy: [cyan]{strategy}[/cyan]")
|
|
1612
|
+
if mcp_server:
|
|
1613
|
+
console.print(f" MCP server: [cyan]{mcp_server}[/cyan]")
|
|
1590
1614
|
if allowlist:
|
|
1591
1615
|
console.print(f" Tool allowlist: [cyan]{', '.join(allowlist)}[/cyan]")
|
|
1592
1616
|
console.print()
|
|
@@ -1596,6 +1620,8 @@ class SlashCommandHandler:
|
|
|
1596
1620
|
max_steps=max_steps,
|
|
1597
1621
|
include_mcp=include_mcp,
|
|
1598
1622
|
tool_allowlist=allowlist,
|
|
1623
|
+
strategy=strategy,
|
|
1624
|
+
mcp_server=mcp_server,
|
|
1599
1625
|
)
|
|
1600
1626
|
|
|
1601
1627
|
self.current_context["harness_last_response"] = result.final_response
|
|
@@ -1659,7 +1685,7 @@ class SlashCommandHandler:
|
|
|
1659
1685
|
|
|
1660
1686
|
Usage:
|
|
1661
1687
|
/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1662
|
-
/rlm bench [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1688
|
+
/rlm bench [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1663
1689
|
/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N]
|
|
1664
1690
|
/rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json]
|
|
1665
1691
|
/rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path]
|
|
@@ -1687,6 +1713,7 @@ class SlashCommandHandler:
|
|
|
1687
1713
|
)
|
|
1688
1714
|
console.print(
|
|
1689
1715
|
" [yellow]/rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
|
|
1716
|
+
"[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
|
|
1690
1717
|
"[pack=path[,path2]] [limit=N] [steps=N] "
|
|
1691
1718
|
f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model][/yellow]"
|
|
1692
1719
|
)
|
|
@@ -2521,6 +2548,9 @@ class SlashCommandHandler:
|
|
|
2521
2548
|
environment: str | None = None
|
|
2522
2549
|
sub_model: str | None = None
|
|
2523
2550
|
sub_provider: str | None = None
|
|
2551
|
+
include_mcp = False
|
|
2552
|
+
mcp_server: str | None = None
|
|
2553
|
+
harness_strategy = "tool_call"
|
|
2524
2554
|
|
|
2525
2555
|
for token in args[1:]:
|
|
2526
2556
|
lowered = token.lower()
|
|
@@ -2537,6 +2567,19 @@ class SlashCommandHandler:
|
|
|
2537
2567
|
)
|
|
2538
2568
|
return
|
|
2539
2569
|
mode = resolved_mode
|
|
2570
|
+
elif lowered.startswith("mcp="):
|
|
2571
|
+
value = token.split("=", 1)[1].strip().lower()
|
|
2572
|
+
include_mcp = value not in {"off", "false", "0", "no"}
|
|
2573
|
+
elif lowered.startswith("strategy="):
|
|
2574
|
+
strategy_token = token.split("=", 1)[1].strip().lower().replace("-", "_")
|
|
2575
|
+
if strategy_token not in {"tool_call", "codemode"}:
|
|
2576
|
+
show_error_message(
|
|
2577
|
+
"Invalid strategy value. Use strategy=tool_call|codemode."
|
|
2578
|
+
)
|
|
2579
|
+
return
|
|
2580
|
+
harness_strategy = strategy_token
|
|
2581
|
+
elif lowered.startswith("mcp_server="):
|
|
2582
|
+
mcp_server = token.split("=", 1)[1].strip() or None
|
|
2540
2583
|
elif lowered.startswith("pack="):
|
|
2541
2584
|
raw_paths = token.split("=", 1)[1].strip()
|
|
2542
2585
|
if not raw_paths:
|
|
@@ -2593,8 +2636,10 @@ class SlashCommandHandler:
|
|
|
2593
2636
|
else:
|
|
2594
2637
|
show_error_message(
|
|
2595
2638
|
"Usage: /rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
|
|
2639
|
+
"[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
|
|
2596
2640
|
"[pack=path[,path2]] [limit=N] "
|
|
2597
|
-
f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model]\n"
|
|
2641
|
+
f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] "
|
|
2642
|
+
"[env=generic|dspy|pure_rlm] [sub=provider/model]\n"
|
|
2598
2643
|
" /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
|
|
2599
2644
|
" /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
|
|
2600
2645
|
" /rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
|
|
@@ -2602,6 +2647,30 @@ class SlashCommandHandler:
|
|
|
2602
2647
|
)
|
|
2603
2648
|
return
|
|
2604
2649
|
|
|
2650
|
+
if mode == "harness" and harness_strategy == "codemode" and not include_mcp:
|
|
2651
|
+
show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
|
|
2652
|
+
include_mcp = True
|
|
2653
|
+
|
|
2654
|
+
if mode != "harness" and include_mcp:
|
|
2655
|
+
show_warning_message("mcp=on is only used for mode=harness. Ignoring MCP settings.")
|
|
2656
|
+
include_mcp = False
|
|
2657
|
+
mcp_server = None
|
|
2658
|
+
elif mode != "harness" and mcp_server:
|
|
2659
|
+
show_warning_message(
|
|
2660
|
+
"mcp_server is only used for mode=harness with mcp=on. Ignoring."
|
|
2661
|
+
)
|
|
2662
|
+
mcp_server = None
|
|
2663
|
+
elif mode == "harness" and mcp_server and not include_mcp:
|
|
2664
|
+
show_warning_message(
|
|
2665
|
+
"mcp_server provided but mcp=off. MCP server filter will be ignored."
|
|
2666
|
+
)
|
|
2667
|
+
mcp_server = None
|
|
2668
|
+
if mode != "harness" and harness_strategy != "tool_call":
|
|
2669
|
+
show_warning_message(
|
|
2670
|
+
"strategy is only used for mode=harness. Resetting to tool_call."
|
|
2671
|
+
)
|
|
2672
|
+
harness_strategy = "tool_call"
|
|
2673
|
+
|
|
2605
2674
|
if list_only:
|
|
2606
2675
|
try:
|
|
2607
2676
|
rows = self.rlm_runner.benchmark_presets(pack_paths=pack_paths_override)
|
|
@@ -2681,6 +2750,11 @@ class SlashCommandHandler:
|
|
|
2681
2750
|
if timeout is not None:
|
|
2682
2751
|
console.print(f" Override timeout: [cyan]{timeout}s[/cyan]")
|
|
2683
2752
|
console.print(f" Branch width: [cyan]{branch_width}[/cyan]")
|
|
2753
|
+
if mode == "harness":
|
|
2754
|
+
console.print(f" Harness strategy: [cyan]{harness_strategy}[/cyan]")
|
|
2755
|
+
console.print(f" Harness MCP: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
|
|
2756
|
+
if include_mcp and mcp_server:
|
|
2757
|
+
console.print(f" Harness MCP server: [cyan]{mcp_server}[/cyan]")
|
|
2684
2758
|
if pack_paths_override:
|
|
2685
2759
|
console.print(f" Benchmark packs: [cyan]{', '.join(pack_paths_override)}[/cyan]")
|
|
2686
2760
|
if environment:
|
|
@@ -2704,6 +2778,9 @@ class SlashCommandHandler:
|
|
|
2704
2778
|
branch_width=branch_width,
|
|
2705
2779
|
sub_model=sub_model,
|
|
2706
2780
|
sub_provider=sub_provider,
|
|
2781
|
+
include_mcp=include_mcp,
|
|
2782
|
+
mcp_server=mcp_server,
|
|
2783
|
+
harness_strategy=harness_strategy,
|
|
2707
2784
|
pack_paths=pack_paths_override,
|
|
2708
2785
|
)
|
|
2709
2786
|
except ValueError as exc:
|
|
@@ -4413,7 +4490,7 @@ class SlashCommandHandler:
|
|
|
4413
4490
|
|
|
4414
4491
|
[bold magenta]RLM Workflows:[/bold magenta]
|
|
4415
4492
|
[yellow]/rlm run[/yellow] <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run an RLM coding episode
|
|
4416
|
-
[yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
|
|
4493
|
+
[yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
|
|
4417
4494
|
[yellow]/rlm bench compare[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] - Gate regressions
|
|
4418
4495
|
[yellow]/rlm bench validate[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json] - CI-style gate output
|
|
4419
4496
|
[yellow]/rlm bench report[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path] - Export compare report
|
|
@@ -4431,7 +4508,7 @@ class SlashCommandHandler:
|
|
|
4431
4508
|
[yellow]/rlm observability[/yellow] - Show local/MLflow observability sink status
|
|
4432
4509
|
[yellow]/harness tools[/yellow] [mcp=on|off] - List coding harness tools (local + MCP)
|
|
4433
4510
|
[yellow]/harness doctor[/yellow] - Show harness tool coverage report
|
|
4434
|
-
[yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [tools=name[,name2]] - Run tool-using coding harness
|
|
4511
|
+
[yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]] - Run tool-using coding harness
|
|
4435
4512
|
|
|
4436
4513
|
[bold magenta]Optimization (GEPA):[/bold magenta]
|
|
4437
4514
|
[yellow]/optimize-start[/yellow] [budget] - Start GEPA optimization workflow
|
|
@@ -102,7 +102,7 @@ class SandboxAppleContainerConfig:
|
|
|
102
102
|
class SandboxConfig:
|
|
103
103
|
"""Execution sandbox runtime configuration."""
|
|
104
104
|
|
|
105
|
-
runtime: str = "docker" # local | docker | apple-container | daytona | e2b
|
|
105
|
+
runtime: str = "docker" # local | monty | docker | apple-container | daytona | e2b
|
|
106
106
|
default_timeout_seconds: int = 30
|
|
107
107
|
memory_limit_mb: int = 512
|
|
108
108
|
allowed_mount_roots: list[str] = field(
|