rlm-code 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rlm_code-0.1.0 → rlm_code-0.1.2}/CHANGELOG.md +26 -1
- {rlm_code-0.1.0 → rlm_code-0.1.2}/PKG-INFO +42 -16
- {rlm_code-0.1.0 → rlm_code-0.1.2}/README.md +41 -15
- {rlm_code-0.1.0 → rlm_code-0.1.2}/pyproject.toml +1 -1
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/__init__.py +1 -1
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/slash_commands.py +86 -7
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/config.py +1 -1
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/harness/registry.py +305 -5
- rlm_code-0.1.2/rlm_code/harness/runner.py +708 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/__init__.py +1 -1
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/server/tools.py +1 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/benchmark_manager.py +114 -23
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/benchmarks.py +40 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/runner.py +2 -0
- rlm_code-0.1.2/rlm_code/sandbox/runtimes/monty_runtime.py +72 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/registry.py +27 -1
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_phase3.py +25 -2
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_phase4.py +2 -1
- rlm_code-0.1.2/tests/test_harness_registry.py +176 -0
- rlm_code-0.1.2/tests/test_harness_runner.py +180 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_rlm_runner.py +97 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_sandbox_runtimes.py +46 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_slash_harness_command.py +41 -5
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_slash_rlm_command.py +62 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_slash_sandbox_command.py +23 -0
- rlm_code-0.1.0/rlm_code/harness/runner.py +0 -288
- rlm_code-0.1.0/tests/test_harness_registry.py +0 -46
- rlm_code-0.1.0/tests/test_harness_runner.py +0 -64
- {rlm_code-0.1.0 → rlm_code-0.1.2}/.gitignore +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/LICENSE +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/NOTICE +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/agent.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/agents/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/agents/rlm_agent.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/callbacks/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/callbacks/code_execution.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/cli.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/code_executor.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/events.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/base.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/lazy.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/loader.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/parsers/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/parsers/base.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/parsers/pdf.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/parsers/text.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/sources/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/sources/base.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/sources/gcs.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/sources/local.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/llm.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/logging/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/logging/rlm_logger.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/logging/verbose.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/main.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/prompts.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/repl/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/repl/local_repl.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/repl/safe_builtins.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/templates/index.html +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/tools/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/types.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/usage.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/web.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/eval/packs/README.md +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/__main__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/config_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/create_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/demo_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/export_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/init_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/interactive_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/mcp_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/models_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/nl_command_router.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/optimize_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/run_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/debug_logger.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/directory_utils.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/exceptions.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/logging.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/venv_utils.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/version_checker.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/phase2_demo.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/phase3_demo.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/phase4_demo.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/pure_rlm_demo.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/execution/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/execution/engine.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/execution/sandbox.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/export/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/export/handler.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/export/package_builder.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/generators/evaluation_generator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/generators/gepa_generator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/harness/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/main.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/client_manager.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/config.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/exceptions.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/retry.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/server/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/server/rlm_server.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/session_wrapper.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/factory.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/sse_transport.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/stdio_transport.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/websocket_transport.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/utils.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/cache.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/code_generator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/dspy_reference_loader.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/llm_connector.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/model_manager.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/acp_discovery.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/local_discovery.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/model_catalog.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/registry.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/streaming.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/task_collector.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/optimization/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/optimization/data_collector.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/optimization/executor.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/optimization/workflow_manager.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/context_manager.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/dspy_md_generator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/initializer.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/scanner.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/py.typed +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/action_planner.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/audit.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/gate.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/handlers.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/policy.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/chat_session.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/code_interpreter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/comparison.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/config_schema.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/context_store.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/delegation.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/docker_interpreter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/environments.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/events.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/base.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/registry.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/leaderboard.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/memory_compaction.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/mock_interpreter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/monty_interpreter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/observability.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/observability_sinks.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/action_policies.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/base.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/compaction_policies.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/registry.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/reward_policies.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/termination_policies.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/pure_rlm_environment.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/repl_types.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/theme.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/session_replay.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/task_signature.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/termination.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/trajectory.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/visualizer.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/base.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/superbox.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/session/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/session/state_manager.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/.env.example +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/adapters.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/async_streaming.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/complete_programs.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/dspy_config_example.yaml +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/evaluation.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/industry_templates.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/optimizers.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/retrievers.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/tests/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/tests/rlm/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/tests/rlm/test_phase2.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/agent_collab_view.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/animations.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/conversation.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/design_system.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/diff_viewer.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/notifications.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/persistent_shell.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/prompt_widget.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/prompts.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/pty_terminal.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/resizable_divider.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/thinking_display.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/tui_app.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/tui_utils.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/welcome.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/anti_patterns.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/auto_fixer.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/best_practices.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/code_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/config_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/exceptions.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/input_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/learning_integration.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/models.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/module_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/predictor_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/quality_scorer.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/report_generator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/security.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/security_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/signature_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/__init__.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/conftest.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_code_interpreter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_deepagents_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_extract_fallback.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_framework_registry_coverage.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_google_adk_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_leaderboard.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_mock_interpreter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_monty_interpreter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_observability_sinks.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_p0_features.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_repl_history.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_security_hardening.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_session_replay.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_submit.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_task_signature.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_user_tools.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_anti_patterns.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_auto_fixer.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_cache.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_execution_engine.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_export_import.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_init_command.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_integration.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_learning_integration.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_mcp_utils.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_module_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_optimization_workflow.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_persistent_shell.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_predictor_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_project_scanner.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_prompt_widget.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_property_validators.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_provider_discovery.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_provider_registry.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_quality_scorer.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_report_generator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_retry.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_rlm_config.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_rlm_dspy_environment.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_rlm_observability.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_security_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_session_management.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_signature_validator.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_streaming.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_superbox.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_tui_utils.py +0 -0
- {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_validation.py +0 -0
|
@@ -5,7 +5,31 @@ All notable changes to this project are documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [0.1.
|
|
8
|
+
## [0.1.2] - 2026-02-20
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Harness strategy selector with `tool_call` (default) and opt-in `codemode`.
|
|
12
|
+
- CodeMode execution flow in harness: MCP tool discovery (`search_tools`), typed tool surface prompt, single-program generation, guardrail validation, and MCP chain execution (`call_tool_chain`).
|
|
13
|
+
- Benchmark support for harness strategy comparison with CodeMode telemetry fields (`harness_strategy`, `codemode_chain_calls`, `codemode_search_calls`, `codemode_discovery_calls`, `codemode_guardrail_blocked`).
|
|
14
|
+
- New top-level CodeMode docs section with dedicated pages for quickstart, architecture, guardrails, and evaluation.
|
|
15
|
+
- Release documentation set for CodeMode:
|
|
16
|
+
- quickstart and operator workflow
|
|
17
|
+
- integration architecture and runtime controls
|
|
18
|
+
- provider/bridge separation model (Cloudflare-based, UTCP, custom)
|
|
19
|
+
- CodeMode sandbox responsibility and deployment matrix
|
|
20
|
+
- guardrail policy and safety runbook
|
|
21
|
+
- benchmark evaluation and promotion-gate criteria
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
- `/harness run` supports `strategy=tool_call|codemode` and `mcp_server=<name>`.
|
|
25
|
+
- `/rlm bench` in `mode=harness` supports `strategy=tool_call|codemode`.
|
|
26
|
+
- Harness and benchmark command handling now auto-enables MCP when `strategy=codemode` is selected.
|
|
27
|
+
|
|
28
|
+
### Security
|
|
29
|
+
- Added explicit CodeMode guardrail policy documentation with blocked API classes and runtime limit defaults.
|
|
30
|
+
- Codemode path remains opt-in; default harness behavior remains strict baseline `strategy=tool_call`.
|
|
31
|
+
|
|
32
|
+
## [0.1.1] - 2026-02-15
|
|
9
33
|
|
|
10
34
|
Initial public release of **RLM Code**.
|
|
11
35
|
|
|
@@ -31,3 +55,4 @@ Initial public release of **RLM Code**.
|
|
|
31
55
|
- Unsafe local `exec` usage preserved only as an explicit, opt-in path for advanced development scenarios.
|
|
32
56
|
|
|
33
57
|
[0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
|
|
58
|
+
[0.1.2]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.2
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlm-code
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
|
|
5
5
|
Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
|
|
6
6
|
Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
|
|
@@ -99,20 +99,18 @@ Description-Content-Type: text/markdown
|
|
|
99
99
|
</a>
|
|
100
100
|
</p>
|
|
101
101
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
<a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
|
|
115
|
-
</p>
|
|
102
|
+
[](https://pypi.org/project/rlm-code/)
|
|
103
|
+
[](https://pypi.org/project/rlm-code/)
|
|
104
|
+
[](https://pypi.org/project/rlm-code/)
|
|
105
|
+
[](https://pypi.org/project/rlm-code/)
|
|
106
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
|
|
107
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
|
|
108
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
|
|
109
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
|
|
110
|
+
[](https://superagenticai.github.io/rlm-code/)
|
|
111
|
+
[](https://github.com/SuperagenticAI/rlm-code/stargazers)
|
|
112
|
+
[](https://github.com/SuperagenticAI/rlm-code/issues)
|
|
113
|
+
[](https://github.com/SuperagenticAI/rlm-code/pulls)
|
|
116
114
|
|
|
117
115
|
**Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
|
|
118
116
|
|
|
@@ -120,6 +118,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
120
118
|
|
|
121
119
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
122
120
|
|
|
121
|
+
## Release v0.1.2
|
|
122
|
+
|
|
123
|
+
This release adds the new CodeMode path as an opt-in harness strategy.
|
|
124
|
+
|
|
125
|
+
- New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
|
|
126
|
+
- MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
|
|
127
|
+
- Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
|
|
128
|
+
- Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
|
|
129
|
+
- Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
|
|
130
|
+
|
|
131
|
+
Example:
|
|
132
|
+
|
|
133
|
+
```text
|
|
134
|
+
/harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Documentation
|
|
138
|
+
|
|
139
|
+
<p align="center">
|
|
140
|
+
<a href="https://superagenticai.github.io/rlm-code/">
|
|
141
|
+
<img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
|
|
142
|
+
</a>
|
|
143
|
+
</p>
|
|
144
|
+
|
|
145
|
+
<p align="center">
|
|
146
|
+
<a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
|
|
147
|
+
</p>
|
|
148
|
+
|
|
123
149
|
## Install
|
|
124
150
|
|
|
125
151
|
```bash
|
|
@@ -399,7 +425,7 @@ rlm_code/
|
|
|
399
425
|
harness/ # Tool-using coding harness (/harness)
|
|
400
426
|
```
|
|
401
427
|
|
|
402
|
-
##
|
|
428
|
+
## Resources
|
|
403
429
|
|
|
404
430
|
Full docs: https://superagenticai.github.io/rlm-code/
|
|
405
431
|
|
|
@@ -6,20 +6,18 @@
|
|
|
6
6
|
</a>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
<a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
|
|
22
|
-
</p>
|
|
9
|
+
[](https://pypi.org/project/rlm-code/)
|
|
10
|
+
[](https://pypi.org/project/rlm-code/)
|
|
11
|
+
[](https://pypi.org/project/rlm-code/)
|
|
12
|
+
[](https://pypi.org/project/rlm-code/)
|
|
13
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
|
|
14
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
|
|
15
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
|
|
16
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
|
|
17
|
+
[](https://superagenticai.github.io/rlm-code/)
|
|
18
|
+
[](https://github.com/SuperagenticAI/rlm-code/stargazers)
|
|
19
|
+
[](https://github.com/SuperagenticAI/rlm-code/issues)
|
|
20
|
+
[](https://github.com/SuperagenticAI/rlm-code/pulls)
|
|
23
21
|
|
|
24
22
|
**Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
|
|
25
23
|
|
|
@@ -27,6 +25,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
27
25
|
|
|
28
26
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
29
27
|
|
|
28
|
+
## Release v0.1.2
|
|
29
|
+
|
|
30
|
+
This release adds the new CodeMode path as an opt-in harness strategy.
|
|
31
|
+
|
|
32
|
+
- New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
|
|
33
|
+
- MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
|
|
34
|
+
- Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
|
|
35
|
+
- Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
|
|
36
|
+
- Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
|
|
37
|
+
|
|
38
|
+
Example:
|
|
39
|
+
|
|
40
|
+
```text
|
|
41
|
+
/harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Documentation
|
|
45
|
+
|
|
46
|
+
<p align="center">
|
|
47
|
+
<a href="https://superagenticai.github.io/rlm-code/">
|
|
48
|
+
<img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
|
|
49
|
+
</a>
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
<p align="center">
|
|
53
|
+
<a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
|
|
54
|
+
</p>
|
|
55
|
+
|
|
30
56
|
## Install
|
|
31
57
|
|
|
32
58
|
```bash
|
|
@@ -306,7 +332,7 @@ rlm_code/
|
|
|
306
332
|
harness/ # Tool-using coding harness (/harness)
|
|
307
333
|
```
|
|
308
334
|
|
|
309
|
-
##
|
|
335
|
+
## Resources
|
|
310
336
|
|
|
311
337
|
Full docs: https://superagenticai.github.io/rlm-code/
|
|
312
338
|
|
|
@@ -112,6 +112,7 @@ class SlashCommandHandler:
|
|
|
112
112
|
self.rlm_runner = RLMRunner(
|
|
113
113
|
llm_connector=self.llm_connector,
|
|
114
114
|
execution_engine=self.execution_engine,
|
|
115
|
+
mcp_manager=self.mcp_manager,
|
|
115
116
|
reward_profile=reward_profile,
|
|
116
117
|
benchmark_pack_paths=benchmark_pack_paths,
|
|
117
118
|
)
|
|
@@ -1442,7 +1443,7 @@ class SlashCommandHandler:
|
|
|
1442
1443
|
Usage:
|
|
1443
1444
|
/harness tools [mcp=on|off]
|
|
1444
1445
|
/harness doctor
|
|
1445
|
-
/harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]
|
|
1446
|
+
/harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]]
|
|
1446
1447
|
"""
|
|
1447
1448
|
if not args or args[0].lower() in {"help", "--help"}:
|
|
1448
1449
|
console.print()
|
|
@@ -1450,7 +1451,8 @@ class SlashCommandHandler:
|
|
|
1450
1451
|
console.print(" [yellow]/harness tools [mcp=on|off][/yellow]")
|
|
1451
1452
|
console.print(" [yellow]/harness doctor[/yellow]")
|
|
1452
1453
|
console.print(
|
|
1453
|
-
" [yellow]/harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]][/yellow]"
|
|
1454
|
+
" [yellow]/harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
|
|
1455
|
+
"[strategy=tool_call|codemode] [tools=name[,name2]][/yellow]"
|
|
1454
1456
|
)
|
|
1455
1457
|
console.print()
|
|
1456
1458
|
return
|
|
@@ -1555,6 +1557,8 @@ class SlashCommandHandler:
|
|
|
1555
1557
|
include_mcp = True
|
|
1556
1558
|
max_steps = 10
|
|
1557
1559
|
allowlist: list[str] | None = None
|
|
1560
|
+
strategy = "tool_call"
|
|
1561
|
+
mcp_server: str | None = None
|
|
1558
1562
|
task_tokens: list[str] = []
|
|
1559
1563
|
|
|
1560
1564
|
for token in args[1:]:
|
|
@@ -1568,6 +1572,16 @@ class SlashCommandHandler:
|
|
|
1568
1572
|
elif lowered.startswith("mcp="):
|
|
1569
1573
|
value = token.split("=", 1)[1].strip().lower()
|
|
1570
1574
|
include_mcp = value not in {"off", "false", "0", "no"}
|
|
1575
|
+
elif lowered.startswith("mcp_server="):
|
|
1576
|
+
mcp_server = token.split("=", 1)[1].strip() or None
|
|
1577
|
+
elif lowered.startswith("strategy="):
|
|
1578
|
+
raw_strategy = token.split("=", 1)[1].strip().lower().replace("-", "_")
|
|
1579
|
+
if raw_strategy not in {"tool_call", "codemode"}:
|
|
1580
|
+
show_error_message(
|
|
1581
|
+
"Invalid strategy value. Use strategy=tool_call|codemode."
|
|
1582
|
+
)
|
|
1583
|
+
return
|
|
1584
|
+
strategy = raw_strategy
|
|
1571
1585
|
elif lowered.startswith("tools="):
|
|
1572
1586
|
raw = token.split("=", 1)[1].strip()
|
|
1573
1587
|
parsed = [part.strip() for part in raw.split(",") if part.strip()]
|
|
@@ -1578,15 +1592,27 @@ class SlashCommandHandler:
|
|
|
1578
1592
|
task = " ".join(task_tokens).strip()
|
|
1579
1593
|
if not task:
|
|
1580
1594
|
show_error_message(
|
|
1581
|
-
"Usage: /harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]"
|
|
1595
|
+
"Usage: /harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
|
|
1596
|
+
"[strategy=tool_call|codemode] [tools=name[,name2]]"
|
|
1582
1597
|
)
|
|
1583
1598
|
return
|
|
1599
|
+
if strategy == "codemode" and not include_mcp:
|
|
1600
|
+
show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
|
|
1601
|
+
include_mcp = True
|
|
1602
|
+
if strategy == "codemode" and allowlist:
|
|
1603
|
+
show_warning_message(
|
|
1604
|
+
"tools=... allowlist is ignored for strategy=codemode."
|
|
1605
|
+
)
|
|
1606
|
+
allowlist = None
|
|
1584
1607
|
|
|
1585
1608
|
console.print()
|
|
1586
1609
|
console.print("[bold cyan]🛠 Running Harness[/bold cyan]")
|
|
1587
1610
|
console.print(f" Task: [cyan]{task}[/cyan]")
|
|
1588
1611
|
console.print(f" Max steps: [cyan]{max_steps}[/cyan]")
|
|
1589
1612
|
console.print(f" MCP tools: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
|
|
1613
|
+
console.print(f" Strategy: [cyan]{strategy}[/cyan]")
|
|
1614
|
+
if mcp_server:
|
|
1615
|
+
console.print(f" MCP server: [cyan]{mcp_server}[/cyan]")
|
|
1590
1616
|
if allowlist:
|
|
1591
1617
|
console.print(f" Tool allowlist: [cyan]{', '.join(allowlist)}[/cyan]")
|
|
1592
1618
|
console.print()
|
|
@@ -1596,6 +1622,8 @@ class SlashCommandHandler:
|
|
|
1596
1622
|
max_steps=max_steps,
|
|
1597
1623
|
include_mcp=include_mcp,
|
|
1598
1624
|
tool_allowlist=allowlist,
|
|
1625
|
+
strategy=strategy,
|
|
1626
|
+
mcp_server=mcp_server,
|
|
1599
1627
|
)
|
|
1600
1628
|
|
|
1601
1629
|
self.current_context["harness_last_response"] = result.final_response
|
|
@@ -1659,7 +1687,7 @@ class SlashCommandHandler:
|
|
|
1659
1687
|
|
|
1660
1688
|
Usage:
|
|
1661
1689
|
/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1662
|
-
/rlm bench [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1690
|
+
/rlm bench [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1663
1691
|
/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N]
|
|
1664
1692
|
/rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json]
|
|
1665
1693
|
/rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path]
|
|
@@ -1687,6 +1715,7 @@ class SlashCommandHandler:
|
|
|
1687
1715
|
)
|
|
1688
1716
|
console.print(
|
|
1689
1717
|
" [yellow]/rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
|
|
1718
|
+
"[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
|
|
1690
1719
|
"[pack=path[,path2]] [limit=N] [steps=N] "
|
|
1691
1720
|
f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model][/yellow]"
|
|
1692
1721
|
)
|
|
@@ -2521,6 +2550,9 @@ class SlashCommandHandler:
|
|
|
2521
2550
|
environment: str | None = None
|
|
2522
2551
|
sub_model: str | None = None
|
|
2523
2552
|
sub_provider: str | None = None
|
|
2553
|
+
include_mcp = False
|
|
2554
|
+
mcp_server: str | None = None
|
|
2555
|
+
harness_strategy = "tool_call"
|
|
2524
2556
|
|
|
2525
2557
|
for token in args[1:]:
|
|
2526
2558
|
lowered = token.lower()
|
|
@@ -2537,6 +2569,19 @@ class SlashCommandHandler:
|
|
|
2537
2569
|
)
|
|
2538
2570
|
return
|
|
2539
2571
|
mode = resolved_mode
|
|
2572
|
+
elif lowered.startswith("mcp="):
|
|
2573
|
+
value = token.split("=", 1)[1].strip().lower()
|
|
2574
|
+
include_mcp = value not in {"off", "false", "0", "no"}
|
|
2575
|
+
elif lowered.startswith("strategy="):
|
|
2576
|
+
strategy_token = token.split("=", 1)[1].strip().lower().replace("-", "_")
|
|
2577
|
+
if strategy_token not in {"tool_call", "codemode"}:
|
|
2578
|
+
show_error_message(
|
|
2579
|
+
"Invalid strategy value. Use strategy=tool_call|codemode."
|
|
2580
|
+
)
|
|
2581
|
+
return
|
|
2582
|
+
harness_strategy = strategy_token
|
|
2583
|
+
elif lowered.startswith("mcp_server="):
|
|
2584
|
+
mcp_server = token.split("=", 1)[1].strip() or None
|
|
2540
2585
|
elif lowered.startswith("pack="):
|
|
2541
2586
|
raw_paths = token.split("=", 1)[1].strip()
|
|
2542
2587
|
if not raw_paths:
|
|
@@ -2593,8 +2638,10 @@ class SlashCommandHandler:
|
|
|
2593
2638
|
else:
|
|
2594
2639
|
show_error_message(
|
|
2595
2640
|
"Usage: /rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
|
|
2641
|
+
"[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
|
|
2596
2642
|
"[pack=path[,path2]] [limit=N] "
|
|
2597
|
-
f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model]\n"
|
|
2643
|
+
f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] "
|
|
2644
|
+
"[env=generic|dspy|pure_rlm] [sub=provider/model]\n"
|
|
2598
2645
|
" /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
|
|
2599
2646
|
" /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
|
|
2600
2647
|
" /rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
|
|
@@ -2602,6 +2649,30 @@ class SlashCommandHandler:
|
|
|
2602
2649
|
)
|
|
2603
2650
|
return
|
|
2604
2651
|
|
|
2652
|
+
if mode == "harness" and harness_strategy == "codemode" and not include_mcp:
|
|
2653
|
+
show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
|
|
2654
|
+
include_mcp = True
|
|
2655
|
+
|
|
2656
|
+
if mode != "harness" and include_mcp:
|
|
2657
|
+
show_warning_message("mcp=on is only used for mode=harness. Ignoring MCP settings.")
|
|
2658
|
+
include_mcp = False
|
|
2659
|
+
mcp_server = None
|
|
2660
|
+
elif mode != "harness" and mcp_server:
|
|
2661
|
+
show_warning_message(
|
|
2662
|
+
"mcp_server is only used for mode=harness with mcp=on. Ignoring."
|
|
2663
|
+
)
|
|
2664
|
+
mcp_server = None
|
|
2665
|
+
elif mode == "harness" and mcp_server and not include_mcp:
|
|
2666
|
+
show_warning_message(
|
|
2667
|
+
"mcp_server provided but mcp=off. MCP server filter will be ignored."
|
|
2668
|
+
)
|
|
2669
|
+
mcp_server = None
|
|
2670
|
+
if mode != "harness" and harness_strategy != "tool_call":
|
|
2671
|
+
show_warning_message(
|
|
2672
|
+
"strategy is only used for mode=harness. Resetting to tool_call."
|
|
2673
|
+
)
|
|
2674
|
+
harness_strategy = "tool_call"
|
|
2675
|
+
|
|
2605
2676
|
if list_only:
|
|
2606
2677
|
try:
|
|
2607
2678
|
rows = self.rlm_runner.benchmark_presets(pack_paths=pack_paths_override)
|
|
@@ -2681,6 +2752,11 @@ class SlashCommandHandler:
|
|
|
2681
2752
|
if timeout is not None:
|
|
2682
2753
|
console.print(f" Override timeout: [cyan]{timeout}s[/cyan]")
|
|
2683
2754
|
console.print(f" Branch width: [cyan]{branch_width}[/cyan]")
|
|
2755
|
+
if mode == "harness":
|
|
2756
|
+
console.print(f" Harness strategy: [cyan]{harness_strategy}[/cyan]")
|
|
2757
|
+
console.print(f" Harness MCP: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
|
|
2758
|
+
if include_mcp and mcp_server:
|
|
2759
|
+
console.print(f" Harness MCP server: [cyan]{mcp_server}[/cyan]")
|
|
2684
2760
|
if pack_paths_override:
|
|
2685
2761
|
console.print(f" Benchmark packs: [cyan]{', '.join(pack_paths_override)}[/cyan]")
|
|
2686
2762
|
if environment:
|
|
@@ -2704,6 +2780,9 @@ class SlashCommandHandler:
|
|
|
2704
2780
|
branch_width=branch_width,
|
|
2705
2781
|
sub_model=sub_model,
|
|
2706
2782
|
sub_provider=sub_provider,
|
|
2783
|
+
include_mcp=include_mcp,
|
|
2784
|
+
mcp_server=mcp_server,
|
|
2785
|
+
harness_strategy=harness_strategy,
|
|
2707
2786
|
pack_paths=pack_paths_override,
|
|
2708
2787
|
)
|
|
2709
2788
|
except ValueError as exc:
|
|
@@ -4413,7 +4492,7 @@ class SlashCommandHandler:
|
|
|
4413
4492
|
|
|
4414
4493
|
[bold magenta]RLM Workflows:[/bold magenta]
|
|
4415
4494
|
[yellow]/rlm run[/yellow] <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run an RLM coding episode
|
|
4416
|
-
[yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
|
|
4495
|
+
[yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
|
|
4417
4496
|
[yellow]/rlm bench compare[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] - Gate regressions
|
|
4418
4497
|
[yellow]/rlm bench validate[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json] - CI-style gate output
|
|
4419
4498
|
[yellow]/rlm bench report[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path] - Export compare report
|
|
@@ -4431,7 +4510,7 @@ class SlashCommandHandler:
|
|
|
4431
4510
|
[yellow]/rlm observability[/yellow] - Show local/MLflow observability sink status
|
|
4432
4511
|
[yellow]/harness tools[/yellow] [mcp=on|off] - List coding harness tools (local + MCP)
|
|
4433
4512
|
[yellow]/harness doctor[/yellow] - Show harness tool coverage report
|
|
4434
|
-
[yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [tools=name[,name2]] - Run tool-using coding harness
|
|
4513
|
+
[yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]] - Run tool-using coding harness
|
|
4435
4514
|
|
|
4436
4515
|
[bold magenta]Optimization (GEPA):[/bold magenta]
|
|
4437
4516
|
[yellow]/optimize-start[/yellow] [budget] - Start GEPA optimization workflow
|
|
@@ -102,7 +102,7 @@ class SandboxAppleContainerConfig:
|
|
|
102
102
|
class SandboxConfig:
|
|
103
103
|
"""Execution sandbox runtime configuration."""
|
|
104
104
|
|
|
105
|
-
runtime: str = "docker" # local | docker | apple-container | daytona | e2b
|
|
105
|
+
runtime: str = "docker" # local | monty | docker | apple-container | daytona | e2b
|
|
106
106
|
default_timeout_seconds: int = 30
|
|
107
107
|
memory_limit_mb: int = 512
|
|
108
108
|
allowed_mount_roots: list[str] = field(
|