rlm-code 0.1.5__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlm_code-0.1.7/CHANGELOG.md +72 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/PKG-INFO +98 -16
- {rlm_code-0.1.5 → rlm_code-0.1.7}/README.md +97 -15
- {rlm_code-0.1.5 → rlm_code-0.1.7}/pyproject.toml +1 -1
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/__init__.py +1 -1
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/slash_commands.py +92 -15
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/config.py +1 -1
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/harness/registry.py +306 -5
- rlm_code-0.1.7/rlm_code/harness/runner.py +710 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/__init__.py +1 -1
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/server/tools.py +1 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/action_planner.py +3 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/benchmark_manager.py +112 -23
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/benchmarks.py +40 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/environments.py +245 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/runner.py +15 -0
- rlm_code-0.1.7/rlm_code/sandbox/runtimes/monty_runtime.py +72 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/registry.py +27 -1
- rlm_code-0.1.7/rlm_code/traces/__init__.py +6 -0
- rlm_code-0.1.7/rlm_code/traces/index.py +170 -0
- rlm_code-0.1.7/rlm_code/traces/models.py +103 -0
- rlm_code-0.1.7/rlm_code/traces/store.py +221 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_phase3.py +25 -2
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_phase4.py +2 -1
- rlm_code-0.1.7/tests/test_harness_registry.py +176 -0
- rlm_code-0.1.7/tests/test_harness_runner.py +182 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_provider_registry.py +6 -1
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_rlm_runner.py +97 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_sandbox_runtimes.py +46 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_slash_harness_command.py +41 -5
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_slash_rlm_command.py +62 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_slash_sandbox_command.py +23 -0
- rlm_code-0.1.7/tests/test_trace_analysis.py +115 -0
- rlm_code-0.1.5/CHANGELOG.md +0 -33
- rlm_code-0.1.5/rlm_code/harness/runner.py +0 -288
- rlm_code-0.1.5/tests/test_harness_registry.py +0 -46
- rlm_code-0.1.5/tests/test_harness_runner.py +0 -64
- {rlm_code-0.1.5 → rlm_code-0.1.7}/.gitignore +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/LICENSE +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/NOTICE +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/agent.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/agents/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/agents/rlm_agent.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/callbacks/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/callbacks/code_execution.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/cli.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/code_executor.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/events.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/lazy.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/loader.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/parsers/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/parsers/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/parsers/pdf.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/parsers/text.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/sources/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/sources/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/sources/gcs.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/sources/local.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/llm.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/logging/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/logging/rlm_logger.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/logging/verbose.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/main.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/prompts.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/repl/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/repl/local_repl.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/repl/safe_builtins.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/templates/index.html +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/tools/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/types.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/usage.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/web.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/eval/packs/README.md +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/__main__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/config_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/create_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/demo_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/export_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/init_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/interactive_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/mcp_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/models_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/nl_command_router.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/optimize_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/run_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/debug_logger.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/directory_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/exceptions.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/logging.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/venv_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/version_checker.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/phase2_demo.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/phase3_demo.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/phase4_demo.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/pure_rlm_demo.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/execution/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/execution/engine.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/execution/sandbox.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/export/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/export/handler.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/export/package_builder.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/generators/evaluation_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/generators/gepa_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/harness/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/main.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/client_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/config.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/exceptions.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/retry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/server/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/server/rlm_server.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/session_wrapper.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/factory.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/sse_transport.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/stdio_transport.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/websocket_transport.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/cache.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/code_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/dspy_reference_loader.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/llm_connector.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/model_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/acp_discovery.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/local_discovery.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/model_catalog.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/registry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/streaming.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/task_collector.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/optimization/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/optimization/data_collector.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/optimization/executor.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/optimization/workflow_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/context_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/dspy_md_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/initializer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/scanner.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/py.typed +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/audit.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/gate.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/handlers.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/policy.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/chat_session.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/code_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/comparison.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/config_schema.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/context_store.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/delegation.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/docker_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/events.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/registry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/leaderboard.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/memory_compaction.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/mock_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/monty_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/observability.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/observability_sinks.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/action_policies.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/compaction_policies.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/registry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/reward_policies.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/termination_policies.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/pure_rlm_environment.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/repl_types.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/theme.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/session_replay.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/task_signature.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/termination.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/trajectory.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/visualizer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/base.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/superbox.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/session/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/session/state_manager.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/.env.example +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/adapters.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/async_streaming.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/complete_programs.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/dspy_config_example.yaml +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/evaluation.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/industry_templates.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/optimizers.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/retrievers.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/tests/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/tests/rlm/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/tests/rlm/test_phase2.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/agent_collab_view.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/animations.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/conversation.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/design_system.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/diff_viewer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/notifications.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/persistent_shell.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/prompt_widget.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/prompts.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/pty_terminal.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/resizable_divider.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/thinking_display.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/tui_app.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/tui_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/welcome.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/anti_patterns.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/auto_fixer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/best_practices.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/code_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/config_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/exceptions.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/input_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/learning_integration.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/models.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/module_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/predictor_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/quality_scorer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/report_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/security.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/security_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/signature_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/__init__.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/conftest.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_code_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_deepagents_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_extract_fallback.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_framework_registry_coverage.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_google_adk_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_leaderboard.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_mock_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_monty_interpreter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_observability_sinks.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_p0_features.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_repl_history.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_security_hardening.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_session_replay.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_submit.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_task_signature.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_user_tools.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_anti_patterns.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_auto_fixer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_cache.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_execution_engine.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_export_import.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_init_command.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_integration.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_learning_integration.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_mcp_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_module_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_optimization_workflow.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_persistent_shell.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_predictor_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_project_scanner.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_prompt_widget.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_property_validators.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_provider_discovery.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_quality_scorer.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_report_generator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_retry.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_rlm_config.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_rlm_dspy_environment.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_rlm_observability.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_security_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_session_management.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_signature_validator.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_streaming.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_superbox.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_tui_utils.py +0 -0
- {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_validation.py +0 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.7] - 2026-04-30
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- HALO-style `trace_analysis` RLM environment for diagnosing agent harness failures from one-span-per-line JSONL traces.
|
|
12
|
+
- Trace sidecar indexing with dataset rollups for trace counts, span counts, error traces, services, models, agents, token totals, and sample trace ids.
|
|
13
|
+
- Bounded trace inspection actions: `get_dataset_overview`, `query_traces`, `count_traces`, `view_trace`, `search_trace`, and `view_spans`.
|
|
14
|
+
- Large-trace safeguards: per-attribute truncation, oversized trace summaries, and higher-cap selected-span reads.
|
|
15
|
+
- Tests for trace indexing, querying, searching, selected-span viewing, and trace environment actions.
|
|
16
|
+
- Trace analysis documentation under the Core Engine docs.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
- `/rlm` command help now advertises `env=trace_analysis` for run, chat, and doctor workflows.
|
|
20
|
+
|
|
21
|
+
## [0.1.6] - 2026-02-20
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
- Harness strategy selector with `tool_call` (default) and opt-in `codemode`.
|
|
25
|
+
- CodeMode execution flow in harness: MCP tool discovery (`search_tools`), typed tool surface prompt, single-program generation, guardrail validation, and MCP chain execution (`call_tool_chain`).
|
|
26
|
+
- Benchmark support for harness strategy comparison with CodeMode telemetry fields (`harness_strategy`, `codemode_chain_calls`, `codemode_search_calls`, `codemode_discovery_calls`, `codemode_guardrail_blocked`).
|
|
27
|
+
- New top-level CodeMode docs section with dedicated pages for quickstart, architecture, guardrails, and evaluation.
|
|
28
|
+
- Release documentation set for CodeMode:
|
|
29
|
+
  - quickstart and operator workflow
|
|
30
|
+
  - integration architecture and runtime controls
|
|
31
|
+
  - provider/bridge separation model (Cloudflare-based, UTCP, custom)
|
|
32
|
+
  - CodeMode sandbox responsibility and deployment matrix
|
|
33
|
+
  - guardrail policy and safety runbook
|
|
34
|
+
  - benchmark evaluation and promotion-gate criteria
|
|
35
|
+
|
|
36
|
+
### Changed
|
|
37
|
+
- `/harness run` supports `strategy=tool_call|codemode` and `mcp_server=<name>`.
|
|
38
|
+
- `/rlm bench` in `mode=harness` supports `strategy=tool_call|codemode`.
|
|
39
|
+
- Harness and benchmark command handling now auto-enables MCP when `strategy=codemode` is selected.
|
|
40
|
+
|
|
41
|
+
### Security
|
|
42
|
+
- Added explicit CodeMode guardrail policy documentation with blocked API classes and runtime limit defaults.
|
|
43
|
+
- CodeMode path remains opt-in; default harness behavior remains strict baseline `strategy=tool_call`.
|
|
44
|
+
|
|
45
|
+
## [0.1.5] - 2026-02-15
|
|
46
|
+
|
|
47
|
+
Initial public release of **RLM Code**.
|
|
48
|
+
|
|
49
|
+
### Added
|
|
50
|
+
- Unified Textual TUI with tabs for **RLM**, **Files**, **Details**, **Shell**, and **Research**.
|
|
51
|
+
- Recursive execution engine with multiple patterns: **pure RLM**, **harness/code-agent**, and direct LLM flows.
|
|
52
|
+
- Research workflows: run tracking, trajectory capture, replay, benchmark presets, compare/report flows.
|
|
53
|
+
- Sandbox runtime layer (**Superbox**) with profile-driven runtime selection and fallback orchestration.
|
|
54
|
+
- Secure runtime options including Docker and Monty, plus pluggable runtime adapters.
|
|
55
|
+
- LLM integrations for cloud and local model routes, including BYOK workflows and ACP connectivity.
|
|
56
|
+
- Coding harness with optional MCP tool integration for local/BYOK development workflows.
|
|
57
|
+
- Framework adapter surface for RLM-style integrations (including DSPy-native and ADK-oriented paths).
|
|
58
|
+
- Observability integrations (MLflow, LangFuse, Logfire, LangSmith, OpenTelemetry) via sink architecture.
|
|
59
|
+
- Documentation site (MkDocs Material) with onboarding, CLI, TUI, sandbox, integrations, and benchmark guides.
|
|
60
|
+
|
|
61
|
+
### Changed
|
|
62
|
+
- Project identity standardized as **RLM Code** (legacy inherited naming removed from repository-facing surfaces).
|
|
63
|
+
- Packaging and project metadata prepared for open-source release.
|
|
64
|
+
- License updated to **Apache-2.0**.
|
|
65
|
+
|
|
66
|
+
### Security
|
|
67
|
+
- Safer sandbox-first runtime guidance in docs and configuration defaults.
|
|
68
|
+
- Unsafe local `exec` usage preserved only as an explicit, opt-in path for advanced development scenarios.
|
|
69
|
+
|
|
70
|
+
[0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
|
|
71
|
+
[0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
|
|
72
|
+
[0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlm-code
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
|
|
5
5
|
Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
|
|
6
6
|
Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
|
|
@@ -99,20 +99,18 @@ Description-Content-Type: text/markdown
|
|
|
99
99
|
</a>
|
|
100
100
|
</p>
|
|
101
101
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
<a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
|
|
115
|
-
</p>
|
|
102
|
+
[](https://pypi.org/project/rlm-code/)
|
|
103
|
+
[](https://pypi.org/project/rlm-code/)
|
|
104
|
+
[](https://pypi.org/project/rlm-code/)
|
|
105
|
+
[](https://pypi.org/project/rlm-code/)
|
|
106
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
|
|
107
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
|
|
108
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
|
|
109
|
+
[](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
|
|
110
|
+
[](https://superagenticai.github.io/rlm-code/)
|
|
111
|
+
[](https://github.com/SuperagenticAI/rlm-code/stargazers)
|
|
112
|
+
[](https://github.com/SuperagenticAI/rlm-code/issues)
|
|
113
|
+
[![GitHub Pull Requests](https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code)](https://github.com/SuperagenticAI/rlm-code/pulls)
|
|
116
114
|
|
|
117
115
|
**Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
|
|
118
116
|
|
|
@@ -120,6 +118,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
120
118
|
|
|
121
119
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
122
120
|
|
|
121
|
+
## Release v0.1.7
|
|
122
|
+
|
|
123
|
+
This release adds HALO-style trace analysis as a new RLM environment.
|
|
124
|
+
|
|
125
|
+
- New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
|
|
126
|
+
- Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
|
|
127
|
+
- Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
|
|
128
|
+
- `/rlm` help/docs updated for `env=trace_analysis`
|
|
129
|
+
- Dedicated trace analysis docs under the Core Engine section
|
|
130
|
+
|
|
131
|
+
Example:
|
|
132
|
+
|
|
133
|
+
```text
|
|
134
|
+
/rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Documentation
|
|
138
|
+
|
|
139
|
+
<p align="center">
|
|
140
|
+
<a href="https://superagenticai.github.io/rlm-code/">
|
|
141
|
+
<img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
|
|
142
|
+
</a>
|
|
143
|
+
</p>
|
|
144
|
+
|
|
145
|
+
<p align="center">
|
|
146
|
+
<a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
|
|
147
|
+
</p>
|
|
148
|
+
|
|
123
149
|
## Install
|
|
124
150
|
|
|
125
151
|
```bash
|
|
@@ -261,6 +287,62 @@ Notes:
|
|
|
261
287
|
- In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
|
|
262
288
|
- In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
|
|
263
289
|
|
|
290
|
+
### 8. CodeMode with UTCP and Cloudflare MCP
|
|
291
|
+
|
|
292
|
+
Use these server entries in your project `rlm_config.yaml`:
|
|
293
|
+
|
|
294
|
+
```yaml
|
|
295
|
+
mcp_servers:
|
|
296
|
+
utcp-codemode:
|
|
297
|
+
name: utcp-codemode
|
|
298
|
+
description: "Local CodeMode MCP bridge"
|
|
299
|
+
enabled: true
|
|
300
|
+
auto_connect: false
|
|
301
|
+
timeout_seconds: 30
|
|
302
|
+
retry_attempts: 3
|
|
303
|
+
transport:
|
|
304
|
+
type: stdio
|
|
305
|
+
command: npx
|
|
306
|
+
args:
|
|
307
|
+
- "@utcp/code-mode-mcp"
|
|
308
|
+
|
|
309
|
+
cloudflare-codemode:
|
|
310
|
+
name: cloudflare-codemode
|
|
311
|
+
description: "Cloudflare MCP via remote bridge"
|
|
312
|
+
enabled: true
|
|
313
|
+
auto_connect: false
|
|
314
|
+
timeout_seconds: 30
|
|
315
|
+
retry_attempts: 3
|
|
316
|
+
transport:
|
|
317
|
+
type: stdio
|
|
318
|
+
command: npx
|
|
319
|
+
args:
|
|
320
|
+
- "mcp-remote"
|
|
321
|
+
- "https://mcp.cloudflare.com/mcp"
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
UTCP path (native CodeMode in current release):
|
|
325
|
+
|
|
326
|
+
```text
|
|
327
|
+
/mcp-connect utcp-codemode
|
|
328
|
+
/mcp-tools utcp-codemode
|
|
329
|
+
/harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
Cloudflare path (recommended strategy today):
|
|
333
|
+
|
|
334
|
+
```text
|
|
335
|
+
/mcp-connect cloudflare-codemode
|
|
336
|
+
/mcp-tools cloudflare-codemode
|
|
337
|
+
/harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Notes:
|
|
341
|
+
|
|
342
|
+
- On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
|
|
343
|
+
- In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
|
|
344
|
+
- If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
|
|
345
|
+
|
|
264
346
|
## How the RLM Loop Works
|
|
265
347
|
|
|
266
348
|
Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
|
|
@@ -399,7 +481,7 @@ rlm_code/
|
|
|
399
481
|
harness/ # Tool-using coding harness (/harness)
|
|
400
482
|
```
|
|
401
483
|
|
|
402
|
-
##
|
|
484
|
+
## Resources
|
|
403
485
|
|
|
404
486
|
Full docs: https://superagenticai.github.io/rlm-code/
|
|
405
487
|
|
|
@@ -6,20 +6,18 @@
|
|
|
6
6
|
</a>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
<a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
|
|
22
|
-
</p>
|
|
9
|
+
[](https://pypi.org/project/rlm-code/)
|
|
10
|
+
[](https://pypi.org/project/rlm-code/)
|
|
11
|
+
[](https://pypi.org/project/rlm-code/)
|
|
12
|
+
[](https://pypi.org/project/rlm-code/)
|
|
13
|
+
[CI](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
|
|
14
|
+
[Pre-commit](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
|
|
15
|
+
[Deploy Docs](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
|
|
16
|
+
[Release](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
|
|
17
|
+
[Documentation](https://superagenticai.github.io/rlm-code/)
|
|
18
|
+
[GitHub Stars](https://github.com/SuperagenticAI/rlm-code/stargazers)
|
|
19
|
+
[GitHub Issues](https://github.com/SuperagenticAI/rlm-code/issues)
|
|
20
|
+
[GitHub Pull Requests](https://github.com/SuperagenticAI/rlm-code/pulls)
|
|
23
21
|
|
|
24
22
|
**Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
|
|
25
23
|
|
|
@@ -27,6 +25,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
27
25
|
|
|
28
26
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
29
27
|
|
|
28
|
+
## Release v0.1.7
|
|
29
|
+
|
|
30
|
+
This release adds HALO-style trace analysis as a new RLM environment.
|
|
31
|
+
|
|
32
|
+
- New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
|
|
33
|
+
- Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
|
|
34
|
+
- Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
|
|
35
|
+
- `/rlm` help/docs updated for `env=trace_analysis`
|
|
36
|
+
- Dedicated trace analysis docs under the Core Engine section
|
|
37
|
+
|
|
38
|
+
Example:
|
|
39
|
+
|
|
40
|
+
```text
|
|
41
|
+
/rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Documentation
|
|
45
|
+
|
|
46
|
+
<p align="center">
|
|
47
|
+
<a href="https://superagenticai.github.io/rlm-code/">
|
|
48
|
+
<img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
|
|
49
|
+
</a>
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
<p align="center">
|
|
53
|
+
<a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
|
|
54
|
+
</p>
|
|
55
|
+
|
|
30
56
|
## Install
|
|
31
57
|
|
|
32
58
|
```bash
|
|
@@ -168,6 +194,62 @@ Notes:
|
|
|
168
194
|
- In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
|
|
169
195
|
- In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
|
|
170
196
|
|
|
197
|
+
### 8. CodeMode with UTCP and Cloudflare MCP
|
|
198
|
+
|
|
199
|
+
Use these server entries in your project `rlm_config.yaml`:
|
|
200
|
+
|
|
201
|
+
```yaml
|
|
202
|
+
mcp_servers:
|
|
203
|
+
utcp-codemode:
|
|
204
|
+
name: utcp-codemode
|
|
205
|
+
description: "Local CodeMode MCP bridge"
|
|
206
|
+
enabled: true
|
|
207
|
+
auto_connect: false
|
|
208
|
+
timeout_seconds: 30
|
|
209
|
+
retry_attempts: 3
|
|
210
|
+
transport:
|
|
211
|
+
type: stdio
|
|
212
|
+
command: npx
|
|
213
|
+
args:
|
|
214
|
+
- "@utcp/code-mode-mcp"
|
|
215
|
+
|
|
216
|
+
cloudflare-codemode:
|
|
217
|
+
name: cloudflare-codemode
|
|
218
|
+
description: "Cloudflare MCP via remote bridge"
|
|
219
|
+
enabled: true
|
|
220
|
+
auto_connect: false
|
|
221
|
+
timeout_seconds: 30
|
|
222
|
+
retry_attempts: 3
|
|
223
|
+
transport:
|
|
224
|
+
type: stdio
|
|
225
|
+
command: npx
|
|
226
|
+
args:
|
|
227
|
+
- "mcp-remote"
|
|
228
|
+
- "https://mcp.cloudflare.com/mcp"
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
UTCP path (native CodeMode in current release):
|
|
232
|
+
|
|
233
|
+
```text
|
|
234
|
+
/mcp-connect utcp-codemode
|
|
235
|
+
/mcp-tools utcp-codemode
|
|
236
|
+
/harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Cloudflare path (recommended strategy today):
|
|
240
|
+
|
|
241
|
+
```text
|
|
242
|
+
/mcp-connect cloudflare-codemode
|
|
243
|
+
/mcp-tools cloudflare-codemode
|
|
244
|
+
/harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Notes:
|
|
248
|
+
|
|
249
|
+
- On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
|
|
250
|
+
- In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
|
|
251
|
+
- If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
|
|
252
|
+
|
|
171
253
|
## How the RLM Loop Works
|
|
172
254
|
|
|
173
255
|
Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
|
|
@@ -306,7 +388,7 @@ rlm_code/
|
|
|
306
388
|
harness/ # Tool-using coding harness (/harness)
|
|
307
389
|
```
|
|
308
390
|
|
|
309
|
-
##
|
|
391
|
+
## Resources
|
|
310
392
|
|
|
311
393
|
Full docs: https://superagenticai.github.io/rlm-code/
|
|
312
394
|
|
|
@@ -112,6 +112,7 @@ class SlashCommandHandler:
|
|
|
112
112
|
self.rlm_runner = RLMRunner(
|
|
113
113
|
llm_connector=self.llm_connector,
|
|
114
114
|
execution_engine=self.execution_engine,
|
|
115
|
+
mcp_manager=self.mcp_manager,
|
|
115
116
|
reward_profile=reward_profile,
|
|
116
117
|
benchmark_pack_paths=benchmark_pack_paths,
|
|
117
118
|
)
|
|
@@ -1442,7 +1443,7 @@ class SlashCommandHandler:
|
|
|
1442
1443
|
Usage:
|
|
1443
1444
|
/harness tools [mcp=on|off]
|
|
1444
1445
|
/harness doctor
|
|
1445
|
-
/harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]
|
|
1446
|
+
/harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]]
|
|
1446
1447
|
"""
|
|
1447
1448
|
if not args or args[0].lower() in {"help", "--help"}:
|
|
1448
1449
|
console.print()
|
|
@@ -1450,7 +1451,8 @@ class SlashCommandHandler:
|
|
|
1450
1451
|
console.print(" [yellow]/harness tools [mcp=on|off][/yellow]")
|
|
1451
1452
|
console.print(" [yellow]/harness doctor[/yellow]")
|
|
1452
1453
|
console.print(
|
|
1453
|
-
" [yellow]/harness run <task> [steps=N] [mcp=on|off] [
|
|
1454
|
+
" [yellow]/harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
|
|
1455
|
+
"[strategy=tool_call|codemode] [tools=name[,name2]][/yellow]"
|
|
1454
1456
|
)
|
|
1455
1457
|
console.print()
|
|
1456
1458
|
return
|
|
@@ -1555,6 +1557,8 @@ class SlashCommandHandler:
|
|
|
1555
1557
|
include_mcp = True
|
|
1556
1558
|
max_steps = 10
|
|
1557
1559
|
allowlist: list[str] | None = None
|
|
1560
|
+
strategy = "tool_call"
|
|
1561
|
+
mcp_server: str | None = None
|
|
1558
1562
|
task_tokens: list[str] = []
|
|
1559
1563
|
|
|
1560
1564
|
for token in args[1:]:
|
|
@@ -1568,6 +1572,16 @@ class SlashCommandHandler:
|
|
|
1568
1572
|
elif lowered.startswith("mcp="):
|
|
1569
1573
|
value = token.split("=", 1)[1].strip().lower()
|
|
1570
1574
|
include_mcp = value not in {"off", "false", "0", "no"}
|
|
1575
|
+
elif lowered.startswith("mcp_server="):
|
|
1576
|
+
mcp_server = token.split("=", 1)[1].strip() or None
|
|
1577
|
+
elif lowered.startswith("strategy="):
|
|
1578
|
+
raw_strategy = token.split("=", 1)[1].strip().lower().replace("-", "_")
|
|
1579
|
+
if raw_strategy not in {"tool_call", "codemode"}:
|
|
1580
|
+
show_error_message(
|
|
1581
|
+
"Invalid strategy value. Use strategy=tool_call|codemode."
|
|
1582
|
+
)
|
|
1583
|
+
return
|
|
1584
|
+
strategy = raw_strategy
|
|
1571
1585
|
elif lowered.startswith("tools="):
|
|
1572
1586
|
raw = token.split("=", 1)[1].strip()
|
|
1573
1587
|
parsed = [part.strip() for part in raw.split(",") if part.strip()]
|
|
@@ -1578,15 +1592,25 @@ class SlashCommandHandler:
|
|
|
1578
1592
|
task = " ".join(task_tokens).strip()
|
|
1579
1593
|
if not task:
|
|
1580
1594
|
show_error_message(
|
|
1581
|
-
"Usage: /harness run <task> [steps=N] [mcp=on|off] [
|
|
1595
|
+
"Usage: /harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
|
|
1596
|
+
"[strategy=tool_call|codemode] [tools=name[,name2]]"
|
|
1582
1597
|
)
|
|
1583
1598
|
return
|
|
1599
|
+
if strategy == "codemode" and not include_mcp:
|
|
1600
|
+
show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
|
|
1601
|
+
include_mcp = True
|
|
1602
|
+
if strategy == "codemode" and allowlist:
|
|
1603
|
+
show_warning_message("tools=... allowlist is ignored for strategy=codemode.")
|
|
1604
|
+
allowlist = None
|
|
1584
1605
|
|
|
1585
1606
|
console.print()
|
|
1586
1607
|
console.print("[bold cyan]🛠 Running Harness[/bold cyan]")
|
|
1587
1608
|
console.print(f" Task: [cyan]{task}[/cyan]")
|
|
1588
1609
|
console.print(f" Max steps: [cyan]{max_steps}[/cyan]")
|
|
1589
1610
|
console.print(f" MCP tools: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
|
|
1611
|
+
console.print(f" Strategy: [cyan]{strategy}[/cyan]")
|
|
1612
|
+
if mcp_server:
|
|
1613
|
+
console.print(f" MCP server: [cyan]{mcp_server}[/cyan]")
|
|
1590
1614
|
if allowlist:
|
|
1591
1615
|
console.print(f" Tool allowlist: [cyan]{', '.join(allowlist)}[/cyan]")
|
|
1592
1616
|
console.print()
|
|
@@ -1596,6 +1620,8 @@ class SlashCommandHandler:
|
|
|
1596
1620
|
max_steps=max_steps,
|
|
1597
1621
|
include_mcp=include_mcp,
|
|
1598
1622
|
tool_allowlist=allowlist,
|
|
1623
|
+
strategy=strategy,
|
|
1624
|
+
mcp_server=mcp_server,
|
|
1599
1625
|
)
|
|
1600
1626
|
|
|
1601
1627
|
self.current_context["harness_last_response"] = result.final_response
|
|
@@ -1658,8 +1684,8 @@ class SlashCommandHandler:
|
|
|
1658
1684
|
Manage RLM runs.
|
|
1659
1685
|
|
|
1660
1686
|
Usage:
|
|
1661
|
-
/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1662
|
-
/rlm bench [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1687
|
+
/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model]
|
|
1688
|
+
/rlm bench [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1663
1689
|
/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N]
|
|
1664
1690
|
/rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json]
|
|
1665
1691
|
/rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path]
|
|
@@ -1670,8 +1696,8 @@ class SlashCommandHandler:
|
|
|
1670
1696
|
/rlm status [run_id]
|
|
1671
1697
|
/rlm abort [run_id|all]
|
|
1672
1698
|
/rlm replay [run_id|latest]
|
|
1673
|
-
/rlm doctor [env=generic|dspy|pure_rlm] [--json]
|
|
1674
|
-
/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
|
|
1699
|
+
/rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json]
|
|
1700
|
+
/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
|
|
1675
1701
|
/rlm chat status [session=name]
|
|
1676
1702
|
/rlm chat reset [session=name]
|
|
1677
1703
|
/rlm observability
|
|
@@ -1682,13 +1708,14 @@ class SlashCommandHandler:
|
|
|
1682
1708
|
console.print("[bold cyan]🧠 RLM Commands[/bold cyan]")
|
|
1683
1709
|
console.print(
|
|
1684
1710
|
" [yellow]/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] "
|
|
1685
|
-
f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] "
|
|
1711
|
+
f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] "
|
|
1686
1712
|
"[sub=provider/model][/yellow]"
|
|
1687
1713
|
)
|
|
1688
1714
|
console.print(
|
|
1689
1715
|
" [yellow]/rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
|
|
1716
|
+
"[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
|
|
1690
1717
|
"[pack=path[,path2]] [limit=N] [steps=N] "
|
|
1691
|
-
f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model][/yellow]"
|
|
1718
|
+
f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model][/yellow]"
|
|
1692
1719
|
)
|
|
1693
1720
|
console.print(
|
|
1694
1721
|
" [yellow]/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
|
|
@@ -1714,9 +1741,9 @@ class SlashCommandHandler:
|
|
|
1714
1741
|
console.print(" [yellow]/rlm status [run_id][/yellow]")
|
|
1715
1742
|
console.print(" [yellow]/rlm abort [run_id|all][/yellow]")
|
|
1716
1743
|
console.print(" [yellow]/rlm replay [run_id|latest][/yellow]")
|
|
1717
|
-
console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm] [--json][/yellow]")
|
|
1744
|
+
console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json][/yellow]")
|
|
1718
1745
|
console.print(
|
|
1719
|
-
" [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] "
|
|
1746
|
+
" [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] "
|
|
1720
1747
|
f"[children=N] [parallel=N] [budget=N] [framework={framework_opts}] "
|
|
1721
1748
|
"[sub=provider/model][/yellow]"
|
|
1722
1749
|
)
|
|
@@ -2108,7 +2135,7 @@ class SlashCommandHandler:
|
|
|
2108
2135
|
task = " ".join(task_tokens).strip()
|
|
2109
2136
|
if not task:
|
|
2110
2137
|
show_error_message(
|
|
2111
|
-
"Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm] "
|
|
2138
|
+
"Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm|trace_analysis] "
|
|
2112
2139
|
"[depth=N] [children=N] [parallel=N] [budget=N] "
|
|
2113
2140
|
f"[framework={framework_opts}] "
|
|
2114
2141
|
"[branch=N] [sub=provider/model]"
|
|
@@ -2521,6 +2548,9 @@ class SlashCommandHandler:
|
|
|
2521
2548
|
environment: str | None = None
|
|
2522
2549
|
sub_model: str | None = None
|
|
2523
2550
|
sub_provider: str | None = None
|
|
2551
|
+
include_mcp = False
|
|
2552
|
+
mcp_server: str | None = None
|
|
2553
|
+
harness_strategy = "tool_call"
|
|
2524
2554
|
|
|
2525
2555
|
for token in args[1:]:
|
|
2526
2556
|
lowered = token.lower()
|
|
@@ -2537,6 +2567,19 @@ class SlashCommandHandler:
|
|
|
2537
2567
|
)
|
|
2538
2568
|
return
|
|
2539
2569
|
mode = resolved_mode
|
|
2570
|
+
elif lowered.startswith("mcp="):
|
|
2571
|
+
value = token.split("=", 1)[1].strip().lower()
|
|
2572
|
+
include_mcp = value not in {"off", "false", "0", "no"}
|
|
2573
|
+
elif lowered.startswith("strategy="):
|
|
2574
|
+
strategy_token = token.split("=", 1)[1].strip().lower().replace("-", "_")
|
|
2575
|
+
if strategy_token not in {"tool_call", "codemode"}:
|
|
2576
|
+
show_error_message(
|
|
2577
|
+
"Invalid strategy value. Use strategy=tool_call|codemode."
|
|
2578
|
+
)
|
|
2579
|
+
return
|
|
2580
|
+
harness_strategy = strategy_token
|
|
2581
|
+
elif lowered.startswith("mcp_server="):
|
|
2582
|
+
mcp_server = token.split("=", 1)[1].strip() or None
|
|
2540
2583
|
elif lowered.startswith("pack="):
|
|
2541
2584
|
raw_paths = token.split("=", 1)[1].strip()
|
|
2542
2585
|
if not raw_paths:
|
|
@@ -2593,8 +2636,10 @@ class SlashCommandHandler:
|
|
|
2593
2636
|
else:
|
|
2594
2637
|
show_error_message(
|
|
2595
2638
|
"Usage: /rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
|
|
2639
|
+
"[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
|
|
2596
2640
|
"[pack=path[,path2]] [limit=N] "
|
|
2597
|
-
f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}]
|
|
2641
|
+
f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] "
|
|
2642
|
+
"[env=generic|dspy|pure_rlm] [sub=provider/model]\n"
|
|
2598
2643
|
" /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
|
|
2599
2644
|
" /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
|
|
2600
2645
|
" /rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
|
|
@@ -2602,6 +2647,30 @@ class SlashCommandHandler:
|
|
|
2602
2647
|
)
|
|
2603
2648
|
return
|
|
2604
2649
|
|
|
2650
|
+
if mode == "harness" and harness_strategy == "codemode" and not include_mcp:
|
|
2651
|
+
show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
|
|
2652
|
+
include_mcp = True
|
|
2653
|
+
|
|
2654
|
+
if mode != "harness" and include_mcp:
|
|
2655
|
+
show_warning_message("mcp=on is only used for mode=harness. Ignoring MCP settings.")
|
|
2656
|
+
include_mcp = False
|
|
2657
|
+
mcp_server = None
|
|
2658
|
+
elif mode != "harness" and mcp_server:
|
|
2659
|
+
show_warning_message(
|
|
2660
|
+
"mcp_server is only used for mode=harness with mcp=on. Ignoring."
|
|
2661
|
+
)
|
|
2662
|
+
mcp_server = None
|
|
2663
|
+
elif mode == "harness" and mcp_server and not include_mcp:
|
|
2664
|
+
show_warning_message(
|
|
2665
|
+
"mcp_server provided but mcp=off. MCP server filter will be ignored."
|
|
2666
|
+
)
|
|
2667
|
+
mcp_server = None
|
|
2668
|
+
if mode != "harness" and harness_strategy != "tool_call":
|
|
2669
|
+
show_warning_message(
|
|
2670
|
+
"strategy is only used for mode=harness. Resetting to tool_call."
|
|
2671
|
+
)
|
|
2672
|
+
harness_strategy = "tool_call"
|
|
2673
|
+
|
|
2605
2674
|
if list_only:
|
|
2606
2675
|
try:
|
|
2607
2676
|
rows = self.rlm_runner.benchmark_presets(pack_paths=pack_paths_override)
|
|
@@ -2681,6 +2750,11 @@ class SlashCommandHandler:
|
|
|
2681
2750
|
if timeout is not None:
|
|
2682
2751
|
console.print(f" Override timeout: [cyan]{timeout}s[/cyan]")
|
|
2683
2752
|
console.print(f" Branch width: [cyan]{branch_width}[/cyan]")
|
|
2753
|
+
if mode == "harness":
|
|
2754
|
+
console.print(f" Harness strategy: [cyan]{harness_strategy}[/cyan]")
|
|
2755
|
+
console.print(f" Harness MCP: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
|
|
2756
|
+
if include_mcp and mcp_server:
|
|
2757
|
+
console.print(f" Harness MCP server: [cyan]{mcp_server}[/cyan]")
|
|
2684
2758
|
if pack_paths_override:
|
|
2685
2759
|
console.print(f" Benchmark packs: [cyan]{', '.join(pack_paths_override)}[/cyan]")
|
|
2686
2760
|
if environment:
|
|
@@ -2704,6 +2778,9 @@ class SlashCommandHandler:
|
|
|
2704
2778
|
branch_width=branch_width,
|
|
2705
2779
|
sub_model=sub_model,
|
|
2706
2780
|
sub_provider=sub_provider,
|
|
2781
|
+
include_mcp=include_mcp,
|
|
2782
|
+
mcp_server=mcp_server,
|
|
2783
|
+
harness_strategy=harness_strategy,
|
|
2707
2784
|
pack_paths=pack_paths_override,
|
|
2708
2785
|
)
|
|
2709
2786
|
except ValueError as exc:
|
|
@@ -4413,7 +4490,7 @@ class SlashCommandHandler:
|
|
|
4413
4490
|
|
|
4414
4491
|
[bold magenta]RLM Workflows:[/bold magenta]
|
|
4415
4492
|
[yellow]/rlm run[/yellow] <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run an RLM coding episode
|
|
4416
|
-
[yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
|
|
4493
|
+
[yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
|
|
4417
4494
|
[yellow]/rlm bench compare[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] - Gate regressions
|
|
4418
4495
|
[yellow]/rlm bench validate[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json] - CI-style gate output
|
|
4419
4496
|
[yellow]/rlm bench report[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path] - Export compare report
|
|
@@ -4431,7 +4508,7 @@ class SlashCommandHandler:
|
|
|
4431
4508
|
[yellow]/rlm observability[/yellow] - Show local/MLflow observability sink status
|
|
4432
4509
|
[yellow]/harness tools[/yellow] [mcp=on|off] - List coding harness tools (local + MCP)
|
|
4433
4510
|
[yellow]/harness doctor[/yellow] - Show harness tool coverage report
|
|
4434
|
-
[yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [tools=name[,name2]] - Run tool-using coding harness
|
|
4511
|
+
[yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]] - Run tool-using coding harness
|
|
4435
4512
|
|
|
4436
4513
|
[bold magenta]Optimization (GEPA):[/bold magenta]
|
|
4437
4514
|
[yellow]/optimize-start[/yellow] [budget] - Start GEPA optimization workflow
|