rlm-code 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (309)
  1. rlm_code-0.1.7/CHANGELOG.md +72 -0
  2. {rlm_code-0.1.5 → rlm_code-0.1.7}/PKG-INFO +98 -16
  3. {rlm_code-0.1.5 → rlm_code-0.1.7}/README.md +97 -15
  4. {rlm_code-0.1.5 → rlm_code-0.1.7}/pyproject.toml +1 -1
  5. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/__init__.py +1 -1
  6. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/slash_commands.py +92 -15
  7. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/config.py +1 -1
  8. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/harness/registry.py +306 -5
  9. rlm_code-0.1.7/rlm_code/harness/runner.py +710 -0
  10. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/__init__.py +1 -1
  11. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/server/tools.py +1 -0
  12. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/action_planner.py +3 -0
  13. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/benchmark_manager.py +112 -23
  14. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/benchmarks.py +40 -0
  15. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/environments.py +245 -0
  16. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/runner.py +15 -0
  17. rlm_code-0.1.7/rlm_code/sandbox/runtimes/monty_runtime.py +72 -0
  18. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/registry.py +27 -1
  19. rlm_code-0.1.7/rlm_code/traces/__init__.py +6 -0
  20. rlm_code-0.1.7/rlm_code/traces/index.py +170 -0
  21. rlm_code-0.1.7/rlm_code/traces/models.py +103 -0
  22. rlm_code-0.1.7/rlm_code/traces/store.py +221 -0
  23. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_phase3.py +25 -2
  24. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_phase4.py +2 -1
  25. rlm_code-0.1.7/tests/test_harness_registry.py +176 -0
  26. rlm_code-0.1.7/tests/test_harness_runner.py +182 -0
  27. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_provider_registry.py +6 -1
  28. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_rlm_runner.py +97 -0
  29. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_sandbox_runtimes.py +46 -0
  30. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_slash_harness_command.py +41 -5
  31. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_slash_rlm_command.py +62 -0
  32. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_slash_sandbox_command.py +23 -0
  33. rlm_code-0.1.7/tests/test_trace_analysis.py +115 -0
  34. rlm_code-0.1.5/CHANGELOG.md +0 -33
  35. rlm_code-0.1.5/rlm_code/harness/runner.py +0 -288
  36. rlm_code-0.1.5/tests/test_harness_registry.py +0 -46
  37. rlm_code-0.1.5/tests/test_harness_runner.py +0 -64
  38. {rlm_code-0.1.5 → rlm_code-0.1.7}/.gitignore +0 -0
  39. {rlm_code-0.1.5 → rlm_code-0.1.7}/LICENSE +0 -0
  40. {rlm_code-0.1.5 → rlm_code-0.1.7}/NOTICE +0 -0
  41. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/__init__.py +0 -0
  42. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/agent.py +0 -0
  43. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/agents/__init__.py +0 -0
  44. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/agents/rlm_agent.py +0 -0
  45. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/callbacks/__init__.py +0 -0
  46. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/callbacks/code_execution.py +0 -0
  47. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/cli.py +0 -0
  48. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/code_executor.py +0 -0
  49. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/events.py +0 -0
  50. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/__init__.py +0 -0
  51. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/base.py +0 -0
  52. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/lazy.py +0 -0
  53. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/loader.py +0 -0
  54. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/parsers/__init__.py +0 -0
  55. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/parsers/base.py +0 -0
  56. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/parsers/pdf.py +0 -0
  57. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/parsers/text.py +0 -0
  58. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/sources/__init__.py +0 -0
  59. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/sources/base.py +0 -0
  60. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/sources/gcs.py +0 -0
  61. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/files/sources/local.py +0 -0
  62. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/llm.py +0 -0
  63. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/logging/__init__.py +0 -0
  64. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/logging/rlm_logger.py +0 -0
  65. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/logging/verbose.py +0 -0
  66. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/main.py +0 -0
  67. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/prompts.py +0 -0
  68. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/repl/__init__.py +0 -0
  69. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/repl/local_repl.py +0 -0
  70. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/repl/safe_builtins.py +0 -0
  71. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/templates/index.html +0 -0
  72. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/tools/__init__.py +0 -0
  73. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/types.py +0 -0
  74. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/usage.py +0 -0
  75. {rlm_code-0.1.5 → rlm_code-0.1.7}/adk_rlm/web.py +0 -0
  76. {rlm_code-0.1.5 → rlm_code-0.1.7}/eval/packs/README.md +0 -0
  77. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/__main__.py +0 -0
  78. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/__init__.py +0 -0
  79. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/config_command.py +0 -0
  80. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/create_command.py +0 -0
  81. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/demo_command.py +0 -0
  82. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/export_command.py +0 -0
  83. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/init_command.py +0 -0
  84. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/interactive_command.py +0 -0
  85. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/mcp_command.py +0 -0
  86. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/models_command.py +0 -0
  87. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/nl_command_router.py +0 -0
  88. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/optimize_command.py +0 -0
  89. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/commands/run_command.py +0 -0
  90. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/__init__.py +0 -0
  91. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/debug_logger.py +0 -0
  92. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/directory_utils.py +0 -0
  93. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/exceptions.py +0 -0
  94. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/logging.py +0 -0
  95. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/venv_utils.py +0 -0
  96. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/core/version_checker.py +0 -0
  97. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/__init__.py +0 -0
  98. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/phase2_demo.py +0 -0
  99. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/phase3_demo.py +0 -0
  100. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/phase4_demo.py +0 -0
  101. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/examples/pure_rlm_demo.py +0 -0
  102. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/execution/__init__.py +0 -0
  103. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/execution/engine.py +0 -0
  104. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/execution/sandbox.py +0 -0
  105. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/export/__init__.py +0 -0
  106. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/export/handler.py +0 -0
  107. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/export/package_builder.py +0 -0
  108. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/generators/evaluation_generator.py +0 -0
  109. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/generators/gepa_generator.py +0 -0
  110. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/harness/__init__.py +0 -0
  111. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/main.py +0 -0
  112. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/client_manager.py +0 -0
  113. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/config.py +0 -0
  114. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/exceptions.py +0 -0
  115. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/retry.py +0 -0
  116. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/server/__init__.py +0 -0
  117. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/server/rlm_server.py +0 -0
  118. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/session_wrapper.py +0 -0
  119. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/__init__.py +0 -0
  120. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/factory.py +0 -0
  121. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/sse_transport.py +0 -0
  122. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/stdio_transport.py +0 -0
  123. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/transports/websocket_transport.py +0 -0
  124. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/mcp/utils.py +0 -0
  125. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/__init__.py +0 -0
  126. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/cache.py +0 -0
  127. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/code_generator.py +0 -0
  128. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/dspy_reference_loader.py +0 -0
  129. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/llm_connector.py +0 -0
  130. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/model_manager.py +0 -0
  131. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/__init__.py +0 -0
  132. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/acp_discovery.py +0 -0
  133. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/local_discovery.py +0 -0
  134. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/model_catalog.py +0 -0
  135. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/providers/registry.py +0 -0
  136. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/streaming.py +0 -0
  137. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/models/task_collector.py +0 -0
  138. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/optimization/__init__.py +0 -0
  139. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/optimization/data_collector.py +0 -0
  140. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/optimization/executor.py +0 -0
  141. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/optimization/workflow_manager.py +0 -0
  142. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/__init__.py +0 -0
  143. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/context_manager.py +0 -0
  144. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/dspy_md_generator.py +0 -0
  145. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/initializer.py +0 -0
  146. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/project/scanner.py +0 -0
  147. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/py.typed +0 -0
  148. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/__init__.py +0 -0
  149. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/__init__.py +0 -0
  150. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/audit.py +0 -0
  151. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/gate.py +0 -0
  152. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/handlers.py +0 -0
  153. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/approval/policy.py +0 -0
  154. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/chat_session.py +0 -0
  155. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/code_interpreter.py +0 -0
  156. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/comparison.py +0 -0
  157. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/config_schema.py +0 -0
  158. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/context_store.py +0 -0
  159. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/delegation.py +0 -0
  160. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/docker_interpreter.py +0 -0
  161. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/events.py +0 -0
  162. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/__init__.py +0 -0
  163. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
  164. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/base.py +0 -0
  165. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
  166. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
  167. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
  168. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
  169. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/registry.py +0 -0
  170. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/leaderboard.py +0 -0
  171. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/memory_compaction.py +0 -0
  172. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/mock_interpreter.py +0 -0
  173. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/monty_interpreter.py +0 -0
  174. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/observability.py +0 -0
  175. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/observability_sinks.py +0 -0
  176. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/__init__.py +0 -0
  177. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/action_policies.py +0 -0
  178. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/base.py +0 -0
  179. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/compaction_policies.py +0 -0
  180. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/registry.py +0 -0
  181. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/reward_policies.py +0 -0
  182. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/policies/termination_policies.py +0 -0
  183. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/pure_rlm_environment.py +0 -0
  184. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/repl_types.py +0 -0
  185. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/__init__.py +0 -0
  186. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/theme.py +0 -0
  187. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
  188. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
  189. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
  190. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/session_replay.py +0 -0
  191. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/task_signature.py +0 -0
  192. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/termination.py +0 -0
  193. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/trajectory.py +0 -0
  194. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/rlm/visualizer.py +0 -0
  195. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/__init__.py +0 -0
  196. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/__init__.py +0 -0
  197. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
  198. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/base.py +0 -0
  199. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
  200. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
  201. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
  202. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
  203. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
  204. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
  205. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
  206. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/sandbox/superbox.py +0 -0
  207. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/session/__init__.py +0 -0
  208. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/session/state_manager.py +0 -0
  209. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/.env.example +0 -0
  210. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/adapters.py +0 -0
  211. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/async_streaming.py +0 -0
  212. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/complete_programs.py +0 -0
  213. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/dspy_config_example.yaml +0 -0
  214. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/evaluation.py +0 -0
  215. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/industry_templates.py +0 -0
  216. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/optimizers.py +0 -0
  217. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/retrievers.py +0 -0
  218. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
  219. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/tests/__init__.py +0 -0
  220. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/tests/rlm/__init__.py +0 -0
  221. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/tests/rlm/test_phase2.py +0 -0
  222. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
  223. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/__init__.py +0 -0
  224. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/agent_collab_view.py +0 -0
  225. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/animations.py +0 -0
  226. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/conversation.py +0 -0
  227. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/design_system.py +0 -0
  228. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/diff_viewer.py +0 -0
  229. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/notifications.py +0 -0
  230. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/persistent_shell.py +0 -0
  231. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/prompt_widget.py +0 -0
  232. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/prompts.py +0 -0
  233. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/pty_terminal.py +0 -0
  234. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/resizable_divider.py +0 -0
  235. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/thinking_display.py +0 -0
  236. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/tui_app.py +0 -0
  237. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/tui_utils.py +0 -0
  238. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/ui/welcome.py +0 -0
  239. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/__init__.py +0 -0
  240. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/anti_patterns.py +0 -0
  241. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/auto_fixer.py +0 -0
  242. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/best_practices.py +0 -0
  243. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/code_validator.py +0 -0
  244. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/config_validator.py +0 -0
  245. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/exceptions.py +0 -0
  246. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/input_validator.py +0 -0
  247. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/learning_integration.py +0 -0
  248. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/models.py +0 -0
  249. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/module_validator.py +0 -0
  250. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/predictor_validator.py +0 -0
  251. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/quality_scorer.py +0 -0
  252. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/report_generator.py +0 -0
  253. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/security.py +0 -0
  254. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/security_validator.py +0 -0
  255. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/signature_validator.py +0 -0
  256. {rlm_code-0.1.5 → rlm_code-0.1.7}/rlm_code/validation/validator.py +0 -0
  257. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/__init__.py +0 -0
  258. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/conftest.py +0 -0
  259. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
  260. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_adk_rlm_adapter.py +0 -0
  261. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_code_interpreter.py +0 -0
  262. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_deepagents_adapter.py +0 -0
  263. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
  264. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_extract_fallback.py +0 -0
  265. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_framework_registry_coverage.py +0 -0
  266. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_google_adk_adapter.py +0 -0
  267. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_leaderboard.py +0 -0
  268. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_mock_interpreter.py +0 -0
  269. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_monty_interpreter.py +0 -0
  270. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_observability_sinks.py +0 -0
  271. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_p0_features.py +0 -0
  272. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
  273. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
  274. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_repl_history.py +0 -0
  275. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_security_hardening.py +0 -0
  276. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_session_replay.py +0 -0
  277. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_submit.py +0 -0
  278. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_task_signature.py +0 -0
  279. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/rlm/test_user_tools.py +0 -0
  280. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_anti_patterns.py +0 -0
  281. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_auto_fixer.py +0 -0
  282. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_cache.py +0 -0
  283. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_execution_engine.py +0 -0
  284. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_export_import.py +0 -0
  285. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_init_command.py +0 -0
  286. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_integration.py +0 -0
  287. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_learning_integration.py +0 -0
  288. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_mcp_utils.py +0 -0
  289. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_module_validator.py +0 -0
  290. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_optimization_workflow.py +0 -0
  291. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_persistent_shell.py +0 -0
  292. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_predictor_validator.py +0 -0
  293. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_project_scanner.py +0 -0
  294. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_prompt_widget.py +0 -0
  295. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_property_validators.py +0 -0
  296. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_provider_discovery.py +0 -0
  297. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_quality_scorer.py +0 -0
  298. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_report_generator.py +0 -0
  299. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_retry.py +0 -0
  300. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_rlm_config.py +0 -0
  301. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_rlm_dspy_environment.py +0 -0
  302. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_rlm_observability.py +0 -0
  303. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_security_validator.py +0 -0
  304. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_session_management.py +0 -0
  305. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_signature_validator.py +0 -0
  306. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_streaming.py +0 -0
  307. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_superbox.py +0 -0
  308. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_tui_utils.py +0 -0
  309. {rlm_code-0.1.5 → rlm_code-0.1.7}/tests/test_validation.py +0 -0
@@ -0,0 +1,72 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.7] - 2026-04-30
9
+
10
+ ### Added
11
+ - HALO-style `trace_analysis` RLM environment for diagnosing agent harness failures from one-span-per-line JSONL traces.
12
+ - Trace sidecar indexing with dataset rollups for trace counts, span counts, error traces, services, models, agents, token totals, and sample trace ids.
13
+ - Bounded trace inspection actions: `get_dataset_overview`, `query_traces`, `count_traces`, `view_trace`, `search_trace`, and `view_spans`.
14
+ - Large-trace safeguards: per-attribute truncation, oversized trace summaries, and higher-cap selected-span reads.
15
+ - Tests for trace indexing, querying, searching, selected-span viewing, and trace environment actions.
16
+ - Trace analysis documentation under the Core Engine docs.
17
+
18
+ ### Changed
19
+ - `/rlm` command help now advertises `env=trace_analysis` for run, chat, and doctor workflows.
20
+
21
+ ## [0.1.6] - 2026-02-20
22
+
23
+ ### Added
24
+ - Harness strategy selector with `tool_call` (default) and opt-in `codemode`.
25
+ - CodeMode execution flow in harness: MCP tool discovery (`search_tools`), typed tool surface prompt, single-program generation, guardrail validation, and MCP chain execution (`call_tool_chain`).
26
+ - Benchmark support for harness strategy comparison with CodeMode telemetry fields (`harness_strategy`, `codemode_chain_calls`, `codemode_search_calls`, `codemode_discovery_calls`, `codemode_guardrail_blocked`).
27
+ - New top-level CodeMode docs section with dedicated pages for quickstart, architecture, guardrails, and evaluation.
28
+ - Release documentation set for CodeMode:
29
+   - quickstart and operator workflow
30
+   - integration architecture and runtime controls
31
+   - provider/bridge separation model (Cloudflare-based, UTCP, custom)
32
+   - CodeMode sandbox responsibility and deployment matrix
33
+   - guardrail policy and safety runbook
34
+   - benchmark evaluation and promotion-gate criteria
35
+
36
+ ### Changed
37
+ - `/harness run` supports `strategy=tool_call|codemode` and `mcp_server=<name>`.
38
+ - `/rlm bench` in `mode=harness` supports `strategy=tool_call|codemode`.
39
+ - Harness and benchmark command handling now auto-enables MCP when `strategy=codemode` is selected.
40
+
41
+ ### Security
42
+ - Added explicit CodeMode guardrail policy documentation with blocked API classes and runtime limit defaults.
43
+ - CodeMode path remains opt-in; default harness behavior remains strict baseline `strategy=tool_call`.
44
+
45
+ ## [0.1.5] - 2026-02-15
46
+
47
+ Initial public release of **RLM Code**.
48
+
49
+ ### Added
50
+ - Unified Textual TUI with tabs for **RLM**, **Files**, **Details**, **Shell**, and **Research**.
51
+ - Recursive execution engine with multiple patterns: **pure RLM**, **harness/code-agent**, and direct LLM flows.
52
+ - Research workflows: run tracking, trajectory capture, replay, benchmark presets, compare/report flows.
53
+ - Sandbox runtime layer (**Superbox**) with profile-driven runtime selection and fallback orchestration.
54
+ - Secure runtime options including Docker and Monty, plus pluggable runtime adapters.
55
+ - LLM integrations for cloud and local model routes, including BYOK workflows and ACP connectivity.
56
+ - Coding harness with optional MCP tool integration for local/BYOK development workflows.
57
+ - Framework adapter surface for RLM-style integrations (including DSPy-native and ADK-oriented paths).
58
+ - Observability integrations (MLflow, LangFuse, Logfire, LangSmith, OpenTelemetry) via sink architecture.
59
+ - Documentation site (MkDocs Material) with onboarding, CLI, TUI, sandbox, integrations, and benchmark guides.
60
+
61
+ ### Changed
62
+ - Project identity standardized as **RLM Code** (legacy inherited naming removed from repository-facing surfaces).
63
+ - Packaging and project metadata prepared for open-source release.
64
+ - License updated to **Apache-2.0**.
65
+
66
+ ### Security
67
+ - Safer sandbox-first runtime guidance in docs and configuration defaults.
68
+ - Unsafe local `exec` usage preserved only as an explicit, opt-in path for advanced development scenarios.
69
+
70
+ [0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
71
+ [0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
72
+ [0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlm-code
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
5
5
  Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
6
6
  Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
@@ -99,20 +99,18 @@ Description-Content-Type: text/markdown
99
99
  </a>
100
100
  </p>
101
101
 
102
- <p align="center">
103
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Version" src="https://img.shields.io/pypi/v/rlm-code"></a>
104
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Python Versions" src="https://img.shields.io/pypi/pyversions/rlm-code"></a>
105
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Downloads" src="https://img.shields.io/pypi/dm/rlm-code"></a>
106
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Wheel" src="https://img.shields.io/pypi/wheel/rlm-code"></a>
107
- <a href="LICENSE"><img alt="License" src="https://img.shields.io/pypi/l/rlm-code"></a>
108
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml"><img alt="CI" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml/badge.svg"></a>
109
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml"><img alt="Pre-commit" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg"></a>
110
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml"><img alt="Docs Deploy" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg"></a>
111
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml"><img alt="Release" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg"></a>
112
- <a href="https://github.com/SuperagenticAI/rlm-code/stargazers"><img alt="GitHub Stars" src="https://img.shields.io/github/stars/SuperagenticAI/rlm-code?style=social"></a>
113
- <a href="https://github.com/SuperagenticAI/rlm-code/issues"><img alt="GitHub Issues" src="https://img.shields.io/github/issues/SuperagenticAI/rlm-code"></a>
114
- <a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
115
- </p>
102
+ [![PyPI Version](https://img.shields.io/pypi/v/rlm-code.svg)](https://pypi.org/project/rlm-code/)
103
+ [![Python Versions](https://img.shields.io/pypi/pyversions/rlm-code.svg)](https://pypi.org/project/rlm-code/)
104
+ [![PyPI Wheel](https://img.shields.io/pypi/wheel/rlm-code.svg)](https://pypi.org/project/rlm-code/)
105
+ [![License](https://img.shields.io/pypi/l/rlm-code.svg)](https://pypi.org/project/rlm-code/)
106
+ [![CI](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
107
+ [![Pre-commit](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
108
+ [![Docs Deploy](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
109
+ [![Release](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
110
+ [![Docs](https://img.shields.io/badge/Docs-RLM%20Code-ff7a18.svg?logo=readthedocs&logoColor=white)](https://superagenticai.github.io/rlm-code/)
111
+ [![GitHub Stars](https://img.shields.io/github/stars/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/stargazers)
112
+ [![GitHub Issues](https://img.shields.io/github/issues/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/issues)
113
+ [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/pulls)
116
114
 
117
115
  **Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
118
116
 
@@ -120,6 +118,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
120
118
 
121
119
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
122
120
 
121
+ ## Release v0.1.7
122
+
123
+ This release adds HALO-style trace analysis as a new RLM environment.
124
+
125
+ - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
126
+ - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
127
+ - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
128
+ - `/rlm` help/docs updated for `env=trace_analysis`
129
+ - Dedicated trace analysis docs under the Core Engine section
130
+
131
+ Example:
132
+
133
+ ```text
134
+ /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
135
+ ```
136
+
137
+ ## Documentation
138
+
139
+ <p align="center">
140
+ <a href="https://superagenticai.github.io/rlm-code/">
141
+ <img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
142
+ </a>
143
+ </p>
144
+
145
+ <p align="center">
146
+ <a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
147
+ </p>
148
+
123
149
  ## Install
124
150
 
125
151
  ```bash
@@ -261,6 +287,62 @@ Notes:
261
287
  - In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
262
288
  - In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
263
289
 
290
+ ### 8. CodeMode with UTCP and Cloudflare MCP
291
+
292
+ Use these server entries in your project `rlm_config.yaml`:
293
+
294
+ ```yaml
295
+ mcp_servers:
296
+   utcp-codemode:
297
+     name: utcp-codemode
298
+     description: "Local CodeMode MCP bridge"
299
+     enabled: true
300
+     auto_connect: false
301
+     timeout_seconds: 30
302
+     retry_attempts: 3
303
+     transport:
304
+       type: stdio
305
+       command: npx
306
+       args:
307
+         - "@utcp/code-mode-mcp"
308
+
309
+   cloudflare-codemode:
310
+     name: cloudflare-codemode
311
+     description: "Cloudflare MCP via remote bridge"
312
+     enabled: true
313
+     auto_connect: false
314
+     timeout_seconds: 30
315
+     retry_attempts: 3
316
+     transport:
317
+       type: stdio
318
+       command: npx
319
+       args:
320
+         - "mcp-remote"
321
+         - "https://mcp.cloudflare.com/mcp"
322
+ ```
323
+
324
+ UTCP path (native CodeMode in current release):
325
+
326
+ ```text
327
+ /mcp-connect utcp-codemode
328
+ /mcp-tools utcp-codemode
329
+ /harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
330
+ ```
331
+
332
+ Cloudflare path (recommended strategy today):
333
+
334
+ ```text
335
+ /mcp-connect cloudflare-codemode
336
+ /mcp-tools cloudflare-codemode
337
+ /harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
338
+ ```
339
+
340
+ Notes:
341
+
342
+ - On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
343
+ - In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
344
+ - If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
345
+
264
346
  ## How the RLM Loop Works
265
347
 
266
348
  Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
@@ -399,7 +481,7 @@ rlm_code/
399
481
  harness/ # Tool-using coding harness (/harness)
400
482
  ```
401
483
 
402
- ## Documentation
484
+ ## Resources
403
485
 
404
486
  Full docs: https://superagenticai.github.io/rlm-code/
405
487
 
@@ -6,20 +6,18 @@
6
6
  </a>
7
7
  </p>
8
8
 
9
- <p align="center">
10
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Version" src="https://img.shields.io/pypi/v/rlm-code"></a>
11
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Python Versions" src="https://img.shields.io/pypi/pyversions/rlm-code"></a>
12
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Downloads" src="https://img.shields.io/pypi/dm/rlm-code"></a>
13
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Wheel" src="https://img.shields.io/pypi/wheel/rlm-code"></a>
14
- <a href="LICENSE"><img alt="License" src="https://img.shields.io/pypi/l/rlm-code"></a>
15
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml"><img alt="CI" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml/badge.svg"></a>
16
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml"><img alt="Pre-commit" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg"></a>
17
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml"><img alt="Docs Deploy" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg"></a>
18
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml"><img alt="Release" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg"></a>
19
- <a href="https://github.com/SuperagenticAI/rlm-code/stargazers"><img alt="GitHub Stars" src="https://img.shields.io/github/stars/SuperagenticAI/rlm-code?style=social"></a>
20
- <a href="https://github.com/SuperagenticAI/rlm-code/issues"><img alt="GitHub Issues" src="https://img.shields.io/github/issues/SuperagenticAI/rlm-code"></a>
21
- <a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
22
- </p>
9
+ [![PyPI Version](https://img.shields.io/pypi/v/rlm-code.svg)](https://pypi.org/project/rlm-code/)
10
+ [![Python Versions](https://img.shields.io/pypi/pyversions/rlm-code.svg)](https://pypi.org/project/rlm-code/)
11
+ [![PyPI Wheel](https://img.shields.io/pypi/wheel/rlm-code.svg)](https://pypi.org/project/rlm-code/)
12
+ [![License](https://img.shields.io/pypi/l/rlm-code.svg)](https://pypi.org/project/rlm-code/)
13
+ [![CI](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
14
+ [![Pre-commit](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
15
+ [![Docs Deploy](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
16
+ [![Release](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
17
+ [![Docs](https://img.shields.io/badge/Docs-RLM%20Code-ff7a18.svg?logo=readthedocs&logoColor=white)](https://superagenticai.github.io/rlm-code/)
18
+ [![GitHub Stars](https://img.shields.io/github/stars/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/stargazers)
19
+ [![GitHub Issues](https://img.shields.io/github/issues/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/issues)
20
+ [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/pulls)
23
21
 
24
22
  **Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
25
23
 
@@ -27,6 +25,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
27
25
 
28
26
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
29
27
 
28
+ ## Release v0.1.7
29
+
30
+ This release adds HALO-style trace analysis as a new RLM environment.
31
+
32
+ - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
33
+ - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
34
+ - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
35
+ - `/rlm` help/docs updated for `env=trace_analysis`
36
+ - Dedicated trace analysis docs under the Core Engine section
37
+
38
+ Example:
39
+
40
+ ```text
41
+ /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
42
+ ```
43
+
44
+ ## Documentation
45
+
46
+ <p align="center">
47
+ <a href="https://superagenticai.github.io/rlm-code/">
48
+ <img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
49
+ </a>
50
+ </p>
51
+
52
+ <p align="center">
53
+ <a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
54
+ </p>
55
+
30
56
  ## Install
31
57
 
32
58
  ```bash
@@ -168,6 +194,62 @@ Notes:
168
194
  - In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
169
195
  - In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
170
196
 
197
+ ### 8. CodeMode with UTCP and Cloudflare MCP
198
+
199
+ Use these server entries in your project `rlm_config.yaml`:
200
+
201
+ ```yaml
202
+ mcp_servers:
203
+   utcp-codemode:
204
+     name: utcp-codemode
205
+     description: "Local CodeMode MCP bridge"
206
+     enabled: true
207
+     auto_connect: false
208
+     timeout_seconds: 30
209
+     retry_attempts: 3
210
+     transport:
211
+       type: stdio
212
+       command: npx
213
+       args:
214
+         - "@utcp/code-mode-mcp"
215
+
216
+   cloudflare-codemode:
217
+     name: cloudflare-codemode
218
+     description: "Cloudflare MCP via remote bridge"
219
+     enabled: true
220
+     auto_connect: false
221
+     timeout_seconds: 30
222
+     retry_attempts: 3
223
+     transport:
224
+       type: stdio
225
+       command: npx
226
+       args:
227
+         - "mcp-remote"
228
+         - "https://mcp.cloudflare.com/mcp"
229
+ ```
230
+
231
+ UTCP path (native CodeMode in current release):
232
+
233
+ ```text
234
+ /mcp-connect utcp-codemode
235
+ /mcp-tools utcp-codemode
236
+ /harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
237
+ ```
238
+
239
+ Cloudflare path (recommended strategy today):
240
+
241
+ ```text
242
+ /mcp-connect cloudflare-codemode
243
+ /mcp-tools cloudflare-codemode
244
+ /harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
245
+ ```
246
+
247
+ Notes:
248
+
249
+ - On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
250
+ - In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
251
+ - If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
252
+
171
253
  ## How the RLM Loop Works
172
254
 
173
255
  Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
@@ -306,7 +388,7 @@ rlm_code/
306
388
  harness/ # Tool-using coding harness (/harness)
307
389
  ```
308
390
 
309
- ## Documentation
391
+ ## Resources
310
392
 
311
393
  Full docs: https://superagenticai.github.io/rlm-code/
312
394
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rlm-code"
7
- version = "0.1.5"
7
+ version = "0.1.7"
8
8
  description = "RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -5,5 +5,5 @@ This package provides tools for creating, managing, and optimizing DSPy componen
5
5
  through natural language interactions.
6
6
  """
7
7
 
8
- __version__ = "0.1.5"
8
+ __version__ = "0.1.7"
9
9
  __author__ = "Super Agentic AI"
@@ -112,6 +112,7 @@ class SlashCommandHandler:
112
112
  self.rlm_runner = RLMRunner(
113
113
  llm_connector=self.llm_connector,
114
114
  execution_engine=self.execution_engine,
115
+ mcp_manager=self.mcp_manager,
115
116
  reward_profile=reward_profile,
116
117
  benchmark_pack_paths=benchmark_pack_paths,
117
118
  )
@@ -1442,7 +1443,7 @@ class SlashCommandHandler:
1442
1443
  Usage:
1443
1444
  /harness tools [mcp=on|off]
1444
1445
  /harness doctor
1445
- /harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]
1446
+ /harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]]
1446
1447
  """
1447
1448
  if not args or args[0].lower() in {"help", "--help"}:
1448
1449
  console.print()
@@ -1450,7 +1451,8 @@ class SlashCommandHandler:
1450
1451
  console.print(" [yellow]/harness tools [mcp=on|off][/yellow]")
1451
1452
  console.print(" [yellow]/harness doctor[/yellow]")
1452
1453
  console.print(
1453
- " [yellow]/harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]][/yellow]"
1454
+ " [yellow]/harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
1455
+ "[strategy=tool_call|codemode] [tools=name[,name2]][/yellow]"
1454
1456
  )
1455
1457
  console.print()
1456
1458
  return
@@ -1555,6 +1557,8 @@ class SlashCommandHandler:
1555
1557
  include_mcp = True
1556
1558
  max_steps = 10
1557
1559
  allowlist: list[str] | None = None
1560
+ strategy = "tool_call"
1561
+ mcp_server: str | None = None
1558
1562
  task_tokens: list[str] = []
1559
1563
 
1560
1564
  for token in args[1:]:
@@ -1568,6 +1572,16 @@ class SlashCommandHandler:
1568
1572
  elif lowered.startswith("mcp="):
1569
1573
  value = token.split("=", 1)[1].strip().lower()
1570
1574
  include_mcp = value not in {"off", "false", "0", "no"}
1575
+ elif lowered.startswith("mcp_server="):
1576
+ mcp_server = token.split("=", 1)[1].strip() or None
1577
+ elif lowered.startswith("strategy="):
1578
+ raw_strategy = token.split("=", 1)[1].strip().lower().replace("-", "_")
1579
+ if raw_strategy not in {"tool_call", "codemode"}:
1580
+ show_error_message(
1581
+ "Invalid strategy value. Use strategy=tool_call|codemode."
1582
+ )
1583
+ return
1584
+ strategy = raw_strategy
1571
1585
  elif lowered.startswith("tools="):
1572
1586
  raw = token.split("=", 1)[1].strip()
1573
1587
  parsed = [part.strip() for part in raw.split(",") if part.strip()]
@@ -1578,15 +1592,25 @@ class SlashCommandHandler:
1578
1592
  task = " ".join(task_tokens).strip()
1579
1593
  if not task:
1580
1594
  show_error_message(
1581
- "Usage: /harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]"
1595
+ "Usage: /harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
1596
+ "[strategy=tool_call|codemode] [tools=name[,name2]]"
1582
1597
  )
1583
1598
  return
1599
+ if strategy == "codemode" and not include_mcp:
1600
+ show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
1601
+ include_mcp = True
1602
+ if strategy == "codemode" and allowlist:
1603
+ show_warning_message("tools=... allowlist is ignored for strategy=codemode.")
1604
+ allowlist = None
1584
1605
 
1585
1606
  console.print()
1586
1607
  console.print("[bold cyan]🛠 Running Harness[/bold cyan]")
1587
1608
  console.print(f" Task: [cyan]{task}[/cyan]")
1588
1609
  console.print(f" Max steps: [cyan]{max_steps}[/cyan]")
1589
1610
  console.print(f" MCP tools: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
1611
+ console.print(f" Strategy: [cyan]{strategy}[/cyan]")
1612
+ if mcp_server:
1613
+ console.print(f" MCP server: [cyan]{mcp_server}[/cyan]")
1590
1614
  if allowlist:
1591
1615
  console.print(f" Tool allowlist: [cyan]{', '.join(allowlist)}[/cyan]")
1592
1616
  console.print()
@@ -1596,6 +1620,8 @@ class SlashCommandHandler:
1596
1620
  max_steps=max_steps,
1597
1621
  include_mcp=include_mcp,
1598
1622
  tool_allowlist=allowlist,
1623
+ strategy=strategy,
1624
+ mcp_server=mcp_server,
1599
1625
  )
1600
1626
 
1601
1627
  self.current_context["harness_last_response"] = result.final_response
@@ -1658,8 +1684,8 @@ class SlashCommandHandler:
1658
1684
  Manage RLM runs.
1659
1685
 
1660
1686
  Usage:
1661
- /rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
1662
- /rlm bench [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
1687
+ /rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model]
1688
+ /rlm bench [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
1663
1689
  /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N]
1664
1690
  /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json]
1665
1691
  /rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path]
@@ -1670,8 +1696,8 @@ class SlashCommandHandler:
1670
1696
  /rlm status [run_id]
1671
1697
  /rlm abort [run_id|all]
1672
1698
  /rlm replay [run_id|latest]
1673
- /rlm doctor [env=generic|dspy|pure_rlm] [--json]
1674
- /rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
1699
+ /rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json]
1700
+ /rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
1675
1701
  /rlm chat status [session=name]
1676
1702
  /rlm chat reset [session=name]
1677
1703
  /rlm observability
@@ -1682,13 +1708,14 @@ class SlashCommandHandler:
1682
1708
  console.print("[bold cyan]🧠 RLM Commands[/bold cyan]")
1683
1709
  console.print(
1684
1710
  " [yellow]/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] "
1685
- f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] "
1711
+ f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] "
1686
1712
  "[sub=provider/model][/yellow]"
1687
1713
  )
1688
1714
  console.print(
1689
1715
  " [yellow]/rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
1716
+ "[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
1690
1717
  "[pack=path[,path2]] [limit=N] [steps=N] "
1691
- f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model][/yellow]"
1718
+ f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model][/yellow]"
1692
1719
  )
1693
1720
  console.print(
1694
1721
  " [yellow]/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
@@ -1714,9 +1741,9 @@ class SlashCommandHandler:
1714
1741
  console.print(" [yellow]/rlm status [run_id][/yellow]")
1715
1742
  console.print(" [yellow]/rlm abort [run_id|all][/yellow]")
1716
1743
  console.print(" [yellow]/rlm replay [run_id|latest][/yellow]")
1717
- console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm] [--json][/yellow]")
1744
+ console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json][/yellow]")
1718
1745
  console.print(
1719
- " [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] "
1746
+ " [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] "
1720
1747
  f"[children=N] [parallel=N] [budget=N] [framework={framework_opts}] "
1721
1748
  "[sub=provider/model][/yellow]"
1722
1749
  )
@@ -2108,7 +2135,7 @@ class SlashCommandHandler:
2108
2135
  task = " ".join(task_tokens).strip()
2109
2136
  if not task:
2110
2137
  show_error_message(
2111
- "Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm] "
2138
+ "Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm|trace_analysis] "
2112
2139
  "[depth=N] [children=N] [parallel=N] [budget=N] "
2113
2140
  f"[framework={framework_opts}] "
2114
2141
  "[branch=N] [sub=provider/model]"
@@ -2521,6 +2548,9 @@ class SlashCommandHandler:
2521
2548
  environment: str | None = None
2522
2549
  sub_model: str | None = None
2523
2550
  sub_provider: str | None = None
2551
+ include_mcp = False
2552
+ mcp_server: str | None = None
2553
+ harness_strategy = "tool_call"
2524
2554
 
2525
2555
  for token in args[1:]:
2526
2556
  lowered = token.lower()
@@ -2537,6 +2567,19 @@ class SlashCommandHandler:
2537
2567
  )
2538
2568
  return
2539
2569
  mode = resolved_mode
2570
+ elif lowered.startswith("mcp="):
2571
+ value = token.split("=", 1)[1].strip().lower()
2572
+ include_mcp = value not in {"off", "false", "0", "no"}
2573
+ elif lowered.startswith("strategy="):
2574
+ strategy_token = token.split("=", 1)[1].strip().lower().replace("-", "_")
2575
+ if strategy_token not in {"tool_call", "codemode"}:
2576
+ show_error_message(
2577
+ "Invalid strategy value. Use strategy=tool_call|codemode."
2578
+ )
2579
+ return
2580
+ harness_strategy = strategy_token
2581
+ elif lowered.startswith("mcp_server="):
2582
+ mcp_server = token.split("=", 1)[1].strip() or None
2540
2583
  elif lowered.startswith("pack="):
2541
2584
  raw_paths = token.split("=", 1)[1].strip()
2542
2585
  if not raw_paths:
@@ -2593,8 +2636,10 @@ class SlashCommandHandler:
2593
2636
  else:
2594
2637
  show_error_message(
2595
2638
  "Usage: /rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
2639
+ "[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
2596
2640
  "[pack=path[,path2]] [limit=N] "
2597
- f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model]\n"
2641
+ f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] "
2642
+ "[env=generic|dspy|pure_rlm] [sub=provider/model]\n"
2598
2643
  " /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
2599
2644
  " /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
2600
2645
  " /rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
@@ -2602,6 +2647,30 @@ class SlashCommandHandler:
2602
2647
  )
2603
2648
  return
2604
2649
 
2650
+ if mode == "harness" and harness_strategy == "codemode" and not include_mcp:
2651
+ show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
2652
+ include_mcp = True
2653
+
2654
+ if mode != "harness" and include_mcp:
2655
+ show_warning_message("mcp=on is only used for mode=harness. Ignoring MCP settings.")
2656
+ include_mcp = False
2657
+ mcp_server = None
2658
+ elif mode != "harness" and mcp_server:
2659
+ show_warning_message(
2660
+ "mcp_server is only used for mode=harness with mcp=on. Ignoring."
2661
+ )
2662
+ mcp_server = None
2663
+ elif mode == "harness" and mcp_server and not include_mcp:
2664
+ show_warning_message(
2665
+ "mcp_server provided but mcp=off. MCP server filter will be ignored."
2666
+ )
2667
+ mcp_server = None
2668
+ if mode != "harness" and harness_strategy != "tool_call":
2669
+ show_warning_message(
2670
+ "strategy is only used for mode=harness. Resetting to tool_call."
2671
+ )
2672
+ harness_strategy = "tool_call"
2673
+
2605
2674
  if list_only:
2606
2675
  try:
2607
2676
  rows = self.rlm_runner.benchmark_presets(pack_paths=pack_paths_override)
@@ -2681,6 +2750,11 @@ class SlashCommandHandler:
2681
2750
  if timeout is not None:
2682
2751
  console.print(f" Override timeout: [cyan]{timeout}s[/cyan]")
2683
2752
  console.print(f" Branch width: [cyan]{branch_width}[/cyan]")
2753
+ if mode == "harness":
2754
+ console.print(f" Harness strategy: [cyan]{harness_strategy}[/cyan]")
2755
+ console.print(f" Harness MCP: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
2756
+ if include_mcp and mcp_server:
2757
+ console.print(f" Harness MCP server: [cyan]{mcp_server}[/cyan]")
2684
2758
  if pack_paths_override:
2685
2759
  console.print(f" Benchmark packs: [cyan]{', '.join(pack_paths_override)}[/cyan]")
2686
2760
  if environment:
@@ -2704,6 +2778,9 @@ class SlashCommandHandler:
2704
2778
  branch_width=branch_width,
2705
2779
  sub_model=sub_model,
2706
2780
  sub_provider=sub_provider,
2781
+ include_mcp=include_mcp,
2782
+ mcp_server=mcp_server,
2783
+ harness_strategy=harness_strategy,
2707
2784
  pack_paths=pack_paths_override,
2708
2785
  )
2709
2786
  except ValueError as exc:
@@ -4413,7 +4490,7 @@ class SlashCommandHandler:
4413
4490
 
4414
4491
  [bold magenta]RLM Workflows:[/bold magenta]
4415
4492
  [yellow]/rlm run[/yellow] <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run an RLM coding episode
4416
- [yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
4493
+ [yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
4417
4494
  [yellow]/rlm bench compare[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] - Gate regressions
4418
4495
  [yellow]/rlm bench validate[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json] - CI-style gate output
4419
4496
  [yellow]/rlm bench report[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path] - Export compare report
@@ -4431,7 +4508,7 @@ class SlashCommandHandler:
4431
4508
  [yellow]/rlm observability[/yellow] - Show local/MLflow observability sink status
4432
4509
  [yellow]/harness tools[/yellow] [mcp=on|off] - List coding harness tools (local + MCP)
4433
4510
  [yellow]/harness doctor[/yellow] - Show harness tool coverage report
4434
- [yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [tools=name[,name2]] - Run tool-using coding harness
4511
+ [yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]] - Run tool-using coding harness
4435
4512
 
4436
4513
  [bold magenta]Optimization (GEPA):[/bold magenta]
4437
4514
  [yellow]/optimize-start[/yellow] [budget] - Start GEPA optimization workflow