rlm-code 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303)
  1. {rlm_code-0.1.0 → rlm_code-0.1.2}/CHANGELOG.md +26 -1
  2. {rlm_code-0.1.0 → rlm_code-0.1.2}/PKG-INFO +42 -16
  3. {rlm_code-0.1.0 → rlm_code-0.1.2}/README.md +41 -15
  4. {rlm_code-0.1.0 → rlm_code-0.1.2}/pyproject.toml +1 -1
  5. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/__init__.py +1 -1
  6. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/slash_commands.py +86 -7
  7. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/config.py +1 -1
  8. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/harness/registry.py +305 -5
  9. rlm_code-0.1.2/rlm_code/harness/runner.py +708 -0
  10. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/__init__.py +1 -1
  11. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/server/tools.py +1 -0
  12. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/benchmark_manager.py +114 -23
  13. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/benchmarks.py +40 -0
  14. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/runner.py +2 -0
  15. rlm_code-0.1.2/rlm_code/sandbox/runtimes/monty_runtime.py +72 -0
  16. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/registry.py +27 -1
  17. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_phase3.py +25 -2
  18. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_phase4.py +2 -1
  19. rlm_code-0.1.2/tests/test_harness_registry.py +176 -0
  20. rlm_code-0.1.2/tests/test_harness_runner.py +180 -0
  21. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_rlm_runner.py +97 -0
  22. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_sandbox_runtimes.py +46 -0
  23. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_slash_harness_command.py +41 -5
  24. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_slash_rlm_command.py +62 -0
  25. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_slash_sandbox_command.py +23 -0
  26. rlm_code-0.1.0/rlm_code/harness/runner.py +0 -288
  27. rlm_code-0.1.0/tests/test_harness_registry.py +0 -46
  28. rlm_code-0.1.0/tests/test_harness_runner.py +0 -64
  29. {rlm_code-0.1.0 → rlm_code-0.1.2}/.gitignore +0 -0
  30. {rlm_code-0.1.0 → rlm_code-0.1.2}/LICENSE +0 -0
  31. {rlm_code-0.1.0 → rlm_code-0.1.2}/NOTICE +0 -0
  32. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/__init__.py +0 -0
  33. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/agent.py +0 -0
  34. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/agents/__init__.py +0 -0
  35. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/agents/rlm_agent.py +0 -0
  36. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/callbacks/__init__.py +0 -0
  37. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/callbacks/code_execution.py +0 -0
  38. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/cli.py +0 -0
  39. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/code_executor.py +0 -0
  40. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/events.py +0 -0
  41. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/__init__.py +0 -0
  42. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/base.py +0 -0
  43. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/lazy.py +0 -0
  44. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/loader.py +0 -0
  45. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/parsers/__init__.py +0 -0
  46. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/parsers/base.py +0 -0
  47. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/parsers/pdf.py +0 -0
  48. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/parsers/text.py +0 -0
  49. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/sources/__init__.py +0 -0
  50. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/sources/base.py +0 -0
  51. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/sources/gcs.py +0 -0
  52. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/files/sources/local.py +0 -0
  53. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/llm.py +0 -0
  54. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/logging/__init__.py +0 -0
  55. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/logging/rlm_logger.py +0 -0
  56. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/logging/verbose.py +0 -0
  57. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/main.py +0 -0
  58. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/prompts.py +0 -0
  59. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/repl/__init__.py +0 -0
  60. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/repl/local_repl.py +0 -0
  61. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/repl/safe_builtins.py +0 -0
  62. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/templates/index.html +0 -0
  63. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/tools/__init__.py +0 -0
  64. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/types.py +0 -0
  65. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/usage.py +0 -0
  66. {rlm_code-0.1.0 → rlm_code-0.1.2}/adk_rlm/web.py +0 -0
  67. {rlm_code-0.1.0 → rlm_code-0.1.2}/eval/packs/README.md +0 -0
  68. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/__main__.py +0 -0
  69. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/__init__.py +0 -0
  70. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/config_command.py +0 -0
  71. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/create_command.py +0 -0
  72. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/demo_command.py +0 -0
  73. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/export_command.py +0 -0
  74. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/init_command.py +0 -0
  75. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/interactive_command.py +0 -0
  76. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/mcp_command.py +0 -0
  77. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/models_command.py +0 -0
  78. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/nl_command_router.py +0 -0
  79. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/optimize_command.py +0 -0
  80. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/commands/run_command.py +0 -0
  81. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/__init__.py +0 -0
  82. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/debug_logger.py +0 -0
  83. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/directory_utils.py +0 -0
  84. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/exceptions.py +0 -0
  85. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/logging.py +0 -0
  86. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/venv_utils.py +0 -0
  87. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/core/version_checker.py +0 -0
  88. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/__init__.py +0 -0
  89. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/phase2_demo.py +0 -0
  90. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/phase3_demo.py +0 -0
  91. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/phase4_demo.py +0 -0
  92. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/examples/pure_rlm_demo.py +0 -0
  93. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/execution/__init__.py +0 -0
  94. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/execution/engine.py +0 -0
  95. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/execution/sandbox.py +0 -0
  96. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/export/__init__.py +0 -0
  97. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/export/handler.py +0 -0
  98. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/export/package_builder.py +0 -0
  99. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/generators/evaluation_generator.py +0 -0
  100. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/generators/gepa_generator.py +0 -0
  101. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/harness/__init__.py +0 -0
  102. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/main.py +0 -0
  103. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/client_manager.py +0 -0
  104. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/config.py +0 -0
  105. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/exceptions.py +0 -0
  106. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/retry.py +0 -0
  107. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/server/__init__.py +0 -0
  108. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/server/rlm_server.py +0 -0
  109. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/session_wrapper.py +0 -0
  110. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/__init__.py +0 -0
  111. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/factory.py +0 -0
  112. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/sse_transport.py +0 -0
  113. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/stdio_transport.py +0 -0
  114. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/transports/websocket_transport.py +0 -0
  115. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/mcp/utils.py +0 -0
  116. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/__init__.py +0 -0
  117. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/cache.py +0 -0
  118. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/code_generator.py +0 -0
  119. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/dspy_reference_loader.py +0 -0
  120. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/llm_connector.py +0 -0
  121. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/model_manager.py +0 -0
  122. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/__init__.py +0 -0
  123. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/acp_discovery.py +0 -0
  124. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/local_discovery.py +0 -0
  125. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/model_catalog.py +0 -0
  126. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/providers/registry.py +0 -0
  127. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/streaming.py +0 -0
  128. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/models/task_collector.py +0 -0
  129. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/optimization/__init__.py +0 -0
  130. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/optimization/data_collector.py +0 -0
  131. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/optimization/executor.py +0 -0
  132. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/optimization/workflow_manager.py +0 -0
  133. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/__init__.py +0 -0
  134. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/context_manager.py +0 -0
  135. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/dspy_md_generator.py +0 -0
  136. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/initializer.py +0 -0
  137. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/project/scanner.py +0 -0
  138. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/py.typed +0 -0
  139. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/__init__.py +0 -0
  140. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/action_planner.py +0 -0
  141. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/__init__.py +0 -0
  142. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/audit.py +0 -0
  143. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/gate.py +0 -0
  144. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/handlers.py +0 -0
  145. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/approval/policy.py +0 -0
  146. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/chat_session.py +0 -0
  147. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/code_interpreter.py +0 -0
  148. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/comparison.py +0 -0
  149. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/config_schema.py +0 -0
  150. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/context_store.py +0 -0
  151. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/delegation.py +0 -0
  152. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/docker_interpreter.py +0 -0
  153. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/environments.py +0 -0
  154. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/events.py +0 -0
  155. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/__init__.py +0 -0
  156. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
  157. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/base.py +0 -0
  158. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
  159. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
  160. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
  161. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
  162. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/frameworks/registry.py +0 -0
  163. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/leaderboard.py +0 -0
  164. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/memory_compaction.py +0 -0
  165. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/mock_interpreter.py +0 -0
  166. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/monty_interpreter.py +0 -0
  167. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/observability.py +0 -0
  168. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/observability_sinks.py +0 -0
  169. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/__init__.py +0 -0
  170. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/action_policies.py +0 -0
  171. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/base.py +0 -0
  172. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/compaction_policies.py +0 -0
  173. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/registry.py +0 -0
  174. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/reward_policies.py +0 -0
  175. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/policies/termination_policies.py +0 -0
  176. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/pure_rlm_environment.py +0 -0
  177. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/repl_types.py +0 -0
  178. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/__init__.py +0 -0
  179. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/theme.py +0 -0
  180. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
  181. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
  182. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
  183. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/session_replay.py +0 -0
  184. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/task_signature.py +0 -0
  185. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/termination.py +0 -0
  186. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/trajectory.py +0 -0
  187. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/rlm/visualizer.py +0 -0
  188. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/__init__.py +0 -0
  189. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/__init__.py +0 -0
  190. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
  191. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/base.py +0 -0
  192. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
  193. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
  194. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
  195. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
  196. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
  197. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
  198. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
  199. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/sandbox/superbox.py +0 -0
  200. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/session/__init__.py +0 -0
  201. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/session/state_manager.py +0 -0
  202. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/.env.example +0 -0
  203. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/adapters.py +0 -0
  204. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/async_streaming.py +0 -0
  205. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/complete_programs.py +0 -0
  206. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/dspy_config_example.yaml +0 -0
  207. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/evaluation.py +0 -0
  208. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/industry_templates.py +0 -0
  209. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/optimizers.py +0 -0
  210. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/retrievers.py +0 -0
  211. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
  212. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/tests/__init__.py +0 -0
  213. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/tests/rlm/__init__.py +0 -0
  214. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/tests/rlm/test_phase2.py +0 -0
  215. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
  216. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/__init__.py +0 -0
  217. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/agent_collab_view.py +0 -0
  218. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/animations.py +0 -0
  219. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/conversation.py +0 -0
  220. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/design_system.py +0 -0
  221. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/diff_viewer.py +0 -0
  222. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/notifications.py +0 -0
  223. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/persistent_shell.py +0 -0
  224. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/prompt_widget.py +0 -0
  225. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/prompts.py +0 -0
  226. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/pty_terminal.py +0 -0
  227. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/resizable_divider.py +0 -0
  228. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/thinking_display.py +0 -0
  229. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/tui_app.py +0 -0
  230. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/tui_utils.py +0 -0
  231. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/ui/welcome.py +0 -0
  232. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/__init__.py +0 -0
  233. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/anti_patterns.py +0 -0
  234. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/auto_fixer.py +0 -0
  235. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/best_practices.py +0 -0
  236. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/code_validator.py +0 -0
  237. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/config_validator.py +0 -0
  238. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/exceptions.py +0 -0
  239. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/input_validator.py +0 -0
  240. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/learning_integration.py +0 -0
  241. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/models.py +0 -0
  242. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/module_validator.py +0 -0
  243. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/predictor_validator.py +0 -0
  244. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/quality_scorer.py +0 -0
  245. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/report_generator.py +0 -0
  246. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/security.py +0 -0
  247. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/security_validator.py +0 -0
  248. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/signature_validator.py +0 -0
  249. {rlm_code-0.1.0 → rlm_code-0.1.2}/rlm_code/validation/validator.py +0 -0
  250. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/__init__.py +0 -0
  251. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/conftest.py +0 -0
  252. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
  253. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_adk_rlm_adapter.py +0 -0
  254. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_code_interpreter.py +0 -0
  255. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_deepagents_adapter.py +0 -0
  256. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
  257. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_extract_fallback.py +0 -0
  258. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_framework_registry_coverage.py +0 -0
  259. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_google_adk_adapter.py +0 -0
  260. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_leaderboard.py +0 -0
  261. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_mock_interpreter.py +0 -0
  262. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_monty_interpreter.py +0 -0
  263. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_observability_sinks.py +0 -0
  264. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_p0_features.py +0 -0
  265. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
  266. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
  267. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_repl_history.py +0 -0
  268. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_security_hardening.py +0 -0
  269. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_session_replay.py +0 -0
  270. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_submit.py +0 -0
  271. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_task_signature.py +0 -0
  272. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/rlm/test_user_tools.py +0 -0
  273. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_anti_patterns.py +0 -0
  274. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_auto_fixer.py +0 -0
  275. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_cache.py +0 -0
  276. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_execution_engine.py +0 -0
  277. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_export_import.py +0 -0
  278. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_init_command.py +0 -0
  279. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_integration.py +0 -0
  280. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_learning_integration.py +0 -0
  281. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_mcp_utils.py +0 -0
  282. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_module_validator.py +0 -0
  283. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_optimization_workflow.py +0 -0
  284. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_persistent_shell.py +0 -0
  285. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_predictor_validator.py +0 -0
  286. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_project_scanner.py +0 -0
  287. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_prompt_widget.py +0 -0
  288. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_property_validators.py +0 -0
  289. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_provider_discovery.py +0 -0
  290. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_provider_registry.py +0 -0
  291. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_quality_scorer.py +0 -0
  292. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_report_generator.py +0 -0
  293. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_retry.py +0 -0
  294. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_rlm_config.py +0 -0
  295. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_rlm_dspy_environment.py +0 -0
  296. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_rlm_observability.py +0 -0
  297. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_security_validator.py +0 -0
  298. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_session_management.py +0 -0
  299. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_signature_validator.py +0 -0
  300. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_streaming.py +0 -0
  301. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_superbox.py +0 -0
  302. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_tui_utils.py +0 -0
  303. {rlm_code-0.1.0 → rlm_code-0.1.2}/tests/test_validation.py +0 -0
@@ -5,7 +5,31 @@ All notable changes to this project are documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
- ## [0.1.5] - 2026-02-15
8
+ ## [0.1.2] - 2026-02-20
9
+
10
+ ### Added
11
+ - Harness strategy selector with `tool_call` (default) and opt-in `codemode`.
12
+ - CodeMode execution flow in harness: MCP tool discovery (`search_tools`), typed tool surface prompt, single-program generation, guardrail validation, and MCP chain execution (`call_tool_chain`).
13
+ - Benchmark support for harness strategy comparison with CodeMode telemetry fields (`harness_strategy`, `codemode_chain_calls`, `codemode_search_calls`, `codemode_discovery_calls`, `codemode_guardrail_blocked`).
14
+ - New top-level CodeMode docs section with dedicated pages for quickstart, architecture, guardrails, and evaluation.
15
+ - Release documentation set for CodeMode:
16
+ - quickstart and operator workflow
17
+ - integration architecture and runtime controls
18
+ - provider/bridge separation model (Cloudflare-based, UTCP, custom)
19
+ - CodeMode sandbox responsibility and deployment matrix
20
+ - guardrail policy and safety runbook
21
+ - benchmark evaluation and promotion-gate criteria
22
+
23
+ ### Changed
24
+ - `/harness run` supports `strategy=tool_call|codemode` and `mcp_server=<name>`.
25
+ - `/rlm bench` in `mode=harness` supports `strategy=tool_call|codemode`.
26
+ - Harness and benchmark command handling now auto-enables MCP when `strategy=codemode` is selected.
27
+
28
+ ### Security
29
+ - Added explicit CodeMode guardrail policy documentation with blocked API classes and runtime limit defaults.
30
+ - CodeMode path remains opt-in; default harness behavior remains strict baseline `strategy=tool_call`.
31
+
32
+ ## [0.1.1] - 2026-02-15
9
33
 
10
34
  Initial public release of **RLM Code**.
11
35
 
@@ -31,3 +55,4 @@ Initial public release of **RLM Code**.
31
55
  - Unsafe local `exec` usage preserved only as an explicit, opt-in path for advanced development scenarios.
32
56
 
33
57
  [0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
58
+ [0.1.2]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlm-code
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
5
5
  Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
6
6
  Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
@@ -99,20 +99,18 @@ Description-Content-Type: text/markdown
99
99
  </a>
100
100
  </p>
101
101
 
102
- <p align="center">
103
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Version" src="https://img.shields.io/pypi/v/rlm-code"></a>
104
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Python Versions" src="https://img.shields.io/pypi/pyversions/rlm-code"></a>
105
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Downloads" src="https://img.shields.io/pypi/dm/rlm-code"></a>
106
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Wheel" src="https://img.shields.io/pypi/wheel/rlm-code"></a>
107
- <a href="LICENSE"><img alt="License" src="https://img.shields.io/pypi/l/rlm-code"></a>
108
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml"><img alt="CI" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml/badge.svg"></a>
109
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml"><img alt="Pre-commit" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg"></a>
110
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml"><img alt="Docs Deploy" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg"></a>
111
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml"><img alt="Release" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg"></a>
112
- <a href="https://github.com/SuperagenticAI/rlm-code/stargazers"><img alt="GitHub Stars" src="https://img.shields.io/github/stars/SuperagenticAI/rlm-code?style=social"></a>
113
- <a href="https://github.com/SuperagenticAI/rlm-code/issues"><img alt="GitHub Issues" src="https://img.shields.io/github/issues/SuperagenticAI/rlm-code"></a>
114
- <a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
115
- </p>
102
+ [![PyPI Version](https://img.shields.io/pypi/v/rlm-code.svg)](https://pypi.org/project/rlm-code/)
103
+ [![Python Versions](https://img.shields.io/pypi/pyversions/rlm-code.svg)](https://pypi.org/project/rlm-code/)
104
+ [![PyPI Wheel](https://img.shields.io/pypi/wheel/rlm-code.svg)](https://pypi.org/project/rlm-code/)
105
+ [![License](https://img.shields.io/pypi/l/rlm-code.svg)](https://pypi.org/project/rlm-code/)
106
+ [![CI](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
107
+ [![Pre-commit](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
108
+ [![Docs Deploy](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
109
+ [![Release](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
110
+ [![Docs](https://img.shields.io/badge/Docs-RLM%20Code-ff7a18.svg?logo=readthedocs&logoColor=white)](https://superagenticai.github.io/rlm-code/)
111
+ [![GitHub Stars](https://img.shields.io/github/stars/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/stargazers)
112
+ [![GitHub Issues](https://img.shields.io/github/issues/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/issues)
113
+ [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/pulls)
116
114
 
117
115
  **Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
118
116
 
@@ -120,6 +118,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
120
118
 
121
119
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
122
120
 
121
+ ## Release v0.1.2
122
+
123
+ This release adds the new CodeMode path as an opt-in harness strategy.
124
+
125
+ - New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
126
+ - MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
127
+ - Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
128
+ - Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
129
+ - Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
130
+
131
+ Example:
132
+
133
+ ```text
134
+ /harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
135
+ ```
136
+
137
+ ## Documentation
138
+
139
+ <p align="center">
140
+ <a href="https://superagenticai.github.io/rlm-code/">
141
+ <img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
142
+ </a>
143
+ </p>
144
+
145
+ <p align="center">
146
+ <a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
147
+ </p>
148
+
123
149
  ## Install
124
150
 
125
151
  ```bash
@@ -399,7 +425,7 @@ rlm_code/
399
425
  harness/ # Tool-using coding harness (/harness)
400
426
  ```
401
427
 
402
- ## Documentation
428
+ ## Resources
403
429
 
404
430
  Full docs: https://superagenticai.github.io/rlm-code/
405
431
 
@@ -6,20 +6,18 @@
6
6
  </a>
7
7
  </p>
8
8
 
9
- <p align="center">
10
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Version" src="https://img.shields.io/pypi/v/rlm-code"></a>
11
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Python Versions" src="https://img.shields.io/pypi/pyversions/rlm-code"></a>
12
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Downloads" src="https://img.shields.io/pypi/dm/rlm-code"></a>
13
- <a href="https://pypi.org/project/rlm-code/"><img alt="PyPI Wheel" src="https://img.shields.io/pypi/wheel/rlm-code"></a>
14
- <a href="LICENSE"><img alt="License" src="https://img.shields.io/pypi/l/rlm-code"></a>
15
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml"><img alt="CI" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml/badge.svg"></a>
16
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml"><img alt="Pre-commit" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg"></a>
17
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml"><img alt="Docs Deploy" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg"></a>
18
- <a href="https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml"><img alt="Release" src="https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg"></a>
19
- <a href="https://github.com/SuperagenticAI/rlm-code/stargazers"><img alt="GitHub Stars" src="https://img.shields.io/github/stars/SuperagenticAI/rlm-code?style=social"></a>
20
- <a href="https://github.com/SuperagenticAI/rlm-code/issues"><img alt="GitHub Issues" src="https://img.shields.io/github/issues/SuperagenticAI/rlm-code"></a>
21
- <a href="https://github.com/SuperagenticAI/rlm-code/pulls"><img alt="GitHub Pull Requests" src="https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code"></a>
22
- </p>
9
+ [![PyPI Version](https://img.shields.io/pypi/v/rlm-code.svg)](https://pypi.org/project/rlm-code/)
10
+ [![Python Versions](https://img.shields.io/pypi/pyversions/rlm-code.svg)](https://pypi.org/project/rlm-code/)
11
+ [![PyPI Wheel](https://img.shields.io/pypi/wheel/rlm-code.svg)](https://pypi.org/project/rlm-code/)
12
+ [![License](https://img.shields.io/pypi/l/rlm-code.svg)](https://pypi.org/project/rlm-code/)
13
+ [![CI](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/ci.yml)
14
+ [![Pre-commit](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
15
+ [![Docs Deploy](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
16
+ [![Release](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
17
+ [![Docs](https://img.shields.io/badge/Docs-RLM%20Code-ff7a18.svg?logo=readthedocs&logoColor=white)](https://superagenticai.github.io/rlm-code/)
18
+ [![GitHub Stars](https://img.shields.io/github/stars/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/stargazers)
19
+ [![GitHub Issues](https://img.shields.io/github/issues/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/issues)
20
+ [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/pulls)
23
21
 
24
22
  **Run LLM-powered agents in a REPL loop, benchmark them, and compare results.**
25
23
 
@@ -27,6 +25,34 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
27
25
 
28
26
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
29
27
 
28
+ ## Release v0.1.2
29
+
30
+ This release adds the new CodeMode path as an opt-in harness strategy.
31
+
32
+ - New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
33
+ - MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
34
+ - Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
35
+ - Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
36
+ - Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
37
+
38
+ Example:
39
+
40
+ ```text
41
+ /harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
42
+ ```
43
+
44
+ ## Documentation
45
+
46
+ <p align="center">
47
+ <a href="https://superagenticai.github.io/rlm-code/">
48
+ <img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
49
+ </a>
50
+ </p>
51
+
52
+ <p align="center">
53
+ <a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
54
+ </p>
55
+
30
56
  ## Install
31
57
 
32
58
  ```bash
@@ -306,7 +332,7 @@ rlm_code/
306
332
  harness/ # Tool-using coding harness (/harness)
307
333
  ```
308
334
 
309
- ## Documentation
335
+ ## Resources
310
336
 
311
337
  Full docs: https://superagenticai.github.io/rlm-code/
312
338
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rlm-code"
7
- version = "0.1.0"
7
+ version = "0.1.2"
8
8
  description = "RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -5,5 +5,5 @@ This package provides tools for creating, managing, and optimizing DSPy componen
5
5
  through natural language interactions.
6
6
  """
7
7
 
8
- __version__ = "0.1.5"
8
+ __version__ = "0.1.2"
9
9
  __author__ = "Super Agentic AI"
@@ -112,6 +112,7 @@ class SlashCommandHandler:
112
112
  self.rlm_runner = RLMRunner(
113
113
  llm_connector=self.llm_connector,
114
114
  execution_engine=self.execution_engine,
115
+ mcp_manager=self.mcp_manager,
115
116
  reward_profile=reward_profile,
116
117
  benchmark_pack_paths=benchmark_pack_paths,
117
118
  )
@@ -1442,7 +1443,7 @@ class SlashCommandHandler:
1442
1443
  Usage:
1443
1444
  /harness tools [mcp=on|off]
1444
1445
  /harness doctor
1445
- /harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]
1446
+ /harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]]
1446
1447
  """
1447
1448
  if not args or args[0].lower() in {"help", "--help"}:
1448
1449
  console.print()
@@ -1450,7 +1451,8 @@ class SlashCommandHandler:
1450
1451
  console.print(" [yellow]/harness tools [mcp=on|off][/yellow]")
1451
1452
  console.print(" [yellow]/harness doctor[/yellow]")
1452
1453
  console.print(
1453
- " [yellow]/harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]][/yellow]"
1454
+ " [yellow]/harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
1455
+ "[strategy=tool_call|codemode] [tools=name[,name2]][/yellow]"
1454
1456
  )
1455
1457
  console.print()
1456
1458
  return
@@ -1555,6 +1557,8 @@ class SlashCommandHandler:
1555
1557
  include_mcp = True
1556
1558
  max_steps = 10
1557
1559
  allowlist: list[str] | None = None
1560
+ strategy = "tool_call"
1561
+ mcp_server: str | None = None
1558
1562
  task_tokens: list[str] = []
1559
1563
 
1560
1564
  for token in args[1:]:
@@ -1568,6 +1572,16 @@ class SlashCommandHandler:
1568
1572
  elif lowered.startswith("mcp="):
1569
1573
  value = token.split("=", 1)[1].strip().lower()
1570
1574
  include_mcp = value not in {"off", "false", "0", "no"}
1575
+ elif lowered.startswith("mcp_server="):
1576
+ mcp_server = token.split("=", 1)[1].strip() or None
1577
+ elif lowered.startswith("strategy="):
1578
+ raw_strategy = token.split("=", 1)[1].strip().lower().replace("-", "_")
1579
+ if raw_strategy not in {"tool_call", "codemode"}:
1580
+ show_error_message(
1581
+ "Invalid strategy value. Use strategy=tool_call|codemode."
1582
+ )
1583
+ return
1584
+ strategy = raw_strategy
1571
1585
  elif lowered.startswith("tools="):
1572
1586
  raw = token.split("=", 1)[1].strip()
1573
1587
  parsed = [part.strip() for part in raw.split(",") if part.strip()]
@@ -1578,15 +1592,27 @@ class SlashCommandHandler:
1578
1592
  task = " ".join(task_tokens).strip()
1579
1593
  if not task:
1580
1594
  show_error_message(
1581
- "Usage: /harness run <task> [steps=N] [mcp=on|off] [tools=name[,name2]]"
1595
+ "Usage: /harness run <task> [steps=N] [mcp=on|off] [mcp_server=name] "
1596
+ "[strategy=tool_call|codemode] [tools=name[,name2]]"
1582
1597
  )
1583
1598
  return
1599
+ if strategy == "codemode" and not include_mcp:
1600
+ show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
1601
+ include_mcp = True
1602
+ if strategy == "codemode" and allowlist:
1603
+ show_warning_message(
1604
+ "tools=... allowlist is ignored for strategy=codemode."
1605
+ )
1606
+ allowlist = None
1584
1607
 
1585
1608
  console.print()
1586
1609
  console.print("[bold cyan]🛠 Running Harness[/bold cyan]")
1587
1610
  console.print(f" Task: [cyan]{task}[/cyan]")
1588
1611
  console.print(f" Max steps: [cyan]{max_steps}[/cyan]")
1589
1612
  console.print(f" MCP tools: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
1613
+ console.print(f" Strategy: [cyan]{strategy}[/cyan]")
1614
+ if mcp_server:
1615
+ console.print(f" MCP server: [cyan]{mcp_server}[/cyan]")
1590
1616
  if allowlist:
1591
1617
  console.print(f" Tool allowlist: [cyan]{', '.join(allowlist)}[/cyan]")
1592
1618
  console.print()
@@ -1596,6 +1622,8 @@ class SlashCommandHandler:
1596
1622
  max_steps=max_steps,
1597
1623
  include_mcp=include_mcp,
1598
1624
  tool_allowlist=allowlist,
1625
+ strategy=strategy,
1626
+ mcp_server=mcp_server,
1599
1627
  )
1600
1628
 
1601
1629
  self.current_context["harness_last_response"] = result.final_response
@@ -1659,7 +1687,7 @@ class SlashCommandHandler:
1659
1687
 
1660
1688
  Usage:
1661
1689
  /rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
1662
- /rlm bench [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
1690
+ /rlm bench [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
1663
1691
  /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N]
1664
1692
  /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json]
1665
1693
  /rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path]
@@ -1687,6 +1715,7 @@ class SlashCommandHandler:
1687
1715
  )
1688
1716
  console.print(
1689
1717
  " [yellow]/rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
1718
+ "[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
1690
1719
  "[pack=path[,path2]] [limit=N] [steps=N] "
1691
1720
  f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model][/yellow]"
1692
1721
  )
@@ -2521,6 +2550,9 @@ class SlashCommandHandler:
2521
2550
  environment: str | None = None
2522
2551
  sub_model: str | None = None
2523
2552
  sub_provider: str | None = None
2553
+ include_mcp = False
2554
+ mcp_server: str | None = None
2555
+ harness_strategy = "tool_call"
2524
2556
 
2525
2557
  for token in args[1:]:
2526
2558
  lowered = token.lower()
@@ -2537,6 +2569,19 @@ class SlashCommandHandler:
2537
2569
  )
2538
2570
  return
2539
2571
  mode = resolved_mode
2572
+ elif lowered.startswith("mcp="):
2573
+ value = token.split("=", 1)[1].strip().lower()
2574
+ include_mcp = value not in {"off", "false", "0", "no"}
2575
+ elif lowered.startswith("strategy="):
2576
+ strategy_token = token.split("=", 1)[1].strip().lower().replace("-", "_")
2577
+ if strategy_token not in {"tool_call", "codemode"}:
2578
+ show_error_message(
2579
+ "Invalid strategy value. Use strategy=tool_call|codemode."
2580
+ )
2581
+ return
2582
+ harness_strategy = strategy_token
2583
+ elif lowered.startswith("mcp_server="):
2584
+ mcp_server = token.split("=", 1)[1].strip() or None
2540
2585
  elif lowered.startswith("pack="):
2541
2586
  raw_paths = token.split("=", 1)[1].strip()
2542
2587
  if not raw_paths:
@@ -2593,8 +2638,10 @@ class SlashCommandHandler:
2593
2638
  else:
2594
2639
  show_error_message(
2595
2640
  "Usage: /rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
2641
+ "[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
2596
2642
  "[pack=path[,path2]] [limit=N] "
2597
- f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model]\n"
2643
+ f"[steps=N] [timeout=N] [branch=N] [framework={framework_opts}] "
2644
+ "[env=generic|dspy|pure_rlm] [sub=provider/model]\n"
2598
2645
  " /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
2599
2646
  " /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] ...\n"
2600
2647
  " /rlm bench report [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
@@ -2602,6 +2649,30 @@ class SlashCommandHandler:
2602
2649
  )
2603
2650
  return
2604
2651
 
2652
+ if mode == "harness" and harness_strategy == "codemode" and not include_mcp:
2653
+ show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
2654
+ include_mcp = True
2655
+
2656
+ if mode != "harness" and include_mcp:
2657
+ show_warning_message("mcp=on is only used for mode=harness. Ignoring MCP settings.")
2658
+ include_mcp = False
2659
+ mcp_server = None
2660
+ elif mode != "harness" and mcp_server:
2661
+ show_warning_message(
2662
+ "mcp_server is only used for mode=harness with mcp=on. Ignoring."
2663
+ )
2664
+ mcp_server = None
2665
+ elif mode == "harness" and mcp_server and not include_mcp:
2666
+ show_warning_message(
2667
+ "mcp_server provided but mcp=off. MCP server filter will be ignored."
2668
+ )
2669
+ mcp_server = None
2670
+ if mode != "harness" and harness_strategy != "tool_call":
2671
+ show_warning_message(
2672
+ "strategy is only used for mode=harness. Resetting to tool_call."
2673
+ )
2674
+ harness_strategy = "tool_call"
2675
+
2605
2676
  if list_only:
2606
2677
  try:
2607
2678
  rows = self.rlm_runner.benchmark_presets(pack_paths=pack_paths_override)
@@ -2681,6 +2752,11 @@ class SlashCommandHandler:
2681
2752
  if timeout is not None:
2682
2753
  console.print(f" Override timeout: [cyan]{timeout}s[/cyan]")
2683
2754
  console.print(f" Branch width: [cyan]{branch_width}[/cyan]")
2755
+ if mode == "harness":
2756
+ console.print(f" Harness strategy: [cyan]{harness_strategy}[/cyan]")
2757
+ console.print(f" Harness MCP: [cyan]{'on' if include_mcp else 'off'}[/cyan]")
2758
+ if include_mcp and mcp_server:
2759
+ console.print(f" Harness MCP server: [cyan]{mcp_server}[/cyan]")
2684
2760
  if pack_paths_override:
2685
2761
  console.print(f" Benchmark packs: [cyan]{', '.join(pack_paths_override)}[/cyan]")
2686
2762
  if environment:
@@ -2704,6 +2780,9 @@ class SlashCommandHandler:
2704
2780
  branch_width=branch_width,
2705
2781
  sub_model=sub_model,
2706
2782
  sub_provider=sub_provider,
2783
+ include_mcp=include_mcp,
2784
+ mcp_server=mcp_server,
2785
+ harness_strategy=harness_strategy,
2707
2786
  pack_paths=pack_paths_override,
2708
2787
  )
2709
2788
  except ValueError as exc:
@@ -4413,7 +4492,7 @@ class SlashCommandHandler:
4413
4492
 
4414
4493
  [bold magenta]RLM Workflows:[/bold magenta]
4415
4494
  [yellow]/rlm run[/yellow] <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run an RLM coding episode
4416
- [yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
4495
+ [yellow]/rlm bench[/yellow] [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=native|dspy-rlm|adk-rlm|pydantic-ai|google-adk|deepagents] [env=generic|dspy|pure_rlm] [sub=provider/model] - Run benchmark preset
4417
4496
  [yellow]/rlm bench compare[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] - Gate regressions
4418
4497
  [yellow]/rlm bench validate[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json] - CI-style gate output
4419
4498
  [yellow]/rlm bench report[/yellow] [candidate=<id|path|latest>] [baseline=<id|path|previous>] [format=markdown|csv|json] [output=path] - Export compare report
@@ -4431,7 +4510,7 @@ class SlashCommandHandler:
4431
4510
  [yellow]/rlm observability[/yellow] - Show local/MLflow observability sink status
4432
4511
  [yellow]/harness tools[/yellow] [mcp=on|off] - List coding harness tools (local + MCP)
4433
4512
  [yellow]/harness doctor[/yellow] - Show harness tool coverage report
4434
- [yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [tools=name[,name2]] - Run tool-using coding harness
4513
+ [yellow]/harness run[/yellow] <task> [steps=N] [mcp=on|off] [mcp_server=name] [strategy=tool_call|codemode] [tools=name[,name2]] - Run tool-using coding harness
4435
4514
 
4436
4515
  [bold magenta]Optimization (GEPA):[/bold magenta]
4437
4516
  [yellow]/optimize-start[/yellow] [budget] - Start GEPA optimization workflow
@@ -102,7 +102,7 @@ class SandboxAppleContainerConfig:
102
102
  class SandboxConfig:
103
103
  """Execution sandbox runtime configuration."""
104
104
 
105
- runtime: str = "docker" # local | docker | apple-container | daytona | e2b
105
+ runtime: str = "docker" # local | monty | docker | apple-container | daytona | e2b
106
106
  default_timeout_seconds: int = 30
107
107
  memory_limit_mb: int = 512
108
108
  allowed_mount_roots: list[str] = field(