rlm-code 0.1.8__tar.gz → 0.1.9__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (305)
  1. {rlm_code-0.1.8 → rlm_code-0.1.9}/.gitignore +1 -0
  2. {rlm_code-0.1.8 → rlm_code-0.1.9}/CHANGELOG.md +12 -0
  3. {rlm_code-0.1.8 → rlm_code-0.1.9}/PKG-INFO +9 -10
  4. {rlm_code-0.1.8 → rlm_code-0.1.9}/README.md +8 -9
  5. {rlm_code-0.1.8 → rlm_code-0.1.9}/pyproject.toml +1 -1
  6. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/__init__.py +1 -1
  7. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/__init__.py +1 -1
  8. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/runner.py +97 -1
  9. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/session_replay.py +34 -6
  10. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/visualizer.py +23 -0
  11. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/tui_app.py +87 -6
  12. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_session_replay.py +56 -0
  13. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_rlm_runner.py +33 -0
  14. {rlm_code-0.1.8 → rlm_code-0.1.9}/LICENSE +0 -0
  15. {rlm_code-0.1.8 → rlm_code-0.1.9}/NOTICE +0 -0
  16. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/__init__.py +0 -0
  17. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/agent.py +0 -0
  18. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/agents/__init__.py +0 -0
  19. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/agents/rlm_agent.py +0 -0
  20. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/callbacks/__init__.py +0 -0
  21. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/callbacks/code_execution.py +0 -0
  22. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/cli.py +0 -0
  23. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/code_executor.py +0 -0
  24. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/events.py +0 -0
  25. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/__init__.py +0 -0
  26. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/base.py +0 -0
  27. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/lazy.py +0 -0
  28. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/loader.py +0 -0
  29. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/parsers/__init__.py +0 -0
  30. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/parsers/base.py +0 -0
  31. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/parsers/pdf.py +0 -0
  32. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/parsers/text.py +0 -0
  33. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/sources/__init__.py +0 -0
  34. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/sources/base.py +0 -0
  35. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/sources/gcs.py +0 -0
  36. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/sources/local.py +0 -0
  37. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/llm.py +0 -0
  38. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/logging/__init__.py +0 -0
  39. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/logging/rlm_logger.py +0 -0
  40. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/logging/verbose.py +0 -0
  41. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/main.py +0 -0
  42. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/prompts.py +0 -0
  43. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/repl/__init__.py +0 -0
  44. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/repl/local_repl.py +0 -0
  45. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/repl/safe_builtins.py +0 -0
  46. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/templates/index.html +0 -0
  47. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/tools/__init__.py +0 -0
  48. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/types.py +0 -0
  49. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/usage.py +0 -0
  50. {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/web.py +0 -0
  51. {rlm_code-0.1.8 → rlm_code-0.1.9}/eval/packs/README.md +0 -0
  52. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/__main__.py +0 -0
  53. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/__init__.py +0 -0
  54. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/config_command.py +0 -0
  55. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/create_command.py +0 -0
  56. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/demo_command.py +0 -0
  57. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/export_command.py +0 -0
  58. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/init_command.py +0 -0
  59. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/interactive_command.py +0 -0
  60. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/mcp_command.py +0 -0
  61. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/models_command.py +0 -0
  62. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/nl_command_router.py +0 -0
  63. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/optimize_command.py +0 -0
  64. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/run_command.py +0 -0
  65. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/slash_commands.py +0 -0
  66. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/__init__.py +0 -0
  67. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/config.py +0 -0
  68. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/debug_logger.py +0 -0
  69. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/directory_utils.py +0 -0
  70. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/exceptions.py +0 -0
  71. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/logging.py +0 -0
  72. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/venv_utils.py +0 -0
  73. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/version_checker.py +0 -0
  74. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/__init__.py +0 -0
  75. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/phase2_demo.py +0 -0
  76. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/phase3_demo.py +0 -0
  77. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/phase4_demo.py +0 -0
  78. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/pure_rlm_demo.py +0 -0
  79. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/execution/__init__.py +0 -0
  80. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/execution/engine.py +0 -0
  81. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/execution/sandbox.py +0 -0
  82. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/export/__init__.py +0 -0
  83. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/export/handler.py +0 -0
  84. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/export/package_builder.py +0 -0
  85. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/generators/evaluation_generator.py +0 -0
  86. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/generators/gepa_generator.py +0 -0
  87. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/harness/__init__.py +0 -0
  88. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/harness/registry.py +0 -0
  89. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/harness/runner.py +0 -0
  90. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/main.py +0 -0
  91. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/client_manager.py +0 -0
  92. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/config.py +0 -0
  93. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/exceptions.py +0 -0
  94. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/retry.py +0 -0
  95. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/server/__init__.py +0 -0
  96. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/server/rlm_server.py +0 -0
  97. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/server/tools.py +0 -0
  98. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/session_wrapper.py +0 -0
  99. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/__init__.py +0 -0
  100. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/factory.py +0 -0
  101. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/sse_transport.py +0 -0
  102. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/stdio_transport.py +0 -0
  103. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/websocket_transport.py +0 -0
  104. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/utils.py +0 -0
  105. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/__init__.py +0 -0
  106. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/cache.py +0 -0
  107. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/code_generator.py +0 -0
  108. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/dspy_reference_loader.py +0 -0
  109. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/llm_connector.py +0 -0
  110. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/model_manager.py +0 -0
  111. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/__init__.py +0 -0
  112. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/acp_discovery.py +0 -0
  113. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/local_discovery.py +0 -0
  114. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/model_catalog.py +0 -0
  115. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/registry.py +0 -0
  116. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/streaming.py +0 -0
  117. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/task_collector.py +0 -0
  118. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/optimization/__init__.py +0 -0
  119. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/optimization/data_collector.py +0 -0
  120. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/optimization/executor.py +0 -0
  121. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/optimization/workflow_manager.py +0 -0
  122. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/__init__.py +0 -0
  123. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/context_manager.py +0 -0
  124. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/dspy_md_generator.py +0 -0
  125. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/initializer.py +0 -0
  126. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/scanner.py +0 -0
  127. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/py.typed +0 -0
  128. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/__init__.py +0 -0
  129. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/action_planner.py +0 -0
  130. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/__init__.py +0 -0
  131. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/audit.py +0 -0
  132. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/gate.py +0 -0
  133. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/handlers.py +0 -0
  134. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/policy.py +0 -0
  135. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/benchmark_manager.py +0 -0
  136. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/benchmarks.py +0 -0
  137. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/chat_session.py +0 -0
  138. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/code_interpreter.py +0 -0
  139. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/comparison.py +0 -0
  140. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/config_schema.py +0 -0
  141. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/context_store.py +0 -0
  142. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/delegation.py +0 -0
  143. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/docker_interpreter.py +0 -0
  144. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/environments.py +0 -0
  145. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/events.py +0 -0
  146. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/__init__.py +0 -0
  147. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
  148. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/base.py +0 -0
  149. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
  150. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
  151. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
  152. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
  153. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/registry.py +0 -0
  154. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/leaderboard.py +0 -0
  155. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/memory_compaction.py +0 -0
  156. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/mock_interpreter.py +0 -0
  157. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/monty_interpreter.py +0 -0
  158. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/observability.py +0 -0
  159. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/observability_sinks.py +0 -0
  160. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/__init__.py +0 -0
  161. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/action_policies.py +0 -0
  162. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/base.py +0 -0
  163. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/compaction_policies.py +0 -0
  164. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/registry.py +0 -0
  165. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/reward_policies.py +0 -0
  166. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/termination_policies.py +0 -0
  167. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/pure_rlm_environment.py +0 -0
  168. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/repl_types.py +0 -0
  169. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/__init__.py +0 -0
  170. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/theme.py +0 -0
  171. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
  172. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
  173. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
  174. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/task_signature.py +0 -0
  175. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/termination.py +0 -0
  176. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/trajectory.py +0 -0
  177. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/__init__.py +0 -0
  178. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/__init__.py +0 -0
  179. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
  180. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/base.py +0 -0
  181. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
  182. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
  183. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
  184. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
  185. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
  186. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
  187. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
  188. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
  189. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/registry.py +0 -0
  190. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/superbox.py +0 -0
  191. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/session/__init__.py +0 -0
  192. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/session/state_manager.py +0 -0
  193. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/.env.example +0 -0
  194. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/adapters.py +0 -0
  195. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/async_streaming.py +0 -0
  196. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/complete_programs.py +0 -0
  197. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/dspy_config_example.yaml +0 -0
  198. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/evaluation.py +0 -0
  199. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/industry_templates.py +0 -0
  200. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/optimizers.py +0 -0
  201. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/retrievers.py +0 -0
  202. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
  203. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/tests/__init__.py +0 -0
  204. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/tests/rlm/__init__.py +0 -0
  205. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/tests/rlm/test_phase2.py +0 -0
  206. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
  207. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/traces/__init__.py +0 -0
  208. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/traces/index.py +0 -0
  209. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/traces/models.py +0 -0
  210. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/traces/store.py +0 -0
  211. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/__init__.py +0 -0
  212. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/agent_collab_view.py +0 -0
  213. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/animations.py +0 -0
  214. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/conversation.py +0 -0
  215. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/design_system.py +0 -0
  216. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/diff_viewer.py +0 -0
  217. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/notifications.py +0 -0
  218. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/persistent_shell.py +0 -0
  219. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/prompt_widget.py +0 -0
  220. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/prompts.py +0 -0
  221. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/pty_terminal.py +0 -0
  222. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/resizable_divider.py +0 -0
  223. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/thinking_display.py +0 -0
  224. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/tui_utils.py +0 -0
  225. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/welcome.py +0 -0
  226. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/__init__.py +0 -0
  227. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/anti_patterns.py +0 -0
  228. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/auto_fixer.py +0 -0
  229. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/best_practices.py +0 -0
  230. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/code_validator.py +0 -0
  231. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/config_validator.py +0 -0
  232. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/exceptions.py +0 -0
  233. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/input_validator.py +0 -0
  234. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/learning_integration.py +0 -0
  235. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/models.py +0 -0
  236. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/module_validator.py +0 -0
  237. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/predictor_validator.py +0 -0
  238. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/quality_scorer.py +0 -0
  239. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/report_generator.py +0 -0
  240. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/security.py +0 -0
  241. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/security_validator.py +0 -0
  242. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/signature_validator.py +0 -0
  243. {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/validator.py +0 -0
  244. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/__init__.py +0 -0
  245. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/conftest.py +0 -0
  246. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
  247. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_adk_rlm_adapter.py +0 -0
  248. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_code_interpreter.py +0 -0
  249. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_deepagents_adapter.py +0 -0
  250. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
  251. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_extract_fallback.py +0 -0
  252. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_framework_registry_coverage.py +0 -0
  253. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_google_adk_adapter.py +0 -0
  254. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_leaderboard.py +0 -0
  255. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_mock_interpreter.py +0 -0
  256. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_monty_interpreter.py +0 -0
  257. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_observability_sinks.py +0 -0
  258. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_p0_features.py +0 -0
  259. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_phase3.py +0 -0
  260. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_phase4.py +0 -0
  261. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
  262. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
  263. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_repl_history.py +0 -0
  264. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_security_hardening.py +0 -0
  265. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_submit.py +0 -0
  266. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_task_signature.py +0 -0
  267. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_user_tools.py +0 -0
  268. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_anti_patterns.py +0 -0
  269. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_auto_fixer.py +0 -0
  270. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_cache.py +0 -0
  271. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_execution_engine.py +0 -0
  272. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_export_import.py +0 -0
  273. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_harness_registry.py +0 -0
  274. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_harness_runner.py +0 -0
  275. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_init_command.py +0 -0
  276. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_integration.py +0 -0
  277. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_learning_integration.py +0 -0
  278. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_mcp_utils.py +0 -0
  279. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_module_validator.py +0 -0
  280. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_optimization_workflow.py +0 -0
  281. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_persistent_shell.py +0 -0
  282. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_predictor_validator.py +0 -0
  283. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_project_scanner.py +0 -0
  284. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_prompt_widget.py +0 -0
  285. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_property_validators.py +0 -0
  286. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_provider_discovery.py +0 -0
  287. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_provider_registry.py +0 -0
  288. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_quality_scorer.py +0 -0
  289. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_report_generator.py +0 -0
  290. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_retry.py +0 -0
  291. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_rlm_config.py +0 -0
  292. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_rlm_dspy_environment.py +0 -0
  293. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_rlm_observability.py +0 -0
  294. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_sandbox_runtimes.py +0 -0
  295. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_security_validator.py +0 -0
  296. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_session_management.py +0 -0
  297. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_signature_validator.py +0 -0
  298. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_slash_harness_command.py +0 -0
  299. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_slash_rlm_command.py +0 -0
  300. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_slash_sandbox_command.py +0 -0
  301. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_streaming.py +0 -0
  302. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_superbox.py +0 -0
  303. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_trace_analysis.py +0 -0
  304. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_tui_utils.py +0 -0
  305. {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_validation.py +0 -0
@@ -153,6 +153,7 @@ cython_debug/
153
153
 
154
154
  # Project specific
155
155
  dspy_config.yaml
156
+ rlm_config.yaml
156
157
  *.log
157
158
 
158
159
  # Internal workspace data directories (all data in CWD)
@@ -5,6 +5,17 @@ All notable changes to this project are documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.9] - 2026-06-26
9
+
10
+ ### Added
11
+ - Pure RLM runner context initialization from explicit workspace file references in the task, with compact repository snapshot fallback.
12
+ - Context-load events for Pure RLM runs, including loaded file names and total context characters.
13
+ - Runner JSONL replay coverage for action code, observations, success state, token counts, and cumulative reward.
14
+
15
+ ### Changed
16
+ - TUI trajectory and replay views now show Pure RLM signals including REPL code, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables.
17
+ - Run visualization now includes richer Pure RLM previews for completed runs.
18
+
8
19
  ## [0.1.8] - 2026-05-01
9
20
 
10
21
  ### Added
@@ -76,5 +87,6 @@ Initial public release of **RLM Code**.
76
87
 
77
88
  [0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
78
89
  [0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
90
+ [0.1.9]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.9
79
91
  [0.1.8]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.8
80
92
  [0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlm-code
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
5
5
  Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
6
6
  Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
@@ -118,21 +118,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
118
118
 
119
119
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
120
120
 
121
- ## Release v0.1.8
121
+ ## Release v0.1.9
122
122
 
123
- This release extends HALO/AHE-style trace analysis with layered evidence export.
123
+ This release improves Pure RLM repository runs and makes completed trajectories more inspectable from the TUI and replay views.
124
124
 
125
- - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
126
- - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
127
- - AHE-style evidence corpus export with `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans
128
- - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
129
- - `/rlm` help/docs updated for `env=trace_analysis`
130
- - Dedicated trace analysis docs under the Core Engine section
125
+ - Pure RLM runs now initialize `context` from explicit workspace files mentioned in the task, with a compact repository snapshot fallback
126
+ - Runner events now record context-load metadata for Pure RLM runs
127
+ - Legacy runner JSONL step events replay with action code, observations, success, token counts, and cumulative reward
128
+ - Run visualization now includes REPL code previews, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables
129
+ - TUI trajectory and replay views now surface Pure RLM signals directly for completed runs
131
130
 
132
131
  Example:
133
132
 
134
133
  ```text
135
- /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
134
+ /rlm run "Validate pure_rlm_environment.py and cite context, REPL, llm_query, and FINAL evidence" env=pure_rlm steps=6
136
135
  ```
137
136
 
138
137
  ## Documentation
@@ -25,21 +25,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
25
25
 
26
26
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
27
27
 
28
- ## Release v0.1.8
28
+ ## Release v0.1.9
29
29
 
30
- This release extends HALO/AHE-style trace analysis with layered evidence export.
30
+ This release improves Pure RLM repository runs and makes completed trajectories more inspectable from the TUI and replay views.
31
31
 
32
- - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
33
- - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
34
- - AHE-style evidence corpus export with `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans
35
- - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
36
- - `/rlm` help/docs updated for `env=trace_analysis`
37
- - Dedicated trace analysis docs under the Core Engine section
32
+ - Pure RLM runs now initialize `context` from explicit workspace files mentioned in the task, with a compact repository snapshot fallback
33
+ - Runner events now record context-load metadata for Pure RLM runs
34
+ - Legacy runner JSONL step events replay with action code, observations, success, token counts, and cumulative reward
35
+ - Run visualization now includes REPL code previews, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables
36
+ - TUI trajectory and replay views now surface Pure RLM signals directly for completed runs
38
37
 
39
38
  Example:
40
39
 
41
40
  ```text
42
- /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
41
+ /rlm run "Validate pure_rlm_environment.py and cite context, REPL, llm_query, and FINAL evidence" env=pure_rlm steps=6
43
42
  ```
44
43
 
45
44
  ## Documentation
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rlm-code"
7
- version = "0.1.8"
7
+ version = "0.1.9"
8
8
  description = "RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -5,5 +5,5 @@ This package provides tools for creating, managing, and optimizing DSPy componen
5
5
  through natural language interactions.
6
6
  """
7
7
 
8
- __version__ = "0.1.8"
8
+ __version__ = "0.1.9"
9
9
  __author__ = "Super Agentic AI"
@@ -17,7 +17,7 @@ from .exceptions import (
17
17
  )
18
18
  from .session_wrapper import MCPSessionWrapper
19
19
 
20
- __version__ = "0.1.8"
20
+ __version__ = "0.1.9"
21
21
 
22
22
  __all__ = [
23
23
  "MCPClientManager",
@@ -9,6 +9,7 @@ from __future__ import annotations
9
9
 
10
10
  import hashlib
11
11
  import json
12
+ import re
12
13
  import threading
13
14
  import time
14
15
  from dataclasses import asdict, dataclass, is_dataclass
@@ -29,7 +30,7 @@ from .benchmark_manager import (
29
30
  )
30
31
  from .benchmarks import RLMBenchmarkCase, load_benchmark_packs
31
32
  from .chat_session import ChatSessionMixin
32
- from .context_store import LazyFileContext
33
+ from .context_store import ContextRef, LazyFileContext
33
34
  from .delegation import DelegationMixin
34
35
  from .environments import (
35
36
  DSPyCodingRLMEnvironment,
@@ -467,6 +468,93 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
467
468
  allow_unsafe_exec=(selected_backend == "exec" and self._pure_rlm_allow_unsafe_exec),
468
469
  )
469
470
 
471
def _extract_task_file_refs(self, task: str, limit: int = 12) -> list[ContextRef]:
    """Find explicit workspace file references mentioned in a task string.

    Args:
        task: Free-form task text that may name files (for example
            ``src/app.py`` or ``./README.md``).
        limit: Maximum number of distinct references to return.

    Returns:
        One ``ContextRef`` per distinct path, in order of first mention.
        Empty when nothing matches or ``limit`` is not positive.
    """
    if limit <= 0:
        return []

    # The trailing negative lookahead forces the extension to match whole;
    # without it "app.jsx" is captured as "app.js" and "notes.python" as
    # "notes.py" because the shorter alternative matches a prefix.
    candidates = re.findall(
        r"(?<![\w.-])(?:[\w.-]+/)*[\w.-]+\.(?:py|md|toml|yaml|yml|json|txt|js|jsx|ts|tsx)(?!\w)",
        task,
    )
    seen: set[str] = set()
    refs: list[ContextRef] = []
    for candidate in candidates:
        # Trim punctuation on the right only: stripping "." on the left would
        # turn "./src/a.py" into "/src/a.py" and ".github/ci.yml" into
        # "github/ci.yml".
        normalized = candidate.strip().rstrip("`'\".,:;)")
        # Drop redundant "./" prefixes so "./a.py" and "a.py" de-duplicate.
        # NOTE(review): assumes ContextRef paths are workspace-relative - confirm.
        while normalized.startswith("./"):
            normalized = normalized[2:]
        if not normalized or normalized in seen:
            continue
        seen.add(normalized)
        refs.append(ContextRef(path=normalized))
        if len(refs) >= limit:
            break
    return refs
488
+
489
def _build_pure_rlm_initial_context(self, task: str) -> dict[str, str]:
    """
    Build a small real-code context for Pure RLM runs.

    The direct PureRLMEnvironment API expects context to be initialized
    explicitly. Runner/TUI users expect `/rlm run ... env=pure_rlm` to
    start with useful workspace data, so `context` is seeded in this order:

    1. files named explicitly in the task,
    2. a compact snapshot of up to 12 discovered workspace files (used when
       the task names no files, or none of the named files yield content),
    3. a single `_workspace` entry listing up to 80 discovered paths.

    Args:
        task: The task text supplied for the run.

    Returns:
        Mapping of file path to snippet (each read with `max_chars=12000`),
        or the `_workspace` listing when no snippet could be loaded.
    """

    def load_snippets(refs: Any) -> dict[str, str]:
        # Keep only refs whose read returns non-empty content.
        loaded: dict[str, str] = {}
        for ref in refs:
            snippet = self.context_store.read(ref, max_chars=12000)
            if snippet:
                loaded[ref.path] = snippet
        return loaded

    explicit_refs = self._extract_task_file_refs(task)
    context = load_snippets(explicit_refs) if explicit_refs else {}
    if not context:
        # Nothing was named, or the named files produced no content (for
        # example a bare file name that does not resolve in the workspace):
        # fall back to the repository snapshot instead of a bare listing.
        context = load_snippets(self.context_store.discover(limit=12))
    if context:
        return context

    discovered = self.context_store.discover(limit=80)
    tree = "\n".join(ref.path for ref in discovered)
    return {
        "_workspace": (
            f"Workspace: {self.workdir}\n"
            "No explicit file snippets were loaded. Available files:\n"
            f"{tree}"
        ).strip()
    }
520
+
521
def _initialize_pure_rlm_run_context(
    self,
    env: RLMEnvironment,
    task: str,
    *,
    run_id: str,
    run_path: Path,
) -> int:
    """Seed `context` for a Pure RLM run and record that it happened.

    Does nothing and returns 0 unless the environment is named "pure_rlm"
    and exposes `initialize_context`. Otherwise the built context is handed
    to the environment (with the task exposed as the `query` variable), a
    "context" event is appended to the run file, and a "context_load"
    runtime event is emitted.

    Returns:
        The number of entries placed in the context.
    """
    supports_context = env.name == "pure_rlm" and hasattr(env, "initialize_context")
    if not supports_context:
        return 0

    context = self._build_pure_rlm_initial_context(task)
    env.initialize_context(
        context,
        description="Workspace files selected for this Pure RLM run",
        additional_vars={"query": task},
    )

    # Persisted record of which entries were loaded and how large they were.
    context_event = {
        "type": "context",
        "run_id": run_id,
        "environment": env.name,
        "timestamp": self._utc_now(),
        "context_files": list(context.keys()),
        "context_chars": sum(len(text) for text in context.values()),
    }
    self._append_event(run_path, context_event)

    entry_count = len(context)
    runtime_payload = {
        "run_id": run_id,
        "files": entry_count,
        "chars": context_event["context_chars"],
    }
    self._emit_runtime_event("context_load", runtime_payload)
    return entry_count
557
+
470
558
  def run_task(
471
559
  self,
472
560
  task: str,
@@ -596,6 +684,12 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
596
684
  final_response = ""
597
685
  cancelled = False
598
686
  trajectory: list[dict[str, Any]] = []
687
+ context_files = self._initialize_pure_rlm_run_context(
688
+ env,
689
+ cleaned_task,
690
+ run_id=run_id,
691
+ run_path=run_path,
692
+ )
599
693
  usage_start = self._usage_snapshot()
600
694
  self.observability.on_run_start(
601
695
  run_id,
@@ -616,6 +710,7 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
616
710
  "parent_run_id": _parent_run_id,
617
711
  "pure_rlm_backend": self._pure_rlm_backend if env.name == "pure_rlm" else None,
618
712
  "pure_rlm_strict": strict_pure_mode if env.name == "pure_rlm" else None,
713
+ "context_files": context_files if env.name == "pure_rlm" else None,
619
714
  },
620
715
  )
621
716
  self._emit_runtime_event(
@@ -627,6 +722,7 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
627
722
  "framework": native_framework,
628
723
  "depth": _depth,
629
724
  "parent_run_id": _parent_run_id,
725
+ "context_files": context_files if env.name == "pure_rlm" else None,
630
726
  },
631
727
  )
632
728
 
@@ -1035,14 +1035,30 @@ def _convert_legacy_step(data: dict[str, Any]) -> SessionEvent:
1035
1035
  step_type = data.get("type", "")
1036
1036
 
1037
1037
  if step_type == "step":
1038
+ observation = data.get("observation", {})
1039
+ observation_dict = observation if isinstance(observation, dict) else {}
1040
+ action = data.get("action", {})
1041
+ action_dict = action if isinstance(action, dict) else {}
1042
+ success = observation_dict.get("success")
1043
+ if success is None:
1044
+ success = not bool(observation_dict.get("error") or observation_dict.get("stderr"))
1045
+ usage = data.get("usage", {})
1046
+ usage_dict = usage if isinstance(usage, dict) else {}
1038
1047
  return SessionEvent(
1039
1048
  event_type=SessionEventType.STEP_END,
1040
1049
  timestamp=data.get("timestamp", _utc_now()),
1041
- step=data.get("step", 0),
1050
+ step=int(data.get("step", 0) or 0),
1042
1051
  data={
1043
- "action": data.get("action", {}),
1044
- "observation": data.get("observation", {}),
1052
+ "step": int(data.get("step", 0) or 0),
1053
+ "timestamp": data.get("timestamp", _utc_now()),
1054
+ "action": action_dict,
1055
+ "observation": observation_dict,
1045
1056
  "reward": data.get("reward", 0.0),
1057
+ "success": bool(success),
1058
+ "tokens_used": int(
1059
+ usage_dict.get("prompt_tokens", 0) or 0
1060
+ )
1061
+ + int(usage_dict.get("completion_tokens", 0) or 0),
1046
1062
  },
1047
1063
  run_id=data.get("run_id", ""),
1048
1064
  depth=data.get("depth", 0),
@@ -1125,12 +1141,18 @@ def _build_snapshot_from_events(
1125
1141
 
1126
1142
  elif event.event_type == SessionEventType.STEP_END:
1127
1143
  # Build StepState from accumulated data
1144
+ if "step" not in current_step_data:
1145
+ current_step_data = {
1146
+ "step": int(event.data.get("step", event.step) or 0),
1147
+ "timestamp": str(event.data.get("timestamp", event.timestamp) or ""),
1148
+ }
1128
1149
  if "step" in current_step_data:
1129
1150
  # Merge any additional data from STEP_END event
1130
1151
  if "action" in event.data:
1131
1152
  action = event.data["action"]
1132
1153
  current_step_data.setdefault("action_type", action.get("action", ""))
1133
1154
  current_step_data.setdefault("action_code", action.get("code", ""))
1155
+ current_step_data.setdefault("action_rationale", action.get("reasoning", ""))
1134
1156
  current_step_data.setdefault("raw_action", action)
1135
1157
  if "observation" in event.data:
1136
1158
  obs = event.data["observation"]
@@ -1138,12 +1160,16 @@ def _build_snapshot_from_events(
1138
1160
  current_step_data.setdefault("error", obs.get("error", obs.get("stderr", "")))
1139
1161
  current_step_data.setdefault("raw_observation", obs)
1140
1162
  if "reward" in event.data:
1163
+ reward = float(event.data.get("reward", 0.0) or 0.0)
1164
+ cumulative = event.data.get("cumulative_reward")
1165
+ if cumulative is None:
1166
+ cumulative = total_reward + reward
1141
1167
  current_step_data.setdefault("reward", event.data["reward"])
1142
- current_step_data.setdefault(
1143
- "cumulative_reward", event.data.get("cumulative_reward", 0.0)
1144
- )
1168
+ current_step_data.setdefault("cumulative_reward", cumulative)
1145
1169
  if "success" in event.data:
1146
1170
  current_step_data.setdefault("success", event.data["success"])
1171
+ if "tokens_used" in event.data:
1172
+ current_step_data.setdefault("tokens_used", event.data["tokens_used"])
1147
1173
 
1148
1174
  step_state = StepState(
1149
1175
  step=current_step_data.get("step", 0),
@@ -1163,6 +1189,8 @@ def _build_snapshot_from_events(
1163
1189
  raw_observation=current_step_data.get("raw_observation", {}),
1164
1190
  )
1165
1191
  steps.append(step_state)
1192
+ total_reward = float(step_state.cumulative_reward)
1193
+ total_tokens += int(step_state.tokens_used or 0)
1166
1194
  current_step_data = {}
1167
1195
 
1168
1196
  elif event.event_type == SessionEventType.MEMORY_UPDATE:
@@ -62,6 +62,16 @@ def build_run_visualization(
62
62
  "success": observation_dict.get("success") if "success" in observation_dict else None,
63
63
  "path": str(observation_dict.get("path") or ""),
64
64
  "children_executed": int(observation_dict.get("children_executed") or 0),
65
+ "planner_preview": _clip_text(str(step.get("planner_raw") or ""), limit=260),
66
+ "code_preview": _clip_text(_action_code(step), limit=260),
67
+ "stdout_preview": _clip_text(str(observation_dict.get("stdout") or ""), limit=260),
68
+ "stderr_preview": _clip_text(str(observation_dict.get("stderr") or ""), limit=180),
69
+ "llm_calls_made": int(observation_dict.get("llm_calls_made") or 0),
70
+ "code_blocks_executed": int(observation_dict.get("code_blocks_executed") or 0),
71
+ "final_detected": bool(observation_dict.get("final_detected", False)),
72
+ "repl_variables": list(observation_dict.get("repl_variables") or [])[:20]
73
+ if isinstance(observation_dict.get("repl_variables"), list)
74
+ else [],
65
75
  }
66
76
  error = _extract_error(step)
67
77
  if error:
@@ -190,6 +200,19 @@ def _action_name(step: dict[str, Any]) -> str:
190
200
  return "unknown"
191
201
 
192
202
 
203
def _action_code(step: dict[str, Any]) -> str:
    """Return the code attached to a step's action, for preview purposes.

    Args:
        step: A step record; its ``action`` entry is expected to be a dict.

    Returns:
        ``action["code"]`` when it is a non-blank string. Otherwise the
        non-blank entries of ``action["_code_blocks"]`` joined by blank
        lines. An empty string when neither is available.
    """
    action = step.get("action")
    if not isinstance(action, dict):
        return ""

    code = action.get("code")
    if isinstance(code, str) and code.strip():
        return code

    blocks = action.get("_code_blocks")
    if not isinstance(blocks, (list, tuple)):
        return ""

    # Skip None entries explicitly: str(None) is the non-blank text "None",
    # which would otherwise leak into the preview as if it were code.
    rendered = (str(block) for block in blocks if block is not None)
    return "\n\n".join(text for text in rendered if text.strip())
214
+
215
+
193
216
  def _extract_error(step: dict[str, Any]) -> str:
194
217
  observation = step.get("observation")
195
218
  if not isinstance(observation, dict):
@@ -2403,14 +2403,40 @@ def run_textual_tui(config_manager: ConfigManager) -> None:
2403
2403
  if not timeline:
2404
2404
  target.update("[dim]No steps recorded in this run.[/dim]")
2405
2405
  return
2406
- lines = ["[bold cyan]Step Action Reward Success[/bold cyan]"]
2406
+ lines = [
2407
+ f"[bold cyan]Trajectory[/bold cyan] [dim]{viz.get('run_id', '')}[/dim]",
2408
+ "[bold cyan]Step Action Reward Success RLM signals[/bold cyan]",
2409
+ ]
2407
2410
  for entry in timeline:
2408
2411
  step = entry.get("step", "?")
2409
2412
  action = str(entry.get("action", "?"))[:14].ljust(14)
2410
2413
  reward = entry.get("reward", 0.0)
2411
2414
  cum = entry.get("cumulative_reward", 0.0)
2412
- ok = "[green]Y[/green]" if entry.get("success") else "[red]N[/red]"
2413
- lines.append(f" {step:<4} {action} {reward:+.3f} ({cum:.3f}) {ok}")
2415
+ success = entry.get("success")
2416
+ if success is None:
2417
+ ok = "[dim]-[/dim]"
2418
+ else:
2419
+ ok = "[green]Y[/green]" if success else "[red]N[/red]"
2420
+ signals: list[str] = []
2421
+ if entry.get("code_blocks_executed"):
2422
+ signals.append(f"code={entry.get('code_blocks_executed')}")
2423
+ if entry.get("llm_calls_made"):
2424
+ signals.append(f"llm={entry.get('llm_calls_made')}")
2425
+ if entry.get("final_detected"):
2426
+ signals.append("[green]FINAL[/green]")
2427
+ variables = entry.get("repl_variables") or []
2428
+ if variables:
2429
+ preview_vars = ", ".join(str(item) for item in variables[:5])
2430
+ signals.append(f"vars={preview_vars}")
2431
+ signal_text = " ".join(signals) if signals else "[dim]-[/dim]"
2432
+ lines.append(f" {step:<4} {action} {reward:+.3f} ({cum:.3f}) {ok} {signal_text}")
2433
+
2434
+ code_preview = str(entry.get("code_preview") or "").strip()
2435
+ stdout_preview = str(entry.get("stdout_preview") or "").strip()
2436
+ if code_preview:
2437
+ lines.append(f" [magenta]code[/magenta] {code_preview}")
2438
+ if stdout_preview:
2439
+ lines.append(f" [blue]out [/blue] {stdout_preview}")
2414
2440
  target.update("\n".join(lines))
2415
2441
 
2416
2442
  def _apply_view_mode(self) -> None:
@@ -2842,21 +2868,76 @@ def run_textual_tui(config_manager: ConfigManager) -> None:
2842
2868
  if self._session_replayer is None:
2843
2869
  return
2844
2870
  try:
2871
+ state = None
2845
2872
  if button_id == "replay_start_btn":
2846
2873
  self._session_replayer.goto_start()
2847
2874
  elif button_id == "replay_back_btn":
2848
- self._session_replayer.step_backward()
2875
+ state = self._session_replayer.step_backward()
2849
2876
  elif button_id == "replay_fwd_btn":
2850
- self._session_replayer.step_forward()
2877
+ state = self._session_replayer.step_forward()
2851
2878
  elif button_id == "replay_end_btn":
2852
2879
  self._session_replayer.goto_end()
2880
+ state = self._session_replayer.get_current_state()
2853
2881
  # Update position display
2854
2882
  cur = self._session_replayer.current_step
2855
2883
  total = self._session_replayer.total_steps
2856
2884
  self.query_one("#replay_position", Static).update(f"Step {cur}/{total}")
2885
+ if state is None:
2886
+ state = self._session_replayer.get_current_state()
2887
+ self._render_replay_step_detail(state)
2857
2888
  except Exception:
2858
2889
  pass
2859
2890
 
2891
def _render_replay_step_detail(self, state: Any | None) -> None:
    """Show the current replay step, including pure-RLM details, in the detail panel.

    Writes a header (step number, action, reward), then optional sections for
    REPL code, stdout and stderr, then a one-line summary of RLM signals read
    from the step's raw observation. When `state` is None a placeholder
    message is shown instead.
    """
    try:
        detail_widget = self.query_one("#replay_step_detail", Static)
    except Exception:
        # The lookup failed, so there is nowhere to render.
        return

    if state is None:
        detail_widget.update("[dim]Replay is at the start or end of the run.[/dim]")
        return

    observation = getattr(state, "raw_observation", {}) or {}
    action = getattr(state, "raw_action", {}) or {}

    step_no = getattr(state, "step", "?")
    action_name = getattr(state, "action_type", "") or action.get("action", "")
    reward = float(getattr(state, "reward", 0.0) or 0.0)
    out = [
        f"[bold cyan]Step {step_no}[/bold cyan] "
        f"action=[bold]{action_name}[/bold] "
        f"reward={reward:+.3f}",
    ]

    # Prefer the values stored on the state; fall back to the raw records.
    code_text = str(getattr(state, "action_code", "") or action.get("code", "") or "").strip()
    stdout_text = str(getattr(state, "output", "") or observation.get("stdout", "") or "").strip()
    stderr_text = str(getattr(state, "error", "") or observation.get("stderr", "") or "").strip()

    # NOTE(review): these texts are inserted into a markup string unescaped, so
    # bracketed content such as "[key]" may be read as markup - confirm.
    sections = (
        ("[magenta]REPL code[/magenta]", code_text, 1800),
        ("[blue]Observation stdout[/blue]", stdout_text, 1800),
        ("[red]Observation stderr[/red]", stderr_text, 1000),
    )
    for heading, body, cap in sections:
        if body:
            out.extend(("", heading, body[:cap]))

    signals: list[str] = []
    blocks_run = observation.get("code_blocks_executed")
    if blocks_run:
        signals.append(f"code_blocks={blocks_run}")
    llm_calls = observation.get("llm_calls_made")
    if llm_calls:
        signals.append(f"llm_calls={llm_calls}")
    if observation.get("final_detected"):
        signals.append("FINAL detected")
    variables = observation.get("repl_variables")
    if isinstance(variables, list) and variables:
        signals.append("vars=" + ", ".join(str(item) for item in variables[:12]))

    if signals:
        out.append("")
        out.append("[green]RLM signals[/green] " + " ".join(signals))

    detail_widget.update("\n".join(out))
2940
+
2860
2941
  def _refresh_research_dashboard(self, run_path: Path) -> None:
2861
2942
  """Populate the Research dashboard from a completed run trace."""
2862
2943
  try:
@@ -2904,7 +2985,7 @@ def run_textual_tui(config_manager: ConfigManager) -> None:
2904
2985
  chart.values = [pt.get("cumulative_reward", 0.0) for pt in reward_curve]
2905
2986
 
2906
2987
  self.query_one("#replay_step_detail", Static).update(
2907
- "[dim]Use < > buttons to step through the run.[/dim]"
2988
+ "[dim]Use < > buttons to step through the run. Each step will show REPL code, observations, and pure-RLM signals.[/dim]"
2908
2989
  )
2909
2990
  self._set_research_sub_view("replay")
2910
2991
  except Exception as exc:
@@ -761,6 +761,62 @@ class TestLoadSession:
761
761
  replayer = load_session(jsonl_path)
762
762
  assert replayer.total_steps >= 1
763
763
 
764
def test_load_runner_jsonl_step_events(self):
    """A runner trace with one step and one final event replays with usable state."""
    step_event = {
        "type": "step",
        "run_id": "run_demo",
        "environment": "pure_rlm",
        "task": "Validate pure RLM",
        "timestamp": "2026-06-25T10:00:01+00:00",
        "step": 1,
        "action": {
            "action": "run_repl",
            "code": "print(context.keys())",
            "reasoning": "Inspect context",
        },
        "observation": {
            "success": True,
            "stdout": "dict_keys(['a.py'])",
            "llm_calls_made": 1,
            "code_blocks_executed": 1,
            "repl_variables": ["context", "answer"],
        },
        "reward": 0.4,
        "usage": {"prompt_tokens": 10, "completion_tokens": 5},
    }
    final_event = {
        "type": "final",
        "run_id": "run_demo",
        "environment": "pure_rlm",
        "task": "Validate pure RLM",
        "timestamp": "2026-06-25T10:00:02+00:00",
        "completed": True,
        "steps": 1,
        "total_reward": 0.4,
        "final_response": "Yes",
        "usage": {"prompt_tokens": 10, "completion_tokens": 5},
    }

    with tempfile.TemporaryDirectory() as tmpdir:
        jsonl_path = Path(tmpdir) / "runner.jsonl"
        # One JSON document per line, in event order.
        with jsonl_path.open("w") as handle:
            handle.writelines(json.dumps(event) + "\n" for event in (step_event, final_event))

        replayer = load_session(jsonl_path)

        assert replayer.total_steps == 1
        snapshot = replayer.snapshot
        assert snapshot.completed is True
        assert snapshot.final_answer == "Yes"

        first = replayer.step_forward()
        assert first is not None
        assert first.action_type == "run_repl"
        assert first.action_code == "print(context.keys())"
        assert first.output == "dict_keys(['a.py'])"
        assert first.raw_observation["llm_calls_made"] == 1
819
+
764
820
 
765
821
  class TestCreateRecorder:
766
822
  """Tests for create_recorder convenience function."""
@@ -1414,6 +1414,39 @@ def test_rlm_pure_strict_blocks_delegate_actions(tmp_path):
1414
1414
  assert "delegate action is disabled" in str(observation)
1415
1415
 
1416
1416
 
1417
def test_rlm_pure_run_initializes_context_from_task_files(tmp_path):
    """A pure_rlm run seeds `context` from a file named in the task and logs it."""
    (tmp_path / "demo_module.py").write_text("VALUE = 42\n", encoding="utf-8")

    # Single scripted reply: list the context keys and finalize with them.
    scripted_reply = '```repl\nfinal_answer = list(context.keys())\nFINAL_VAR("final_answer")\n```'
    runner = RLMRunner(
        llm_connector=_FakeConnector(responses=[scripted_reply]),
        execution_engine=_ConfigurableExecutionEngine(pure_rlm_backend="exec"),
        run_dir=tmp_path / "runs",
        workdir=tmp_path,
    )

    result = runner.run_task(
        "Inspect demo_module.py",
        max_steps=1,
        exec_timeout=5,
        environment="pure_rlm",
    )

    assert result.completed is True
    assert "demo_module.py" in result.final_response

    events = runner.load_run_events(result.run_id)

    def first_of(kind):
        # First recorded event of the given type; StopIteration if absent.
        return next(event for event in events if event.get("type") == kind)

    assert first_of("context")["context_files"] == ["demo_module.py"]

    observation = first_of("step").get("observation", {})
    assert observation.get("final_detected") is True
    assert "context" in observation.get("repl_variables", [])
1448
+
1449
+
1417
1450
  def test_rlm_runner_blocks_exec_without_unsafe_opt_in(tmp_path):
1418
1451
  engine = _ConfigurableExecutionEngine(
1419
1452
  pure_rlm_backend="exec",
File without changes
File without changes
File without changes
File without changes
File without changes