rlm-code 0.1.7__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305)
  1. {rlm_code-0.1.7 → rlm_code-0.1.9}/.gitignore +1 -0
  2. {rlm_code-0.1.7 → rlm_code-0.1.9}/CHANGELOG.md +20 -0
  3. {rlm_code-0.1.7 → rlm_code-0.1.9}/PKG-INFO +9 -9
  4. {rlm_code-0.1.7 → rlm_code-0.1.9}/README.md +8 -8
  5. {rlm_code-0.1.7 → rlm_code-0.1.9}/pyproject.toml +1 -1
  6. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/__init__.py +1 -1
  7. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/__init__.py +1 -1
  8. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/environments.py +32 -1
  9. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/runner.py +97 -1
  10. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/session_replay.py +34 -6
  11. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/visualizer.py +23 -0
  12. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/traces/store.py +226 -0
  13. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/tui_app.py +87 -6
  14. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_session_replay.py +56 -0
  15. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_rlm_runner.py +33 -0
  16. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_trace_analysis.py +29 -0
  17. {rlm_code-0.1.7 → rlm_code-0.1.9}/LICENSE +0 -0
  18. {rlm_code-0.1.7 → rlm_code-0.1.9}/NOTICE +0 -0
  19. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/__init__.py +0 -0
  20. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/agent.py +0 -0
  21. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/agents/__init__.py +0 -0
  22. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/agents/rlm_agent.py +0 -0
  23. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/callbacks/__init__.py +0 -0
  24. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/callbacks/code_execution.py +0 -0
  25. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/cli.py +0 -0
  26. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/code_executor.py +0 -0
  27. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/events.py +0 -0
  28. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/__init__.py +0 -0
  29. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/base.py +0 -0
  30. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/lazy.py +0 -0
  31. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/loader.py +0 -0
  32. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/parsers/__init__.py +0 -0
  33. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/parsers/base.py +0 -0
  34. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/parsers/pdf.py +0 -0
  35. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/parsers/text.py +0 -0
  36. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/sources/__init__.py +0 -0
  37. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/sources/base.py +0 -0
  38. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/sources/gcs.py +0 -0
  39. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/sources/local.py +0 -0
  40. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/llm.py +0 -0
  41. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/logging/__init__.py +0 -0
  42. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/logging/rlm_logger.py +0 -0
  43. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/logging/verbose.py +0 -0
  44. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/main.py +0 -0
  45. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/prompts.py +0 -0
  46. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/repl/__init__.py +0 -0
  47. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/repl/local_repl.py +0 -0
  48. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/repl/safe_builtins.py +0 -0
  49. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/templates/index.html +0 -0
  50. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/tools/__init__.py +0 -0
  51. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/types.py +0 -0
  52. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/usage.py +0 -0
  53. {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/web.py +0 -0
  54. {rlm_code-0.1.7 → rlm_code-0.1.9}/eval/packs/README.md +0 -0
  55. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/__main__.py +0 -0
  56. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/__init__.py +0 -0
  57. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/config_command.py +0 -0
  58. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/create_command.py +0 -0
  59. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/demo_command.py +0 -0
  60. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/export_command.py +0 -0
  61. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/init_command.py +0 -0
  62. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/interactive_command.py +0 -0
  63. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/mcp_command.py +0 -0
  64. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/models_command.py +0 -0
  65. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/nl_command_router.py +0 -0
  66. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/optimize_command.py +0 -0
  67. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/run_command.py +0 -0
  68. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/slash_commands.py +0 -0
  69. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/__init__.py +0 -0
  70. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/config.py +0 -0
  71. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/debug_logger.py +0 -0
  72. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/directory_utils.py +0 -0
  73. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/exceptions.py +0 -0
  74. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/logging.py +0 -0
  75. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/venv_utils.py +0 -0
  76. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/version_checker.py +0 -0
  77. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/__init__.py +0 -0
  78. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/phase2_demo.py +0 -0
  79. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/phase3_demo.py +0 -0
  80. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/phase4_demo.py +0 -0
  81. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/pure_rlm_demo.py +0 -0
  82. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/execution/__init__.py +0 -0
  83. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/execution/engine.py +0 -0
  84. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/execution/sandbox.py +0 -0
  85. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/export/__init__.py +0 -0
  86. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/export/handler.py +0 -0
  87. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/export/package_builder.py +0 -0
  88. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/generators/evaluation_generator.py +0 -0
  89. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/generators/gepa_generator.py +0 -0
  90. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/harness/__init__.py +0 -0
  91. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/harness/registry.py +0 -0
  92. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/harness/runner.py +0 -0
  93. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/main.py +0 -0
  94. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/client_manager.py +0 -0
  95. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/config.py +0 -0
  96. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/exceptions.py +0 -0
  97. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/retry.py +0 -0
  98. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/server/__init__.py +0 -0
  99. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/server/rlm_server.py +0 -0
  100. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/server/tools.py +0 -0
  101. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/session_wrapper.py +0 -0
  102. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/__init__.py +0 -0
  103. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/factory.py +0 -0
  104. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/sse_transport.py +0 -0
  105. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/stdio_transport.py +0 -0
  106. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/websocket_transport.py +0 -0
  107. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/utils.py +0 -0
  108. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/__init__.py +0 -0
  109. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/cache.py +0 -0
  110. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/code_generator.py +0 -0
  111. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/dspy_reference_loader.py +0 -0
  112. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/llm_connector.py +0 -0
  113. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/model_manager.py +0 -0
  114. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/__init__.py +0 -0
  115. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/acp_discovery.py +0 -0
  116. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/local_discovery.py +0 -0
  117. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/model_catalog.py +0 -0
  118. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/registry.py +0 -0
  119. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/streaming.py +0 -0
  120. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/task_collector.py +0 -0
  121. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/optimization/__init__.py +0 -0
  122. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/optimization/data_collector.py +0 -0
  123. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/optimization/executor.py +0 -0
  124. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/optimization/workflow_manager.py +0 -0
  125. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/__init__.py +0 -0
  126. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/context_manager.py +0 -0
  127. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/dspy_md_generator.py +0 -0
  128. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/initializer.py +0 -0
  129. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/scanner.py +0 -0
  130. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/py.typed +0 -0
  131. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/__init__.py +0 -0
  132. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/action_planner.py +0 -0
  133. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/__init__.py +0 -0
  134. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/audit.py +0 -0
  135. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/gate.py +0 -0
  136. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/handlers.py +0 -0
  137. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/policy.py +0 -0
  138. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/benchmark_manager.py +0 -0
  139. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/benchmarks.py +0 -0
  140. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/chat_session.py +0 -0
  141. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/code_interpreter.py +0 -0
  142. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/comparison.py +0 -0
  143. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/config_schema.py +0 -0
  144. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/context_store.py +0 -0
  145. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/delegation.py +0 -0
  146. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/docker_interpreter.py +0 -0
  147. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/events.py +0 -0
  148. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/__init__.py +0 -0
  149. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
  150. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/base.py +0 -0
  151. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
  152. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
  153. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
  154. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
  155. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/registry.py +0 -0
  156. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/leaderboard.py +0 -0
  157. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/memory_compaction.py +0 -0
  158. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/mock_interpreter.py +0 -0
  159. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/monty_interpreter.py +0 -0
  160. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/observability.py +0 -0
  161. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/observability_sinks.py +0 -0
  162. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/__init__.py +0 -0
  163. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/action_policies.py +0 -0
  164. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/base.py +0 -0
  165. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/compaction_policies.py +0 -0
  166. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/registry.py +0 -0
  167. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/reward_policies.py +0 -0
  168. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/termination_policies.py +0 -0
  169. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/pure_rlm_environment.py +0 -0
  170. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/repl_types.py +0 -0
  171. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/__init__.py +0 -0
  172. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/theme.py +0 -0
  173. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
  174. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
  175. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
  176. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/task_signature.py +0 -0
  177. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/termination.py +0 -0
  178. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/trajectory.py +0 -0
  179. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/__init__.py +0 -0
  180. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/__init__.py +0 -0
  181. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
  182. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/base.py +0 -0
  183. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
  184. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
  185. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
  186. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
  187. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
  188. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
  189. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
  190. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
  191. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/registry.py +0 -0
  192. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/superbox.py +0 -0
  193. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/session/__init__.py +0 -0
  194. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/session/state_manager.py +0 -0
  195. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/.env.example +0 -0
  196. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/adapters.py +0 -0
  197. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/async_streaming.py +0 -0
  198. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/complete_programs.py +0 -0
  199. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/dspy_config_example.yaml +0 -0
  200. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/evaluation.py +0 -0
  201. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/industry_templates.py +0 -0
  202. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/optimizers.py +0 -0
  203. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/retrievers.py +0 -0
  204. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
  205. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/tests/__init__.py +0 -0
  206. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/tests/rlm/__init__.py +0 -0
  207. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/tests/rlm/test_phase2.py +0 -0
  208. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
  209. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/traces/__init__.py +0 -0
  210. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/traces/index.py +0 -0
  211. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/traces/models.py +0 -0
  212. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/__init__.py +0 -0
  213. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/agent_collab_view.py +0 -0
  214. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/animations.py +0 -0
  215. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/conversation.py +0 -0
  216. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/design_system.py +0 -0
  217. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/diff_viewer.py +0 -0
  218. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/notifications.py +0 -0
  219. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/persistent_shell.py +0 -0
  220. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/prompt_widget.py +0 -0
  221. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/prompts.py +0 -0
  222. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/pty_terminal.py +0 -0
  223. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/resizable_divider.py +0 -0
  224. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/thinking_display.py +0 -0
  225. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/tui_utils.py +0 -0
  226. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/welcome.py +0 -0
  227. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/__init__.py +0 -0
  228. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/anti_patterns.py +0 -0
  229. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/auto_fixer.py +0 -0
  230. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/best_practices.py +0 -0
  231. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/code_validator.py +0 -0
  232. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/config_validator.py +0 -0
  233. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/exceptions.py +0 -0
  234. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/input_validator.py +0 -0
  235. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/learning_integration.py +0 -0
  236. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/models.py +0 -0
  237. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/module_validator.py +0 -0
  238. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/predictor_validator.py +0 -0
  239. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/quality_scorer.py +0 -0
  240. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/report_generator.py +0 -0
  241. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/security.py +0 -0
  242. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/security_validator.py +0 -0
  243. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/signature_validator.py +0 -0
  244. {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/validator.py +0 -0
  245. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/__init__.py +0 -0
  246. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/conftest.py +0 -0
  247. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
  248. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_adk_rlm_adapter.py +0 -0
  249. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_code_interpreter.py +0 -0
  250. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_deepagents_adapter.py +0 -0
  251. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
  252. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_extract_fallback.py +0 -0
  253. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_framework_registry_coverage.py +0 -0
  254. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_google_adk_adapter.py +0 -0
  255. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_leaderboard.py +0 -0
  256. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_mock_interpreter.py +0 -0
  257. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_monty_interpreter.py +0 -0
  258. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_observability_sinks.py +0 -0
  259. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_p0_features.py +0 -0
  260. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_phase3.py +0 -0
  261. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_phase4.py +0 -0
  262. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
  263. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
  264. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_repl_history.py +0 -0
  265. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_security_hardening.py +0 -0
  266. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_submit.py +0 -0
  267. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_task_signature.py +0 -0
  268. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_user_tools.py +0 -0
  269. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_anti_patterns.py +0 -0
  270. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_auto_fixer.py +0 -0
  271. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_cache.py +0 -0
  272. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_execution_engine.py +0 -0
  273. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_export_import.py +0 -0
  274. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_harness_registry.py +0 -0
  275. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_harness_runner.py +0 -0
  276. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_init_command.py +0 -0
  277. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_integration.py +0 -0
  278. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_learning_integration.py +0 -0
  279. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_mcp_utils.py +0 -0
  280. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_module_validator.py +0 -0
  281. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_optimization_workflow.py +0 -0
  282. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_persistent_shell.py +0 -0
  283. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_predictor_validator.py +0 -0
  284. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_project_scanner.py +0 -0
  285. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_prompt_widget.py +0 -0
  286. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_property_validators.py +0 -0
  287. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_provider_discovery.py +0 -0
  288. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_provider_registry.py +0 -0
  289. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_quality_scorer.py +0 -0
  290. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_report_generator.py +0 -0
  291. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_retry.py +0 -0
  292. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_rlm_config.py +0 -0
  293. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_rlm_dspy_environment.py +0 -0
  294. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_rlm_observability.py +0 -0
  295. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_sandbox_runtimes.py +0 -0
  296. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_security_validator.py +0 -0
  297. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_session_management.py +0 -0
  298. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_signature_validator.py +0 -0
  299. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_slash_harness_command.py +0 -0
  300. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_slash_rlm_command.py +0 -0
  301. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_slash_sandbox_command.py +0 -0
  302. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_streaming.py +0 -0
  303. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_superbox.py +0 -0
  304. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_tui_utils.py +0 -0
  305. {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_validation.py +0 -0
@@ -153,6 +153,7 @@ cython_debug/
153
153
 
154
154
  # Project specific
155
155
  dspy_config.yaml
156
+ rlm_config.yaml
156
157
  *.log
157
158
 
158
159
  # Internal workspace data directories (all data in CWD)
@@ -5,6 +5,24 @@ All notable changes to this project are documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.9] - 2026-06-26
9
+
10
+ ### Added
11
+ - Pure RLM runner context initialization from explicit workspace file references in the task, with compact repository snapshot fallback.
12
+ - Context-load events for Pure RLM runs, including loaded file names and total context characters.
13
+ - Runner JSONL replay coverage for action code, observations, success state, token counts, and cumulative reward.
14
+
15
+ ### Changed
16
+ - TUI trajectory and replay views now show Pure RLM signals including REPL code, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables.
17
+ - Run visualization now includes richer Pure RLM previews for completed runs.
18
+
19
+ ## [0.1.8] - 2026-05-01
20
+
21
+ ### Added
22
+ - AHE-style layered trace evidence corpus export from `TraceStore`.
23
+ - New `trace_analysis` action `export_evidence_corpus` for writing `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans.
24
+ - Evidence corpus tests covering direct store export and environment action export.
25
+
8
26
  ## [0.1.7] - 2026-04-30
9
27
 
10
28
  ### Added
@@ -69,4 +87,6 @@ Initial public release of **RLM Code**.
69
87
 
70
88
  [0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
71
89
  [0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
90
+ [0.1.9]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.9
91
+ [0.1.8]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.8
72
92
  [0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlm-code
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
5
5
  Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
6
6
  Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
@@ -118,20 +118,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
118
118
 
119
119
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
120
120
 
121
- ## Release v0.1.7
121
+ ## Release v0.1.9
122
122
 
123
- This release adds HALO-style trace analysis as a new RLM environment.
123
+ This release improves Pure RLM repository runs and makes completed trajectories more inspectable from the TUI and replay views.
124
124
 
125
- - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
126
- - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
127
- - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
128
- - `/rlm` help/docs updated for `env=trace_analysis`
129
- - Dedicated trace analysis docs under the Core Engine section
125
+ - Pure RLM runs now initialize `context` from explicit workspace files mentioned in the task, with a compact repository snapshot fallback
126
+ - Runner events now record context-load metadata for Pure RLM runs
127
+ - Legacy runner JSONL step events replay with action code, observations, success, token counts, and cumulative reward
128
+ - Run visualization now includes REPL code previews, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables
129
+ - TUI trajectory and replay views now surface Pure RLM signals directly for completed runs
130
130
 
131
131
  Example:
132
132
 
133
133
  ```text
134
- /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
134
+ /rlm run "Validate pure_rlm_environment.py and cite context, REPL, llm_query, and FINAL evidence" env=pure_rlm steps=6
135
135
  ```
136
136
 
137
137
  ## Documentation
@@ -25,20 +25,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
25
25
 
26
26
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
27
27
 
28
- ## Release v0.1.7
28
+ ## Release v0.1.9
29
29
 
30
- This release adds HALO-style trace analysis as a new RLM environment.
30
+ This release improves Pure RLM repository runs and makes completed trajectories more inspectable from the TUI and replay views.
31
31
 
32
- - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
33
- - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
34
- - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
35
- - `/rlm` help/docs updated for `env=trace_analysis`
36
- - Dedicated trace analysis docs under the Core Engine section
32
+ - Pure RLM runs now initialize `context` from explicit workspace files mentioned in the task, with a compact repository snapshot fallback
33
+ - Runner events now record context-load metadata for Pure RLM runs
34
+ - Legacy runner JSONL step events replay with action code, observations, success, token counts, and cumulative reward
35
+ - Run visualization now includes REPL code previews, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables
36
+ - TUI trajectory and replay views now surface Pure RLM signals directly for completed runs
37
37
 
38
38
  Example:
39
39
 
40
40
  ```text
41
- /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
41
+ /rlm run "Validate pure_rlm_environment.py and cite context, REPL, llm_query, and FINAL evidence" env=pure_rlm steps=6
42
42
  ```
43
43
 
44
44
  ## Documentation
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rlm-code"
7
- version = "0.1.7"
7
+ version = "0.1.9"
8
8
  description = "RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -5,5 +5,5 @@ This package provides tools for creating, managing, and optimizing DSPy componen
5
5
  through natural language interactions.
6
6
  """
7
7
 
8
- __version__ = "0.1.7"
8
+ __version__ = "0.1.9"
9
9
  __author__ = "Super Agentic AI"
@@ -17,7 +17,7 @@ from .exceptions import (
17
17
  )
18
18
  from .session_wrapper import MCPSessionWrapper
19
19
 
20
- __version__ = "0.1.7"
20
+ __version__ = "0.1.9"
21
21
 
22
22
  __all__ = [
23
23
  "MCPClientManager",
@@ -306,8 +306,10 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
306
306
  "Return ONLY valid JSON object with keys:\n"
307
307
  "{"
308
308
  '"action": "set_trace_path" | "get_dataset_overview" | "query_traces" | '
309
- '"count_traces" | "view_trace" | "search_trace" | "view_spans" | "final", '
309
+ '"count_traces" | "view_trace" | "search_trace" | "view_spans" | '
310
+ '"export_evidence_corpus" | "final", '
310
311
  '"trace_path": "<path to JSONL traces>", '
312
+ '"output_dir": "<directory for exported evidence corpus>", '
311
313
  '"filters": {"has_errors": true, "model_names": ["..."], "service_names": ["..."], '
312
314
  '"agent_names": ["..."], "project_id": "..."}, '
313
315
  '"trace_id": "<trace id>", '
@@ -324,6 +326,7 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
324
326
  "- Always begin analysis with get_dataset_overview.\n"
325
327
  "- Use query_traces to choose real trace ids; never invent trace ids.\n"
326
328
  "- For large traces, prefer search_trace followed by view_spans.\n"
329
+ "- Use export_evidence_corpus when the caller needs files for MetaHarness or another coding agent.\n"
327
330
  "- Identify systemic harness failures, not one-off anomalies.\n"
328
331
  "- Output JSON only."
329
332
  )
@@ -448,6 +451,21 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
448
451
  reward=0.7,
449
452
  memory_note=f"Viewed selected spans for trace {trace_id}.",
450
453
  )
454
+ if action_name == "export_evidence_corpus":
455
+ output_dir = self._required_str(action, "output_dir")
456
+ resolved_output = Path(output_dir).expanduser()
457
+ if not resolved_output.is_absolute():
458
+ resolved_output = self.workdir / resolved_output
459
+ return self._ok(
460
+ observation=store.export_evidence_corpus(
461
+ resolved_output,
462
+ filters,
463
+ limit=self._int_arg(action, "limit", 100, minimum=1, maximum=1000),
464
+ include_raw=self._bool_arg(action, "include_raw", True),
465
+ ),
466
+ reward=0.75,
467
+ memory_note="Exported layered trace evidence corpus.",
468
+ )
451
469
  except Exception as exc:
452
470
  return EnvironmentActionResult(
453
471
  observation={"success": False, "error": f"{type(exc).__name__}: {exc}"},
@@ -530,6 +548,19 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
530
548
  parsed = default
531
549
  return max(minimum, min(maximum, parsed))
532
550
 
551
+ @staticmethod
552
+ def _bool_arg(action: dict[str, Any], key: str, default: bool) -> bool:
553
+ value = action.get(key, default)
554
+ if isinstance(value, bool):
555
+ return value
556
+ if isinstance(value, str):
557
+ normalized = value.strip().lower()
558
+ if normalized in {"1", "true", "yes", "on"}:
559
+ return True
560
+ if normalized in {"0", "false", "no", "off"}:
561
+ return False
562
+ return default
563
+
533
564
 
534
565
  class DSPyCodingRLMEnvironment(GenericRLMEnvironment):
535
566
  """DSPy-focused environment with file edit + tests + DSPy-aware scoring."""
@@ -9,6 +9,7 @@ from __future__ import annotations
9
9
 
10
10
  import hashlib
11
11
  import json
12
+ import re
12
13
  import threading
13
14
  import time
14
15
  from dataclasses import asdict, dataclass, is_dataclass
@@ -29,7 +30,7 @@ from .benchmark_manager import (
29
30
  )
30
31
  from .benchmarks import RLMBenchmarkCase, load_benchmark_packs
31
32
  from .chat_session import ChatSessionMixin
32
- from .context_store import LazyFileContext
33
+ from .context_store import ContextRef, LazyFileContext
33
34
  from .delegation import DelegationMixin
34
35
  from .environments import (
35
36
  DSPyCodingRLMEnvironment,
@@ -467,6 +468,93 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
467
468
  allow_unsafe_exec=(selected_backend == "exec" and self._pure_rlm_allow_unsafe_exec),
468
469
  )
469
470
 
471
+ def _extract_task_file_refs(self, task: str, limit: int = 12) -> list[ContextRef]:
472
+ """Find explicit workspace file references mentioned in a task string."""
473
+ candidates = re.findall(
474
+ r"(?<![\w.-])(?:[\w.-]+/)*[\w.-]+\.(?:py|md|toml|yaml|yml|json|txt|js|jsx|ts|tsx)",
475
+ task,
476
+ )
477
+ seen: set[str] = set()
478
+ refs: list[ContextRef] = []
479
+ for candidate in candidates:
480
+ normalized = candidate.strip().strip("`'\".,:;)")
481
+ if not normalized or normalized in seen:
482
+ continue
483
+ seen.add(normalized)
484
+ refs.append(ContextRef(path=normalized))
485
+ if len(refs) >= limit:
486
+ break
487
+ return refs
488
+
489
+ def _build_pure_rlm_initial_context(self, task: str) -> dict[str, str]:
490
+ """
491
+ Build a small real-code context for Pure RLM runs.
492
+
493
+ The direct PureRLMEnvironment API expects context to be initialized
494
+ explicitly. Runner/TUI users expect `/rlm run ... env=pure_rlm` to
495
+ start with useful workspace data, so we seed `context` with explicit
496
+ files named in the task, falling back to a compact repository snapshot.
497
+ """
498
+ refs = self._extract_task_file_refs(task)
499
+ if not refs:
500
+ refs = self.context_store.discover(limit=12)
501
+
502
+ context: dict[str, str] = {}
503
+ for ref in refs:
504
+ snippet = self.context_store.read(ref, max_chars=12000)
505
+ if snippet:
506
+ context[ref.path] = snippet
507
+
508
+ if context:
509
+ return context
510
+
511
+ discovered = self.context_store.discover(limit=80)
512
+ tree = "\n".join(ref.path for ref in discovered)
513
+ return {
514
+ "_workspace": (
515
+ f"Workspace: {self.workdir}\n"
516
+ "No explicit file snippets were loaded. Available files:\n"
517
+ f"{tree}"
518
+ ).strip()
519
+ }
520
+
521
+ def _initialize_pure_rlm_run_context(
522
+ self,
523
+ env: RLMEnvironment,
524
+ task: str,
525
+ *,
526
+ run_id: str,
527
+ run_path: Path,
528
+ ) -> int:
529
+ """Initialize `context` for Pure RLM runs and persist a context event."""
530
+ if env.name != "pure_rlm" or not hasattr(env, "initialize_context"):
531
+ return 0
532
+
533
+ context = self._build_pure_rlm_initial_context(task)
534
+ env.initialize_context(
535
+ context,
536
+ description="Workspace files selected for this Pure RLM run",
537
+ additional_vars={"query": task},
538
+ )
539
+ context_event = {
540
+ "type": "context",
541
+ "run_id": run_id,
542
+ "environment": env.name,
543
+ "timestamp": self._utc_now(),
544
+ "context_files": list(context.keys()),
545
+ "context_chars": sum(len(value) for value in context.values()),
546
+ }
547
+ self._append_event(run_path, context_event)
548
+ self._emit_runtime_event(
549
+ "context_load",
550
+ {
551
+ "run_id": run_id,
552
+ "files": len(context),
553
+ "chars": context_event["context_chars"],
554
+ },
555
+ )
556
+ return len(context)
557
+
470
558
  def run_task(
471
559
  self,
472
560
  task: str,
@@ -596,6 +684,12 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
596
684
  final_response = ""
597
685
  cancelled = False
598
686
  trajectory: list[dict[str, Any]] = []
687
+ context_files = self._initialize_pure_rlm_run_context(
688
+ env,
689
+ cleaned_task,
690
+ run_id=run_id,
691
+ run_path=run_path,
692
+ )
599
693
  usage_start = self._usage_snapshot()
600
694
  self.observability.on_run_start(
601
695
  run_id,
@@ -616,6 +710,7 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
616
710
  "parent_run_id": _parent_run_id,
617
711
  "pure_rlm_backend": self._pure_rlm_backend if env.name == "pure_rlm" else None,
618
712
  "pure_rlm_strict": strict_pure_mode if env.name == "pure_rlm" else None,
713
+ "context_files": context_files if env.name == "pure_rlm" else None,
619
714
  },
620
715
  )
621
716
  self._emit_runtime_event(
@@ -627,6 +722,7 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
627
722
  "framework": native_framework,
628
723
  "depth": _depth,
629
724
  "parent_run_id": _parent_run_id,
725
+ "context_files": context_files if env.name == "pure_rlm" else None,
630
726
  },
631
727
  )
632
728
 
@@ -1035,14 +1035,30 @@ def _convert_legacy_step(data: dict[str, Any]) -> SessionEvent:
1035
1035
  step_type = data.get("type", "")
1036
1036
 
1037
1037
  if step_type == "step":
1038
+ observation = data.get("observation", {})
1039
+ observation_dict = observation if isinstance(observation, dict) else {}
1040
+ action = data.get("action", {})
1041
+ action_dict = action if isinstance(action, dict) else {}
1042
+ success = observation_dict.get("success")
1043
+ if success is None:
1044
+ success = not bool(observation_dict.get("error") or observation_dict.get("stderr"))
1045
+ usage = data.get("usage", {})
1046
+ usage_dict = usage if isinstance(usage, dict) else {}
1038
1047
  return SessionEvent(
1039
1048
  event_type=SessionEventType.STEP_END,
1040
1049
  timestamp=data.get("timestamp", _utc_now()),
1041
- step=data.get("step", 0),
1050
+ step=int(data.get("step", 0) or 0),
1042
1051
  data={
1043
- "action": data.get("action", {}),
1044
- "observation": data.get("observation", {}),
1052
+ "step": int(data.get("step", 0) or 0),
1053
+ "timestamp": data.get("timestamp", _utc_now()),
1054
+ "action": action_dict,
1055
+ "observation": observation_dict,
1045
1056
  "reward": data.get("reward", 0.0),
1057
+ "success": bool(success),
1058
+ "tokens_used": int(
1059
+ usage_dict.get("prompt_tokens", 0) or 0
1060
+ )
1061
+ + int(usage_dict.get("completion_tokens", 0) or 0),
1046
1062
  },
1047
1063
  run_id=data.get("run_id", ""),
1048
1064
  depth=data.get("depth", 0),
@@ -1125,12 +1141,18 @@ def _build_snapshot_from_events(
1125
1141
 
1126
1142
  elif event.event_type == SessionEventType.STEP_END:
1127
1143
  # Build StepState from accumulated data
1144
+ if "step" not in current_step_data:
1145
+ current_step_data = {
1146
+ "step": int(event.data.get("step", event.step) or 0),
1147
+ "timestamp": str(event.data.get("timestamp", event.timestamp) or ""),
1148
+ }
1128
1149
  if "step" in current_step_data:
1129
1150
  # Merge any additional data from STEP_END event
1130
1151
  if "action" in event.data:
1131
1152
  action = event.data["action"]
1132
1153
  current_step_data.setdefault("action_type", action.get("action", ""))
1133
1154
  current_step_data.setdefault("action_code", action.get("code", ""))
1155
+ current_step_data.setdefault("action_rationale", action.get("reasoning", ""))
1134
1156
  current_step_data.setdefault("raw_action", action)
1135
1157
  if "observation" in event.data:
1136
1158
  obs = event.data["observation"]
@@ -1138,12 +1160,16 @@ def _build_snapshot_from_events(
1138
1160
  current_step_data.setdefault("error", obs.get("error", obs.get("stderr", "")))
1139
1161
  current_step_data.setdefault("raw_observation", obs)
1140
1162
  if "reward" in event.data:
1163
+ reward = float(event.data.get("reward", 0.0) or 0.0)
1164
+ cumulative = event.data.get("cumulative_reward")
1165
+ if cumulative is None:
1166
+ cumulative = total_reward + reward
1141
1167
  current_step_data.setdefault("reward", event.data["reward"])
1142
- current_step_data.setdefault(
1143
- "cumulative_reward", event.data.get("cumulative_reward", 0.0)
1144
- )
1168
+ current_step_data.setdefault("cumulative_reward", cumulative)
1145
1169
  if "success" in event.data:
1146
1170
  current_step_data.setdefault("success", event.data["success"])
1171
+ if "tokens_used" in event.data:
1172
+ current_step_data.setdefault("tokens_used", event.data["tokens_used"])
1147
1173
 
1148
1174
  step_state = StepState(
1149
1175
  step=current_step_data.get("step", 0),
@@ -1163,6 +1189,8 @@ def _build_snapshot_from_events(
1163
1189
  raw_observation=current_step_data.get("raw_observation", {}),
1164
1190
  )
1165
1191
  steps.append(step_state)
1192
+ total_reward = float(step_state.cumulative_reward)
1193
+ total_tokens += int(step_state.tokens_used or 0)
1166
1194
  current_step_data = {}
1167
1195
 
1168
1196
  elif event.event_type == SessionEventType.MEMORY_UPDATE:
@@ -62,6 +62,16 @@ def build_run_visualization(
62
62
  "success": observation_dict.get("success") if "success" in observation_dict else None,
63
63
  "path": str(observation_dict.get("path") or ""),
64
64
  "children_executed": int(observation_dict.get("children_executed") or 0),
65
+ "planner_preview": _clip_text(str(step.get("planner_raw") or ""), limit=260),
66
+ "code_preview": _clip_text(_action_code(step), limit=260),
67
+ "stdout_preview": _clip_text(str(observation_dict.get("stdout") or ""), limit=260),
68
+ "stderr_preview": _clip_text(str(observation_dict.get("stderr") or ""), limit=180),
69
+ "llm_calls_made": int(observation_dict.get("llm_calls_made") or 0),
70
+ "code_blocks_executed": int(observation_dict.get("code_blocks_executed") or 0),
71
+ "final_detected": bool(observation_dict.get("final_detected", False)),
72
+ "repl_variables": list(observation_dict.get("repl_variables") or [])[:20]
73
+ if isinstance(observation_dict.get("repl_variables"), list)
74
+ else [],
65
75
  }
66
76
  error = _extract_error(step)
67
77
  if error:
@@ -190,6 +200,19 @@ def _action_name(step: dict[str, Any]) -> str:
190
200
  return "unknown"
191
201
 
192
202
 
203
+ def _action_code(step: dict[str, Any]) -> str:
204
+ action = step.get("action")
205
+ if not isinstance(action, dict):
206
+ return ""
207
+ code = action.get("code")
208
+ if isinstance(code, str) and code.strip():
209
+ return code
210
+ blocks = action.get("_code_blocks")
211
+ if isinstance(blocks, list):
212
+ return "\n\n".join(str(block) for block in blocks if str(block).strip())
213
+ return ""
214
+
215
+
193
216
  def _extract_error(step: dict[str, Any]) -> str:
194
217
  observation = step.get("observation")
195
218
  if not isinstance(observation, dict):