rlm-code 0.1.6__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305)
  1. {rlm_code-0.1.6 → rlm_code-0.1.8}/CHANGELOG.md +22 -0
  2. {rlm_code-0.1.6 → rlm_code-0.1.8}/PKG-INFO +66 -9
  3. {rlm_code-0.1.6 → rlm_code-0.1.8}/README.md +65 -8
  4. {rlm_code-0.1.6 → rlm_code-0.1.8}/pyproject.toml +1 -1
  5. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/__init__.py +1 -1
  6. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/slash_commands.py +8 -8
  7. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/__init__.py +1 -1
  8. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/action_planner.py +3 -0
  9. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/environments.py +276 -0
  10. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/runner.py +13 -0
  11. rlm_code-0.1.8/rlm_code/traces/__init__.py +6 -0
  12. rlm_code-0.1.8/rlm_code/traces/index.py +170 -0
  13. rlm_code-0.1.8/rlm_code/traces/models.py +103 -0
  14. rlm_code-0.1.8/rlm_code/traces/store.py +447 -0
  15. rlm_code-0.1.8/tests/test_trace_analysis.py +144 -0
  16. {rlm_code-0.1.6 → rlm_code-0.1.8}/.gitignore +0 -0
  17. {rlm_code-0.1.6 → rlm_code-0.1.8}/LICENSE +0 -0
  18. {rlm_code-0.1.6 → rlm_code-0.1.8}/NOTICE +0 -0
  19. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/__init__.py +0 -0
  20. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/agent.py +0 -0
  21. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/agents/__init__.py +0 -0
  22. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/agents/rlm_agent.py +0 -0
  23. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/callbacks/__init__.py +0 -0
  24. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/callbacks/code_execution.py +0 -0
  25. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/cli.py +0 -0
  26. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/code_executor.py +0 -0
  27. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/events.py +0 -0
  28. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/__init__.py +0 -0
  29. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/base.py +0 -0
  30. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/lazy.py +0 -0
  31. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/loader.py +0 -0
  32. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/parsers/__init__.py +0 -0
  33. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/parsers/base.py +0 -0
  34. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/parsers/pdf.py +0 -0
  35. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/parsers/text.py +0 -0
  36. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/sources/__init__.py +0 -0
  37. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/sources/base.py +0 -0
  38. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/sources/gcs.py +0 -0
  39. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/files/sources/local.py +0 -0
  40. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/llm.py +0 -0
  41. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/logging/__init__.py +0 -0
  42. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/logging/rlm_logger.py +0 -0
  43. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/logging/verbose.py +0 -0
  44. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/main.py +0 -0
  45. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/prompts.py +0 -0
  46. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/repl/__init__.py +0 -0
  47. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/repl/local_repl.py +0 -0
  48. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/repl/safe_builtins.py +0 -0
  49. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/templates/index.html +0 -0
  50. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/tools/__init__.py +0 -0
  51. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/types.py +0 -0
  52. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/usage.py +0 -0
  53. {rlm_code-0.1.6 → rlm_code-0.1.8}/adk_rlm/web.py +0 -0
  54. {rlm_code-0.1.6 → rlm_code-0.1.8}/eval/packs/README.md +0 -0
  55. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/__main__.py +0 -0
  56. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/__init__.py +0 -0
  57. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/config_command.py +0 -0
  58. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/create_command.py +0 -0
  59. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/demo_command.py +0 -0
  60. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/export_command.py +0 -0
  61. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/init_command.py +0 -0
  62. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/interactive_command.py +0 -0
  63. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/mcp_command.py +0 -0
  64. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/models_command.py +0 -0
  65. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/nl_command_router.py +0 -0
  66. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/optimize_command.py +0 -0
  67. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/commands/run_command.py +0 -0
  68. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/core/__init__.py +0 -0
  69. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/core/config.py +0 -0
  70. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/core/debug_logger.py +0 -0
  71. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/core/directory_utils.py +0 -0
  72. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/core/exceptions.py +0 -0
  73. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/core/logging.py +0 -0
  74. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/core/venv_utils.py +0 -0
  75. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/core/version_checker.py +0 -0
  76. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/examples/__init__.py +0 -0
  77. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/examples/phase2_demo.py +0 -0
  78. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/examples/phase3_demo.py +0 -0
  79. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/examples/phase4_demo.py +0 -0
  80. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/examples/pure_rlm_demo.py +0 -0
  81. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/execution/__init__.py +0 -0
  82. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/execution/engine.py +0 -0
  83. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/execution/sandbox.py +0 -0
  84. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/export/__init__.py +0 -0
  85. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/export/handler.py +0 -0
  86. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/export/package_builder.py +0 -0
  87. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/generators/evaluation_generator.py +0 -0
  88. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/generators/gepa_generator.py +0 -0
  89. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/harness/__init__.py +0 -0
  90. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/harness/registry.py +0 -0
  91. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/harness/runner.py +0 -0
  92. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/main.py +0 -0
  93. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/client_manager.py +0 -0
  94. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/config.py +0 -0
  95. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/exceptions.py +0 -0
  96. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/retry.py +0 -0
  97. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/server/__init__.py +0 -0
  98. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/server/rlm_server.py +0 -0
  99. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/server/tools.py +0 -0
  100. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/session_wrapper.py +0 -0
  101. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/transports/__init__.py +0 -0
  102. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/transports/factory.py +0 -0
  103. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/transports/sse_transport.py +0 -0
  104. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/transports/stdio_transport.py +0 -0
  105. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/transports/websocket_transport.py +0 -0
  106. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/mcp/utils.py +0 -0
  107. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/__init__.py +0 -0
  108. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/cache.py +0 -0
  109. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/code_generator.py +0 -0
  110. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/dspy_reference_loader.py +0 -0
  111. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/llm_connector.py +0 -0
  112. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/model_manager.py +0 -0
  113. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/providers/__init__.py +0 -0
  114. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/providers/acp_discovery.py +0 -0
  115. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/providers/local_discovery.py +0 -0
  116. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/providers/model_catalog.py +0 -0
  117. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/providers/registry.py +0 -0
  118. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/streaming.py +0 -0
  119. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/models/task_collector.py +0 -0
  120. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/optimization/__init__.py +0 -0
  121. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/optimization/data_collector.py +0 -0
  122. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/optimization/executor.py +0 -0
  123. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/optimization/workflow_manager.py +0 -0
  124. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/project/__init__.py +0 -0
  125. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/project/context_manager.py +0 -0
  126. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/project/dspy_md_generator.py +0 -0
  127. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/project/initializer.py +0 -0
  128. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/project/scanner.py +0 -0
  129. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/py.typed +0 -0
  130. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/__init__.py +0 -0
  131. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/approval/__init__.py +0 -0
  132. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/approval/audit.py +0 -0
  133. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/approval/gate.py +0 -0
  134. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/approval/handlers.py +0 -0
  135. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/approval/policy.py +0 -0
  136. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/benchmark_manager.py +0 -0
  137. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/benchmarks.py +0 -0
  138. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/chat_session.py +0 -0
  139. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/code_interpreter.py +0 -0
  140. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/comparison.py +0 -0
  141. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/config_schema.py +0 -0
  142. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/context_store.py +0 -0
  143. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/delegation.py +0 -0
  144. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/docker_interpreter.py +0 -0
  145. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/events.py +0 -0
  146. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/__init__.py +0 -0
  147. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
  148. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/base.py +0 -0
  149. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
  150. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
  151. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
  152. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
  153. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/registry.py +0 -0
  154. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/leaderboard.py +0 -0
  155. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/memory_compaction.py +0 -0
  156. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/mock_interpreter.py +0 -0
  157. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/monty_interpreter.py +0 -0
  158. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/observability.py +0 -0
  159. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/observability_sinks.py +0 -0
  160. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/policies/__init__.py +0 -0
  161. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/policies/action_policies.py +0 -0
  162. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/policies/base.py +0 -0
  163. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/policies/compaction_policies.py +0 -0
  164. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/policies/registry.py +0 -0
  165. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/policies/reward_policies.py +0 -0
  166. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/policies/termination_policies.py +0 -0
  167. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/pure_rlm_environment.py +0 -0
  168. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/repl_types.py +0 -0
  169. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/__init__.py +0 -0
  170. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/theme.py +0 -0
  171. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
  172. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
  173. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
  174. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/session_replay.py +0 -0
  175. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/task_signature.py +0 -0
  176. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/termination.py +0 -0
  177. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/trajectory.py +0 -0
  178. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/rlm/visualizer.py +0 -0
  179. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/__init__.py +0 -0
  180. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/__init__.py +0 -0
  181. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
  182. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/base.py +0 -0
  183. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
  184. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
  185. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
  186. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
  187. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
  188. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
  189. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
  190. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
  191. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/registry.py +0 -0
  192. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/sandbox/superbox.py +0 -0
  193. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/session/__init__.py +0 -0
  194. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/session/state_manager.py +0 -0
  195. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/.env.example +0 -0
  196. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/adapters.py +0 -0
  197. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/async_streaming.py +0 -0
  198. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/complete_programs.py +0 -0
  199. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/dspy_config_example.yaml +0 -0
  200. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/evaluation.py +0 -0
  201. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/industry_templates.py +0 -0
  202. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/optimizers.py +0 -0
  203. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/retrievers.py +0 -0
  204. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
  205. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/tests/__init__.py +0 -0
  206. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/tests/rlm/__init__.py +0 -0
  207. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/tests/rlm/test_phase2.py +0 -0
  208. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
  209. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/__init__.py +0 -0
  210. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/agent_collab_view.py +0 -0
  211. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/animations.py +0 -0
  212. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/conversation.py +0 -0
  213. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/design_system.py +0 -0
  214. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/diff_viewer.py +0 -0
  215. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/notifications.py +0 -0
  216. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/persistent_shell.py +0 -0
  217. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/prompt_widget.py +0 -0
  218. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/prompts.py +0 -0
  219. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/pty_terminal.py +0 -0
  220. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/resizable_divider.py +0 -0
  221. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/thinking_display.py +0 -0
  222. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/tui_app.py +0 -0
  223. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/tui_utils.py +0 -0
  224. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/ui/welcome.py +0 -0
  225. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/__init__.py +0 -0
  226. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/anti_patterns.py +0 -0
  227. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/auto_fixer.py +0 -0
  228. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/best_practices.py +0 -0
  229. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/code_validator.py +0 -0
  230. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/config_validator.py +0 -0
  231. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/exceptions.py +0 -0
  232. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/input_validator.py +0 -0
  233. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/learning_integration.py +0 -0
  234. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/models.py +0 -0
  235. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/module_validator.py +0 -0
  236. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/predictor_validator.py +0 -0
  237. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/quality_scorer.py +0 -0
  238. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/report_generator.py +0 -0
  239. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/security.py +0 -0
  240. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/security_validator.py +0 -0
  241. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/signature_validator.py +0 -0
  242. {rlm_code-0.1.6 → rlm_code-0.1.8}/rlm_code/validation/validator.py +0 -0
  243. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/__init__.py +0 -0
  244. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/conftest.py +0 -0
  245. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
  246. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_adk_rlm_adapter.py +0 -0
  247. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_code_interpreter.py +0 -0
  248. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_deepagents_adapter.py +0 -0
  249. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
  250. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_extract_fallback.py +0 -0
  251. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_framework_registry_coverage.py +0 -0
  252. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_google_adk_adapter.py +0 -0
  253. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_leaderboard.py +0 -0
  254. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_mock_interpreter.py +0 -0
  255. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_monty_interpreter.py +0 -0
  256. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_observability_sinks.py +0 -0
  257. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_p0_features.py +0 -0
  258. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_phase3.py +0 -0
  259. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_phase4.py +0 -0
  260. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
  261. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
  262. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_repl_history.py +0 -0
  263. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_security_hardening.py +0 -0
  264. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_session_replay.py +0 -0
  265. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_submit.py +0 -0
  266. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_task_signature.py +0 -0
  267. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/rlm/test_user_tools.py +0 -0
  268. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_anti_patterns.py +0 -0
  269. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_auto_fixer.py +0 -0
  270. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_cache.py +0 -0
  271. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_execution_engine.py +0 -0
  272. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_export_import.py +0 -0
  273. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_harness_registry.py +0 -0
  274. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_harness_runner.py +0 -0
  275. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_init_command.py +0 -0
  276. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_integration.py +0 -0
  277. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_learning_integration.py +0 -0
  278. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_mcp_utils.py +0 -0
  279. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_module_validator.py +0 -0
  280. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_optimization_workflow.py +0 -0
  281. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_persistent_shell.py +0 -0
  282. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_predictor_validator.py +0 -0
  283. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_project_scanner.py +0 -0
  284. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_prompt_widget.py +0 -0
  285. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_property_validators.py +0 -0
  286. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_provider_discovery.py +0 -0
  287. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_provider_registry.py +0 -0
  288. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_quality_scorer.py +0 -0
  289. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_report_generator.py +0 -0
  290. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_retry.py +0 -0
  291. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_rlm_config.py +0 -0
  292. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_rlm_dspy_environment.py +0 -0
  293. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_rlm_observability.py +0 -0
  294. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_rlm_runner.py +0 -0
  295. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_sandbox_runtimes.py +0 -0
  296. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_security_validator.py +0 -0
  297. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_session_management.py +0 -0
  298. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_signature_validator.py +0 -0
  299. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_slash_harness_command.py +0 -0
  300. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_slash_rlm_command.py +0 -0
  301. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_slash_sandbox_command.py +0 -0
  302. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_streaming.py +0 -0
  303. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_superbox.py +0 -0
  304. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_tui_utils.py +0 -0
  305. {rlm_code-0.1.6 → rlm_code-0.1.8}/tests/test_validation.py +0 -0
@@ -5,6 +5,26 @@ All notable changes to this project are documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.8] - 2026-05-01
9
+
10
+ ### Added
11
+ - AHE-style layered trace evidence corpus export from `TraceStore`.
12
+ - New `trace_analysis` action `export_evidence_corpus` for writing `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans.
13
+ - Evidence corpus tests covering direct store export and environment action export.
14
+
15
+ ## [0.1.7] - 2026-04-30
16
+
17
+ ### Added
18
+ - HALO-style `trace_analysis` RLM environment for diagnosing agent harness failures from one-span-per-line JSONL traces.
19
+ - Trace sidecar indexing with dataset rollups for trace counts, span counts, error traces, services, models, agents, token totals, and sample trace ids.
20
+ - Bounded trace inspection actions: `get_dataset_overview`, `query_traces`, `count_traces`, `view_trace`, `search_trace`, and `view_spans`.
21
+ - Large-trace safeguards: per-attribute truncation, oversized trace summaries, and higher-cap selected-span reads.
22
+ - Tests for trace indexing, querying, searching, selected-span viewing, and trace environment actions.
23
+ - Trace analysis documentation under the Core Engine docs.
24
+
25
+ ### Changed
26
+ - `/rlm` command help now advertises `env=trace_analysis` for run, chat, and doctor workflows.
27
+
8
28
  ## [0.1.6] - 2026-02-20
9
29
 
10
30
  ### Added
@@ -56,3 +76,5 @@ Initial public release of **RLM Code**.
56
76
 
57
77
  [0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
58
78
  [0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
79
+ [0.1.8]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.8
80
+ [0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlm-code
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
5
5
  Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
6
6
  Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
@@ -118,20 +118,21 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
118
118
 
119
119
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
120
120
 
121
- ## Release v0.1.6
121
+ ## Release v0.1.8
122
122
 
123
- This release adds the new CodeMode path as an opt-in harness strategy.
123
+ This release extends HALO/AHE-style trace analysis with layered evidence export.
124
124
 
125
- - New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
126
- - MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
127
- - Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
128
- - Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
129
- - Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
125
+ - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
126
+ - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
127
+ - AHE-style evidence corpus export with `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans
128
+ - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
129
+ - `/rlm` help/docs updated for `env=trace_analysis`
130
+ - Dedicated trace analysis docs under the Core Engine section
130
131
 
131
132
  Example:
132
133
 
133
134
  ```text
134
- /harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
135
+ /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
135
136
  ```
136
137
 
137
138
  ## Documentation
@@ -287,6 +288,62 @@ Notes:
287
288
  - In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
288
289
  - In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
289
290
 
291
+ ### 8. CodeMode with UTCP and Cloudflare MCP
292
+
293
+ Use these server entries in your project `rlm_config.yaml`:
294
+
295
+ ```yaml
296
+ mcp_servers:
297
+ utcp-codemode:
298
+ name: utcp-codemode
299
+ description: "Local CodeMode MCP bridge"
300
+ enabled: true
301
+ auto_connect: false
302
+ timeout_seconds: 30
303
+ retry_attempts: 3
304
+ transport:
305
+ type: stdio
306
+ command: npx
307
+ args:
308
+ - "@utcp/code-mode-mcp"
309
+
310
+ cloudflare-codemode:
311
+ name: cloudflare-codemode
312
+ description: "Cloudflare MCP via remote bridge"
313
+ enabled: true
314
+ auto_connect: false
315
+ timeout_seconds: 30
316
+ retry_attempts: 3
317
+ transport:
318
+ type: stdio
319
+ command: npx
320
+ args:
321
+ - "mcp-remote"
322
+ - "https://mcp.cloudflare.com/mcp"
323
+ ```
324
+
325
+ UTCP path (native CodeMode in current release):
326
+
327
+ ```text
328
+ /mcp-connect utcp-codemode
329
+ /mcp-tools utcp-codemode
330
+ /harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
331
+ ```
332
+
333
+ Cloudflare path (recommended strategy today):
334
+
335
+ ```text
336
+ /mcp-connect cloudflare-codemode
337
+ /mcp-tools cloudflare-codemode
338
+ /harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
339
+ ```
340
+
341
+ Notes:
342
+
343
+ - On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
344
+ - In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
345
+ - If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
346
+
290
347
  ## How the RLM Loop Works
291
348
 
292
349
  Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
@@ -25,20 +25,21 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
25
25
 
26
26
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
27
27
 
28
- ## Release v0.1.6
28
+ ## Release v0.1.8
29
29
 
30
- This release adds the new CodeMode path as an opt-in harness strategy.
30
+ This release extends HALO/AHE-style trace analysis with layered evidence export.
31
31
 
32
- - New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
33
- - MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
34
- - Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
35
- - Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
36
- - Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
32
+ - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
33
+ - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
34
+ - AHE-style evidence corpus export with `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans
35
+ - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
36
+ - `/rlm` help/docs updated for `env=trace_analysis`
37
+ - Dedicated trace analysis docs under the Core Engine section
37
38
 
38
39
  Example:
39
40
 
40
41
  ```text
41
- /harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
42
+ /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
42
43
  ```
43
44
 
44
45
  ## Documentation
@@ -194,6 +195,62 @@ Notes:
194
195
  - In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
195
196
  - In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
196
197
 
198
+ ### 8. CodeMode with UTCP and Cloudflare MCP
199
+
200
+ Use these server entries in your project `rlm_config.yaml`:
201
+
202
+ ```yaml
203
+ mcp_servers:
204
+ utcp-codemode:
205
+ name: utcp-codemode
206
+ description: "Local CodeMode MCP bridge"
207
+ enabled: true
208
+ auto_connect: false
209
+ timeout_seconds: 30
210
+ retry_attempts: 3
211
+ transport:
212
+ type: stdio
213
+ command: npx
214
+ args:
215
+ - "@utcp/code-mode-mcp"
216
+
217
+ cloudflare-codemode:
218
+ name: cloudflare-codemode
219
+ description: "Cloudflare MCP via remote bridge"
220
+ enabled: true
221
+ auto_connect: false
222
+ timeout_seconds: 30
223
+ retry_attempts: 3
224
+ transport:
225
+ type: stdio
226
+ command: npx
227
+ args:
228
+ - "mcp-remote"
229
+ - "https://mcp.cloudflare.com/mcp"
230
+ ```
231
+
232
+ UTCP path (native CodeMode in current release):
233
+
234
+ ```text
235
+ /mcp-connect utcp-codemode
236
+ /mcp-tools utcp-codemode
237
+ /harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
238
+ ```
239
+
240
+ Cloudflare path (recommended strategy today):
241
+
242
+ ```text
243
+ /mcp-connect cloudflare-codemode
244
+ /mcp-tools cloudflare-codemode
245
+ /harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
246
+ ```
247
+
248
+ Notes:
249
+
250
+ - On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
251
+ - In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
252
+ - If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
253
+
197
254
  ## How the RLM Loop Works
198
255
 
199
256
  Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rlm-code"
7
- version = "0.1.6"
7
+ version = "0.1.8"
8
8
  description = "RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -5,5 +5,5 @@ This package provides tools for creating, managing, and optimizing DSPy componen
5
5
  through natural language interactions.
6
6
  """
7
7
 
8
- __version__ = "0.1.6"
8
+ __version__ = "0.1.8"
9
9
  __author__ = "Super Agentic AI"
@@ -1684,7 +1684,7 @@ class SlashCommandHandler:
1684
1684
  Manage RLM runs.
1685
1685
 
1686
1686
  Usage:
1687
- /rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
1687
+ /rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model]
1688
1688
  /rlm bench [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model]
1689
1689
  /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N]
1690
1690
  /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json]
@@ -1696,8 +1696,8 @@ class SlashCommandHandler:
1696
1696
  /rlm status [run_id]
1697
1697
  /rlm abort [run_id|all]
1698
1698
  /rlm replay [run_id|latest]
1699
- /rlm doctor [env=generic|dspy|pure_rlm] [--json]
1700
- /rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
1699
+ /rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json]
1700
+ /rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
1701
1701
  /rlm chat status [session=name]
1702
1702
  /rlm chat reset [session=name]
1703
1703
  /rlm observability
@@ -1708,14 +1708,14 @@ class SlashCommandHandler:
1708
1708
  console.print("[bold cyan]🧠 RLM Commands[/bold cyan]")
1709
1709
  console.print(
1710
1710
  " [yellow]/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] "
1711
- f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] "
1711
+ f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] "
1712
1712
  "[sub=provider/model][/yellow]"
1713
1713
  )
1714
1714
  console.print(
1715
1715
  " [yellow]/rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
1716
1716
  "[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
1717
1717
  "[pack=path[,path2]] [limit=N] [steps=N] "
1718
- f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model][/yellow]"
1718
+ f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model][/yellow]"
1719
1719
  )
1720
1720
  console.print(
1721
1721
  " [yellow]/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
@@ -1741,9 +1741,9 @@ class SlashCommandHandler:
1741
1741
  console.print(" [yellow]/rlm status [run_id][/yellow]")
1742
1742
  console.print(" [yellow]/rlm abort [run_id|all][/yellow]")
1743
1743
  console.print(" [yellow]/rlm replay [run_id|latest][/yellow]")
1744
- console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm] [--json][/yellow]")
1744
+ console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json][/yellow]")
1745
1745
  console.print(
1746
- " [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] "
1746
+ " [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] "
1747
1747
  f"[children=N] [parallel=N] [budget=N] [framework={framework_opts}] "
1748
1748
  "[sub=provider/model][/yellow]"
1749
1749
  )
@@ -2135,7 +2135,7 @@ class SlashCommandHandler:
2135
2135
  task = " ".join(task_tokens).strip()
2136
2136
  if not task:
2137
2137
  show_error_message(
2138
- "Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm] "
2138
+ "Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm|trace_analysis] "
2139
2139
  "[depth=N] [children=N] [parallel=N] [budget=N] "
2140
2140
  f"[framework={framework_opts}] "
2141
2141
  "[branch=N] [sub=provider/model]"
@@ -17,7 +17,7 @@ from .exceptions import (
17
17
  )
18
18
  from .session_wrapper import MCPSessionWrapper
19
19
 
20
- __version__ = "0.1.6"
20
+ __version__ = "0.1.8"
21
21
 
22
22
  __all__ = [
23
23
  "MCPClientManager",
@@ -15,6 +15,7 @@ from .environments import (
15
15
  DSPyCodingRLMEnvironment,
16
16
  GenericRLMEnvironment,
17
17
  RLMEnvironment,
18
+ TraceAnalysisEnvironment,
18
19
  )
19
20
  from .pure_rlm_environment import PureRLMConfig, PureRLMEnvironment
20
21
 
@@ -276,6 +277,8 @@ class ActionPlannerMixin:
276
277
  )
277
278
  if isinstance(env, DSPyCodingRLMEnvironment):
278
279
  return DSPyCodingRLMEnvironment(workdir=workdir, reward_profile=self.reward_profile)
280
+ if isinstance(env, TraceAnalysisEnvironment):
281
+ return TraceAnalysisEnvironment(workdir=workdir, reward_profile=self.reward_profile)
279
282
  if isinstance(env, GenericRLMEnvironment):
280
283
  return GenericRLMEnvironment(workdir=workdir, reward_profile=self.reward_profile)
281
284
  # Fallback to generic environment in preview if an unknown env type appears.
@@ -286,6 +286,282 @@ class GenericRLMEnvironment:
286
286
  return "Execution failed without stderr."
287
287
 
288
288
 
289
class TraceAnalysisEnvironment(GenericRLMEnvironment):
    """HALO-style trace analysis environment over one-span-per-line JSONL traces.

    The planner drives a loaded trace dataset through a small set of JSON
    actions (overview, query, count, trace view, search, span view, evidence
    export). The active dataset is remembered between steps, so a trace path
    only has to be supplied once: either as ``trace=<path>`` /
    ``trace_path=<path>`` inside the task text, or via a ``set_trace_path``
    action.
    """

    name = "trace_analysis"

    # Every action name execute_action knows how to handle. Checked up front so
    # an unknown action is reported as unsupported even when no dataset is
    # loaded yet.
    _SUPPORTED_ACTIONS = frozenset(
        {
            "set_trace_path",
            "get_dataset_overview",
            "query_traces",
            "count_traces",
            "view_trace",
            "search_trace",
            "view_spans",
            "export_evidence_corpus",
        }
    )

    def __init__(
        self,
        workdir: Path | None = None,
        reward_profile: RLMRewardProfile | dict[str, Any] | None = None,
    ):
        """Create the environment with no trace dataset loaded."""
        super().__init__(workdir=workdir, reward_profile=reward_profile)
        # Path of the active (or last attempted) trace file; None until one is seen.
        self._trace_path: Path | None = None
        # Loaded TraceStore for the active path; None when nothing is loaded
        # or the last load attempt failed.
        self._store: Any | None = None

    def system_prompt(self) -> str:
        """Return the planner system prompt describing the JSON action schema."""
        return (
            "You are an RLM planner specialized for analyzing agent execution traces.\n"
            "Return ONLY valid JSON object with keys:\n"
            "{"
            '"action": "set_trace_path" | "get_dataset_overview" | "query_traces" | '
            '"count_traces" | "view_trace" | "search_trace" | "view_spans" | '
            '"export_evidence_corpus" | "final", '
            '"trace_path": "<path to JSONL traces>", '
            '"output_dir": "<directory for exported evidence corpus>", '
            '"filters": {"has_errors": true, "model_names": ["..."], "service_names": ["..."], '
            '"agent_names": ["..."], "project_id": "..."}, '
            '"trace_id": "<trace id>", '
            '"span_ids": ["<span id>"], '
            '"pattern": "<literal substring>", '
            '"limit": <integer>, '
            '"offset": <integer>, '
            '"rationale": "<brief reason>", '
            '"done": true|false, '
            '"final_response": "<required when action=final>"'
            "}\n"
            "Rules:\n"
            "- Load a trace file first if one is not already active.\n"
            "- Always begin analysis with get_dataset_overview.\n"
            "- Use query_traces to choose real trace ids; never invent trace ids.\n"
            "- For large traces, prefer search_trace followed by view_spans.\n"
            "- Use export_evidence_corpus when the caller needs files for MetaHarness or another coding agent.\n"
            "- Identify systemic harness failures, not one-off anomalies.\n"
            "- Output JSON only."
        )

    def planner_prompt(
        self, task: str, memory: list[str], trajectory: list[dict[str, Any]], step_index: int
    ) -> str:
        """Build the per-step planner prompt, auto-loading a trace named in the task.

        If ``task`` contains ``trace=<path>`` or ``trace_path=<path>`` and that
        file is not already the active dataset, it is loaded here. A failed load
        is remembered (path kept, store cleared) so it is not retried on every
        step; the planner sees the path in the prompt and gets the structured
        error from ``execute_action`` if it tries to use it.
        """
        inferred = self._extract_trace_path(task)
        if inferred is not None:
            # Normalise exactly as _load_store does before comparing. Comparing
            # the raw (possibly relative) task path against the resolved active
            # path would never match and would reload the store on every step.
            try:
                target = self._resolve_trace_path(inferred)
            except (OSError, RuntimeError, ValueError):
                target = inferred
            if target != self._trace_path:
                try:
                    self._load_store(inferred)
                except Exception:
                    # Best effort: surface the failure through the prompt
                    # instead of aborting prompt construction.
                    self._trace_path = target
                    self._store = None

        base = super().planner_prompt(task, memory, trajectory, step_index)
        active = str(self._trace_path) if self._trace_path is not None else "(none)"
        overview = ""
        if self._store is not None:
            try:
                data = self._store.get_overview({})
                overview = (
                    f"\nActive trace overview: traces={data['total_traces']} "
                    f"spans={data['total_spans']} errors={data['error_trace_count']} "
                    f"sample_trace_ids={data['sample_trace_ids'][:5]}"
                )
            except Exception:
                # The overview line is optional context; omit it on any failure.
                overview = ""
        return (
            f"{base}\n\n"
            f"Trace analysis environment.\n"
            f"Active trace path: {active}\n"
            "If the task includes trace=<path> or trace_path=<path>, use that file.\n"
            "Goal: produce a concise evidence report of repeated harness failure modes "
            "with concrete trace ids/spans and suggested harness changes."
            f"{overview}"
        )

    def execute_action(
        self,
        action: dict[str, Any],
        execution_engine: Any,
        exec_timeout: int,
        llm_connector: Any | None = None,
    ) -> EnvironmentActionResult:
        """Execute one planner action against the active trace dataset.

        ``final`` is delegated to the parent environment. Unknown action names
        are rejected before any dataset lookup. Any exception raised while
        handling a supported action is converted into a failed observation with
        a negative reward rather than propagated.
        """
        action_name = str(action.get("action", "")).strip().lower()
        if action_name == "final":
            return super().execute_action(
                action,
                execution_engine,
                exec_timeout,
                llm_connector=llm_connector,
            )
        if action_name not in self._SUPPORTED_ACTIONS:
            # Reject before touching the store so the planner is told the real
            # problem instead of "no trace dataset loaded".
            return self._unsupported(action_name)

        try:
            if action_name == "set_trace_path":
                store = self._store_from_action(action, required_path=True)
                return EnvironmentActionResult(
                    observation={
                        "success": True,
                        "trace_path": str(store.trace_path),
                        "index_path": str(store.index_path),
                        "overview": store.get_overview({}),
                    },
                    reward=0.55,
                    memory_note=f"Loaded trace dataset: {store.trace_path}",
                )

            store = self._store_from_action(action, required_path=False)
            # Non-dict filters are treated as "no filters".
            filters = action.get("filters") if isinstance(action.get("filters"), dict) else {}

            if action_name == "get_dataset_overview":
                return self._ok(
                    observation=store.get_overview(filters),
                    reward=0.45,
                    memory_note="Loaded trace dataset overview.",
                )
            if action_name == "query_traces":
                return self._ok(
                    observation=store.query_traces(
                        filters,
                        limit=self._int_arg(action, "limit", 50, minimum=1, maximum=200),
                        offset=self._int_arg(action, "offset", 0, minimum=0, maximum=1_000_000),
                    ),
                    reward=0.5,
                    memory_note="Queried trace summaries.",
                )
            if action_name == "count_traces":
                return self._ok(
                    observation=store.count_traces(filters),
                    reward=0.35,
                    memory_note="Counted traces matching filters.",
                )
            if action_name == "view_trace":
                trace_id = self._required_str(action, "trace_id")
                return self._ok(
                    observation=store.view_trace(trace_id),
                    reward=0.65,
                    memory_note=f"Viewed trace {trace_id}.",
                )
            if action_name == "search_trace":
                trace_id = self._required_str(action, "trace_id")
                pattern = self._required_str(action, "pattern")
                return self._ok(
                    observation=store.search_trace(
                        trace_id,
                        pattern,
                        limit=self._int_arg(action, "limit", 100, minimum=1, maximum=500),
                    ),
                    reward=0.65,
                    memory_note=f"Searched trace {trace_id} for {pattern!r}.",
                )
            if action_name == "view_spans":
                trace_id = self._required_str(action, "trace_id")
                span_ids = action.get("span_ids")
                if not isinstance(span_ids, list) or not span_ids:
                    raise ValueError("view_spans requires non-empty span_ids list")
                return self._ok(
                    observation=store.view_spans(trace_id, [str(item) for item in span_ids]),
                    reward=0.7,
                    memory_note=f"Viewed selected spans for trace {trace_id}.",
                )
            if action_name == "export_evidence_corpus":
                output_dir = self._required_str(action, "output_dir")
                resolved_output = Path(output_dir).expanduser()
                # Relative output directories are anchored at the workdir.
                if not resolved_output.is_absolute():
                    resolved_output = self.workdir / resolved_output
                return self._ok(
                    observation=store.export_evidence_corpus(
                        resolved_output,
                        filters,
                        limit=self._int_arg(action, "limit", 100, minimum=1, maximum=1000),
                        include_raw=self._bool_arg(action, "include_raw", True),
                    ),
                    reward=0.75,
                    memory_note="Exported layered trace evidence corpus.",
                )
        except Exception as exc:
            return EnvironmentActionResult(
                observation={"success": False, "error": f"{type(exc).__name__}: {exc}"},
                reward=-0.25,
                memory_note=f"Trace analysis action failed: {type(exc).__name__}.",
            )

        # Safety net: only reachable if _SUPPORTED_ACTIONS and the branches
        # above drift apart.
        return self._unsupported(action_name)

    def doctor_checks(self) -> list[EnvironmentDoctorCheck]:
        """Return the parent's doctor checks plus a passing trace_analysis entry."""
        checks = super().doctor_checks()
        checks.append(
            EnvironmentDoctorCheck(
                name="trace_analysis",
                status="pass",
                detail="Trace analysis environment is available.",
            )
        )
        return checks

    def _ok(self, *, observation: dict[str, Any], reward: float, memory_note: str) -> EnvironmentActionResult:
        """Wrap a store result as a successful action result.

        ``success`` is set to True first, so a ``success`` key already present
        in ``observation`` takes precedence.
        """
        payload = {"success": True, **observation}
        return EnvironmentActionResult(
            observation=payload,
            reward=reward,
            memory_note=memory_note,
        )

    @staticmethod
    def _unsupported(action_name: str) -> EnvironmentActionResult:
        """Build the failed result returned for an unknown action name."""
        return EnvironmentActionResult(
            observation={"success": False, "error": f"Unsupported action '{action_name}'."},
            reward=-0.2,
            memory_note="Planner produced unsupported trace action.",
        )

    def _store_from_action(self, action: dict[str, Any], *, required_path: bool) -> Any:
        """Return the store an action should run against.

        A non-blank ``trace_path`` (or ``path``) on the action loads that file
        and makes it the active dataset. Otherwise the already-active store is
        reused.

        Raises:
            ValueError: if the action names no path and no dataset is loaded.
        """
        raw = action.get("trace_path") or action.get("path")
        if isinstance(raw, str) and raw.strip():
            return self._load_store(Path(raw.strip()).expanduser())
        if self._store is not None:
            return self._store
        if required_path:
            raise ValueError("trace_path is required")
        raise ValueError("no trace dataset loaded; pass trace_path or use set_trace_path first")

    def _resolve_trace_path(self, trace_path: Path) -> Path:
        """Return ``trace_path`` in the form ``_load_store`` records as active.

        Relative paths are anchored at the workdir, then the result is resolved.
        """
        anchored = trace_path if trace_path.is_absolute() else (self.workdir / trace_path)
        return anchored.resolve()

    def _load_store(self, trace_path: Path) -> Any:
        """Load ``trace_path`` into a TraceStore and make it the active dataset.

        Relative paths are anchored at the workdir. The active path and store
        are only updated after the load succeeds; load errors propagate.
        """
        # Imported lazily, as in the original implementation.
        from ..traces import TraceStore

        resolved = trace_path if trace_path.is_absolute() else (self.workdir / trace_path)
        store = TraceStore.load(resolved)
        self._trace_path = resolved.resolve()
        self._store = store
        return store

    @staticmethod
    def _extract_trace_path(task: str) -> Path | None:
        """Return the path from a ``trace=<path>`` / ``trace_path=<path>`` token.

        The token must start the task or follow whitespace, and the path ends at
        the next whitespace. Surrounding quotes are stripped and ``~`` is
        expanded. Returns None when no token is present or the path is empty.
        """
        match = re.search(r"(?:^|\s)(?:trace|trace_path)=([^\s]+)", task)
        if not match:
            return None
        raw = match.group(1).strip().strip("\"'")
        return Path(raw).expanduser() if raw else None

    @staticmethod
    def _required_str(action: dict[str, Any], key: str) -> str:
        """Return ``action[key]`` stripped.

        Raises:
            ValueError: if the value is missing, not a string, or blank.
        """
        value = action.get(key)
        if not isinstance(value, str) or not value.strip():
            raise ValueError(f"{key} is required")
        return value.strip()

    @staticmethod
    def _int_arg(
        action: dict[str, Any],
        key: str,
        default: int,
        *,
        minimum: int,
        maximum: int,
    ) -> int:
        """Read an integer argument, clamped to ``[minimum, maximum]``.

        A missing or unparseable value falls back to ``default``; the fallback
        is clamped as well.
        """
        value = action.get(key, default)
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError):
            # TypeError: None/list/dict; ValueError: non-numeric text or NaN;
            # OverflowError: infinity.
            parsed = default
        return max(minimum, min(maximum, parsed))

    @staticmethod
    def _bool_arg(action: dict[str, Any], key: str, default: bool) -> bool:
        """Read a boolean argument.

        Accepts real booleans and the strings 1/true/yes/on and 0/false/no/off
        (case-insensitive, surrounding whitespace ignored). Anything else
        yields ``default``.
        """
        value = action.get(key, default)
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            normalized = value.strip().lower()
            if normalized in {"1", "true", "yes", "on"}:
                return True
            if normalized in {"0", "false", "no", "off"}:
                return False
        return default
563
+
564
+
289
565
  class DSPyCodingRLMEnvironment(GenericRLMEnvironment):
290
566
  """DSPy-focused environment with file edit + tests + DSPy-aware scoring."""
291
567
 
@@ -38,6 +38,7 @@ from .environments import (
38
38
  GenericRLMEnvironment,
39
39
  RLMEnvironment,
40
40
  RLMRewardProfile,
41
+ TraceAnalysisEnvironment,
41
42
  )
42
43
  from .events import RLMEventBus
43
44
  from .frameworks import FrameworkAdapterRegistry, FrameworkEpisodeResult
@@ -279,6 +280,18 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
279
280
  workdir=self.workdir,
280
281
  reward_profile=self.reward_profile,
281
282
  ),
283
+ "trace_analysis": TraceAnalysisEnvironment(
284
+ workdir=self.workdir,
285
+ reward_profile=self.reward_profile,
286
+ ),
287
+ "trace-analysis": TraceAnalysisEnvironment(
288
+ workdir=self.workdir,
289
+ reward_profile=self.reward_profile,
290
+ ),
291
+ "traces": TraceAnalysisEnvironment(
292
+ workdir=self.workdir,
293
+ reward_profile=self.reward_profile,
294
+ ),
282
295
  "framework": DSPyCodingRLMEnvironment(
283
296
  workdir=self.workdir,
284
297
  reward_profile=self.reward_profile,
@@ -0,0 +1,6 @@
1
+ """Trace indexing and query helpers for HALO-style RLM analysis."""
2
+
3
+ from .index import TraceIndexBuilder
4
+ from .store import TraceStore
5
+
6
+ __all__ = ["TraceIndexBuilder", "TraceStore"]