rlm-code 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305)
  1. {rlm_code-0.1.6 → rlm_code-0.1.7}/CHANGELOG.md +14 -0
  2. {rlm_code-0.1.6 → rlm_code-0.1.7}/PKG-INFO +65 -9
  3. {rlm_code-0.1.6 → rlm_code-0.1.7}/README.md +64 -8
  4. {rlm_code-0.1.6 → rlm_code-0.1.7}/pyproject.toml +1 -1
  5. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/__init__.py +1 -1
  6. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/slash_commands.py +8 -8
  7. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/__init__.py +1 -1
  8. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/action_planner.py +3 -0
  9. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/environments.py +245 -0
  10. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/runner.py +13 -0
  11. rlm_code-0.1.7/rlm_code/traces/__init__.py +6 -0
  12. rlm_code-0.1.7/rlm_code/traces/index.py +170 -0
  13. rlm_code-0.1.7/rlm_code/traces/models.py +103 -0
  14. rlm_code-0.1.7/rlm_code/traces/store.py +221 -0
  15. rlm_code-0.1.7/tests/test_trace_analysis.py +115 -0
  16. {rlm_code-0.1.6 → rlm_code-0.1.7}/.gitignore +0 -0
  17. {rlm_code-0.1.6 → rlm_code-0.1.7}/LICENSE +0 -0
  18. {rlm_code-0.1.6 → rlm_code-0.1.7}/NOTICE +0 -0
  19. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/__init__.py +0 -0
  20. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/agent.py +0 -0
  21. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/agents/__init__.py +0 -0
  22. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/agents/rlm_agent.py +0 -0
  23. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/callbacks/__init__.py +0 -0
  24. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/callbacks/code_execution.py +0 -0
  25. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/cli.py +0 -0
  26. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/code_executor.py +0 -0
  27. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/events.py +0 -0
  28. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/__init__.py +0 -0
  29. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/base.py +0 -0
  30. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/lazy.py +0 -0
  31. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/loader.py +0 -0
  32. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/parsers/__init__.py +0 -0
  33. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/parsers/base.py +0 -0
  34. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/parsers/pdf.py +0 -0
  35. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/parsers/text.py +0 -0
  36. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/sources/__init__.py +0 -0
  37. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/sources/base.py +0 -0
  38. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/sources/gcs.py +0 -0
  39. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/sources/local.py +0 -0
  40. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/llm.py +0 -0
  41. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/logging/__init__.py +0 -0
  42. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/logging/rlm_logger.py +0 -0
  43. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/logging/verbose.py +0 -0
  44. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/main.py +0 -0
  45. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/prompts.py +0 -0
  46. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/repl/__init__.py +0 -0
  47. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/repl/local_repl.py +0 -0
  48. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/repl/safe_builtins.py +0 -0
  49. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/templates/index.html +0 -0
  50. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/tools/__init__.py +0 -0
  51. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/types.py +0 -0
  52. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/usage.py +0 -0
  53. {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/web.py +0 -0
  54. {rlm_code-0.1.6 → rlm_code-0.1.7}/eval/packs/README.md +0 -0
  55. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/__main__.py +0 -0
  56. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/__init__.py +0 -0
  57. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/config_command.py +0 -0
  58. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/create_command.py +0 -0
  59. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/demo_command.py +0 -0
  60. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/export_command.py +0 -0
  61. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/init_command.py +0 -0
  62. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/interactive_command.py +0 -0
  63. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/mcp_command.py +0 -0
  64. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/models_command.py +0 -0
  65. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/nl_command_router.py +0 -0
  66. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/optimize_command.py +0 -0
  67. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/run_command.py +0 -0
  68. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/__init__.py +0 -0
  69. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/config.py +0 -0
  70. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/debug_logger.py +0 -0
  71. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/directory_utils.py +0 -0
  72. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/exceptions.py +0 -0
  73. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/logging.py +0 -0
  74. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/venv_utils.py +0 -0
  75. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/version_checker.py +0 -0
  76. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/__init__.py +0 -0
  77. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/phase2_demo.py +0 -0
  78. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/phase3_demo.py +0 -0
  79. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/phase4_demo.py +0 -0
  80. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/pure_rlm_demo.py +0 -0
  81. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/execution/__init__.py +0 -0
  82. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/execution/engine.py +0 -0
  83. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/execution/sandbox.py +0 -0
  84. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/export/__init__.py +0 -0
  85. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/export/handler.py +0 -0
  86. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/export/package_builder.py +0 -0
  87. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/generators/evaluation_generator.py +0 -0
  88. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/generators/gepa_generator.py +0 -0
  89. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/harness/__init__.py +0 -0
  90. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/harness/registry.py +0 -0
  91. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/harness/runner.py +0 -0
  92. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/main.py +0 -0
  93. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/client_manager.py +0 -0
  94. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/config.py +0 -0
  95. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/exceptions.py +0 -0
  96. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/retry.py +0 -0
  97. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/server/__init__.py +0 -0
  98. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/server/rlm_server.py +0 -0
  99. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/server/tools.py +0 -0
  100. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/session_wrapper.py +0 -0
  101. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/__init__.py +0 -0
  102. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/factory.py +0 -0
  103. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/sse_transport.py +0 -0
  104. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/stdio_transport.py +0 -0
  105. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/websocket_transport.py +0 -0
  106. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/utils.py +0 -0
  107. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/__init__.py +0 -0
  108. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/cache.py +0 -0
  109. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/code_generator.py +0 -0
  110. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/dspy_reference_loader.py +0 -0
  111. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/llm_connector.py +0 -0
  112. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/model_manager.py +0 -0
  113. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/__init__.py +0 -0
  114. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/acp_discovery.py +0 -0
  115. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/local_discovery.py +0 -0
  116. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/model_catalog.py +0 -0
  117. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/registry.py +0 -0
  118. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/streaming.py +0 -0
  119. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/task_collector.py +0 -0
  120. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/optimization/__init__.py +0 -0
  121. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/optimization/data_collector.py +0 -0
  122. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/optimization/executor.py +0 -0
  123. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/optimization/workflow_manager.py +0 -0
  124. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/__init__.py +0 -0
  125. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/context_manager.py +0 -0
  126. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/dspy_md_generator.py +0 -0
  127. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/initializer.py +0 -0
  128. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/scanner.py +0 -0
  129. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/py.typed +0 -0
  130. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/__init__.py +0 -0
  131. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/__init__.py +0 -0
  132. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/audit.py +0 -0
  133. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/gate.py +0 -0
  134. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/handlers.py +0 -0
  135. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/policy.py +0 -0
  136. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/benchmark_manager.py +0 -0
  137. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/benchmarks.py +0 -0
  138. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/chat_session.py +0 -0
  139. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/code_interpreter.py +0 -0
  140. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/comparison.py +0 -0
  141. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/config_schema.py +0 -0
  142. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/context_store.py +0 -0
  143. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/delegation.py +0 -0
  144. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/docker_interpreter.py +0 -0
  145. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/events.py +0 -0
  146. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/__init__.py +0 -0
  147. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
  148. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/base.py +0 -0
  149. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
  150. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
  151. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
  152. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
  153. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/registry.py +0 -0
  154. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/leaderboard.py +0 -0
  155. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/memory_compaction.py +0 -0
  156. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/mock_interpreter.py +0 -0
  157. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/monty_interpreter.py +0 -0
  158. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/observability.py +0 -0
  159. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/observability_sinks.py +0 -0
  160. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/__init__.py +0 -0
  161. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/action_policies.py +0 -0
  162. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/base.py +0 -0
  163. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/compaction_policies.py +0 -0
  164. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/registry.py +0 -0
  165. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/reward_policies.py +0 -0
  166. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/termination_policies.py +0 -0
  167. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/pure_rlm_environment.py +0 -0
  168. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/repl_types.py +0 -0
  169. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/__init__.py +0 -0
  170. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/theme.py +0 -0
  171. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
  172. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
  173. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
  174. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/session_replay.py +0 -0
  175. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/task_signature.py +0 -0
  176. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/termination.py +0 -0
  177. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/trajectory.py +0 -0
  178. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/visualizer.py +0 -0
  179. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/__init__.py +0 -0
  180. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/__init__.py +0 -0
  181. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
  182. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/base.py +0 -0
  183. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
  184. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
  185. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
  186. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
  187. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
  188. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
  189. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
  190. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
  191. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/registry.py +0 -0
  192. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/superbox.py +0 -0
  193. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/session/__init__.py +0 -0
  194. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/session/state_manager.py +0 -0
  195. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/.env.example +0 -0
  196. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/adapters.py +0 -0
  197. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/async_streaming.py +0 -0
  198. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/complete_programs.py +0 -0
  199. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/dspy_config_example.yaml +0 -0
  200. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/evaluation.py +0 -0
  201. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/industry_templates.py +0 -0
  202. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/optimizers.py +0 -0
  203. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/retrievers.py +0 -0
  204. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
  205. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/tests/__init__.py +0 -0
  206. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/tests/rlm/__init__.py +0 -0
  207. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/tests/rlm/test_phase2.py +0 -0
  208. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
  209. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/__init__.py +0 -0
  210. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/agent_collab_view.py +0 -0
  211. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/animations.py +0 -0
  212. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/conversation.py +0 -0
  213. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/design_system.py +0 -0
  214. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/diff_viewer.py +0 -0
  215. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/notifications.py +0 -0
  216. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/persistent_shell.py +0 -0
  217. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/prompt_widget.py +0 -0
  218. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/prompts.py +0 -0
  219. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/pty_terminal.py +0 -0
  220. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/resizable_divider.py +0 -0
  221. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/thinking_display.py +0 -0
  222. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/tui_app.py +0 -0
  223. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/tui_utils.py +0 -0
  224. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/welcome.py +0 -0
  225. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/__init__.py +0 -0
  226. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/anti_patterns.py +0 -0
  227. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/auto_fixer.py +0 -0
  228. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/best_practices.py +0 -0
  229. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/code_validator.py +0 -0
  230. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/config_validator.py +0 -0
  231. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/exceptions.py +0 -0
  232. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/input_validator.py +0 -0
  233. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/learning_integration.py +0 -0
  234. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/models.py +0 -0
  235. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/module_validator.py +0 -0
  236. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/predictor_validator.py +0 -0
  237. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/quality_scorer.py +0 -0
  238. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/report_generator.py +0 -0
  239. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/security.py +0 -0
  240. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/security_validator.py +0 -0
  241. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/signature_validator.py +0 -0
  242. {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/validator.py +0 -0
  243. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/__init__.py +0 -0
  244. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/conftest.py +0 -0
  245. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
  246. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_adk_rlm_adapter.py +0 -0
  247. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_code_interpreter.py +0 -0
  248. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_deepagents_adapter.py +0 -0
  249. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
  250. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_extract_fallback.py +0 -0
  251. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_framework_registry_coverage.py +0 -0
  252. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_google_adk_adapter.py +0 -0
  253. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_leaderboard.py +0 -0
  254. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_mock_interpreter.py +0 -0
  255. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_monty_interpreter.py +0 -0
  256. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_observability_sinks.py +0 -0
  257. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_p0_features.py +0 -0
  258. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_phase3.py +0 -0
  259. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_phase4.py +0 -0
  260. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
  261. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
  262. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_repl_history.py +0 -0
  263. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_security_hardening.py +0 -0
  264. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_session_replay.py +0 -0
  265. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_submit.py +0 -0
  266. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_task_signature.py +0 -0
  267. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_user_tools.py +0 -0
  268. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_anti_patterns.py +0 -0
  269. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_auto_fixer.py +0 -0
  270. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_cache.py +0 -0
  271. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_execution_engine.py +0 -0
  272. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_export_import.py +0 -0
  273. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_harness_registry.py +0 -0
  274. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_harness_runner.py +0 -0
  275. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_init_command.py +0 -0
  276. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_integration.py +0 -0
  277. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_learning_integration.py +0 -0
  278. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_mcp_utils.py +0 -0
  279. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_module_validator.py +0 -0
  280. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_optimization_workflow.py +0 -0
  281. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_persistent_shell.py +0 -0
  282. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_predictor_validator.py +0 -0
  283. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_project_scanner.py +0 -0
  284. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_prompt_widget.py +0 -0
  285. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_property_validators.py +0 -0
  286. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_provider_discovery.py +0 -0
  287. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_provider_registry.py +0 -0
  288. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_quality_scorer.py +0 -0
  289. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_report_generator.py +0 -0
  290. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_retry.py +0 -0
  291. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_rlm_config.py +0 -0
  292. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_rlm_dspy_environment.py +0 -0
  293. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_rlm_observability.py +0 -0
  294. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_rlm_runner.py +0 -0
  295. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_sandbox_runtimes.py +0 -0
  296. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_security_validator.py +0 -0
  297. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_session_management.py +0 -0
  298. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_signature_validator.py +0 -0
  299. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_slash_harness_command.py +0 -0
  300. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_slash_rlm_command.py +0 -0
  301. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_slash_sandbox_command.py +0 -0
  302. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_streaming.py +0 -0
  303. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_superbox.py +0 -0
  304. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_tui_utils.py +0 -0
  305. {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_validation.py +0 -0
@@ -5,6 +5,19 @@ All notable changes to this project are documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.7] - 2026-04-30
9
+
10
+ ### Added
11
+ - HALO-style `trace_analysis` RLM environment for diagnosing agent harness failures from one-span-per-line JSONL traces.
12
+ - Trace sidecar indexing with dataset rollups for trace counts, span counts, error traces, services, models, agents, token totals, and sample trace ids.
13
+ - Bounded trace inspection actions: `get_dataset_overview`, `query_traces`, `count_traces`, `view_trace`, `search_trace`, and `view_spans`.
14
+ - Large-trace safeguards: per-attribute truncation, oversized trace summaries, and higher-cap selected-span reads.
15
+ - Tests for trace indexing, querying, searching, selected-span viewing, and trace environment actions.
16
+ - Trace analysis documentation under the Core Engine docs.
17
+
18
+ ### Changed
19
+ - `/rlm` command help now advertises `env=trace_analysis` for run, chat, and doctor workflows.
20
+
8
21
  ## [0.1.6] - 2026-02-20
9
22
 
10
23
  ### Added
@@ -56,3 +69,4 @@ Initial public release of **RLM Code**.
56
69
 
57
70
  [0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
58
71
  [0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
72
+ [0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlm-code
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
5
5
  Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
6
6
  Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
@@ -118,20 +118,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
118
118
 
119
119
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
120
120
 
121
- ## Release v0.1.6
121
+ ## Release v0.1.7
122
122
 
123
- This release adds the new CodeMode path as an opt-in harness strategy.
123
+ This release adds HALO-style trace analysis as a new RLM environment.
124
124
 
125
- - New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
126
- - MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
127
- - Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
128
- - Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
129
- - Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
125
+ - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
126
+ - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
127
+ - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
128
+ - `/rlm` help/docs updated for `env=trace_analysis`
129
+ - Dedicated trace analysis docs under the Core Engine section
130
130
 
131
131
  Example:
132
132
 
133
133
  ```text
134
- /harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
134
+ /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
135
135
  ```
136
136
 
137
137
  ## Documentation
@@ -287,6 +287,62 @@ Notes:
287
287
  - In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
288
288
  - In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
289
289
 
290
+ ### 8. CodeMode with UTCP and Cloudflare MCP
291
+
292
+ Use these server entries in your project `rlm_config.yaml`:
293
+
294
+ ```yaml
295
+ mcp_servers:
296
+   utcp-codemode:
297
+     name: utcp-codemode
298
+     description: "Local CodeMode MCP bridge"
299
+     enabled: true
300
+     auto_connect: false
301
+     timeout_seconds: 30
302
+     retry_attempts: 3
303
+     transport:
304
+       type: stdio
305
+       command: npx
306
+       args:
307
+         - "@utcp/code-mode-mcp"
308
+
309
+   cloudflare-codemode:
310
+     name: cloudflare-codemode
311
+     description: "Cloudflare MCP via remote bridge"
312
+     enabled: true
313
+     auto_connect: false
314
+     timeout_seconds: 30
315
+     retry_attempts: 3
316
+     transport:
317
+       type: stdio
318
+       command: npx
319
+       args:
320
+         - "mcp-remote"
321
+         - "https://mcp.cloudflare.com/mcp"
322
+ ```
323
+
324
+ UTCP path (native CodeMode in current release):
325
+
326
+ ```text
327
+ /mcp-connect utcp-codemode
328
+ /mcp-tools utcp-codemode
329
+ /harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
330
+ ```
331
+
332
+ Cloudflare path (recommended strategy today):
333
+
334
+ ```text
335
+ /mcp-connect cloudflare-codemode
336
+ /mcp-tools cloudflare-codemode
337
+ /harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
338
+ ```
339
+
340
+ Notes:
341
+
342
+ - On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
343
+ - In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
344
+ - If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
345
+
290
346
  ## How the RLM Loop Works
291
347
 
292
348
  Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
@@ -25,20 +25,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
25
25
 
26
26
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
27
27
 
28
- ## Release v0.1.6
28
+ ## Release v0.1.7
29
29
 
30
- This release adds the new CodeMode path as an opt-in harness strategy.
30
+ This release adds HALO-style trace analysis as a new RLM environment.
31
31
 
32
- - New harness strategy: `strategy=codemode` (default remains `strategy=tool_call`)
33
- - MCP bridge flow for CodeMode: `search_tools` -> typed tool surface -> `call_tool_chain`
34
- - Guardrails before execution: blocked API classes plus timeout/size/tool-call caps
35
- - Benchmark telemetry for side-by-side comparison: `tool_call` vs `codemode`
36
- - Dedicated docs section for CodeMode: quickstart, architecture, guardrails, evaluation
32
+ - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
33
+ - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
34
+ - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
35
+ - `/rlm` help/docs updated for `env=trace_analysis`
36
+ - Dedicated trace analysis docs under the Core Engine section
37
37
 
38
38
  Example:
39
39
 
40
40
  ```text
41
- /harness run "implement feature and add tests" steps=8 mcp=on strategy=codemode mcp_server=codemode
41
+ /rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
42
42
  ```
43
43
 
44
44
  ## Documentation
@@ -194,6 +194,62 @@ Notes:
194
194
  - In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
195
195
  - In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
196
196
 
197
+ ### 8. CodeMode with UTCP and Cloudflare MCP
198
+
199
+ Use these server entries in your project `rlm_config.yaml`:
200
+
201
+ ```yaml
202
+ mcp_servers:
203
+ utcp-codemode:
204
+ name: utcp-codemode
205
+ description: "Local CodeMode MCP bridge"
206
+ enabled: true
207
+ auto_connect: false
208
+ timeout_seconds: 30
209
+ retry_attempts: 3
210
+ transport:
211
+ type: stdio
212
+ command: npx
213
+ args:
214
+ - "@utcp/code-mode-mcp"
215
+
216
+ cloudflare-codemode:
217
+ name: cloudflare-codemode
218
+ description: "Cloudflare MCP via remote bridge"
219
+ enabled: true
220
+ auto_connect: false
221
+ timeout_seconds: 30
222
+ retry_attempts: 3
223
+ transport:
224
+ type: stdio
225
+ command: npx
226
+ args:
227
+ - "mcp-remote"
228
+ - "https://mcp.cloudflare.com/mcp"
229
+ ```
230
+
231
+ UTCP path (native CodeMode in current release):
232
+
233
+ ```text
234
+ /mcp-connect utcp-codemode
235
+ /mcp-tools utcp-codemode
236
+ /harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
237
+ ```
238
+
239
+ Cloudflare path (recommended strategy today):
240
+
241
+ ```text
242
+ /mcp-connect cloudflare-codemode
243
+ /mcp-tools cloudflare-codemode
244
+ /harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
245
+ ```
246
+
247
+ Notes:
248
+
249
+ - On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
250
+ - In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
251
+ - If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
252
+
197
253
  ## How the RLM Loop Works
198
254
 
199
255
  Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rlm-code"
7
- version = "0.1.6"
7
+ version = "0.1.7"
8
8
  description = "RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -5,5 +5,5 @@ This package provides tools for creating, managing, and optimizing DSPy componen
5
5
  through natural language interactions.
6
6
  """
7
7
 
8
- __version__ = "0.1.6"
8
+ __version__ = "0.1.7"
9
9
  __author__ = "Super Agentic AI"
@@ -1684,7 +1684,7 @@ class SlashCommandHandler:
1684
1684
  Manage RLM runs.
1685
1685
 
1686
1686
  Usage:
1687
- /rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
1687
+ /rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model]
1688
1688
  /rlm bench [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model]
1689
1689
  /rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N]
1690
1690
  /rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json]
@@ -1696,8 +1696,8 @@ class SlashCommandHandler:
1696
1696
  /rlm status [run_id]
1697
1697
  /rlm abort [run_id|all]
1698
1698
  /rlm replay [run_id|latest]
1699
- /rlm doctor [env=generic|dspy|pure_rlm] [--json]
1700
- /rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
1699
+ /rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json]
1700
+ /rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
1701
1701
  /rlm chat status [session=name]
1702
1702
  /rlm chat reset [session=name]
1703
1703
  /rlm observability
@@ -1708,14 +1708,14 @@ class SlashCommandHandler:
1708
1708
  console.print("[bold cyan]🧠 RLM Commands[/bold cyan]")
1709
1709
  console.print(
1710
1710
  " [yellow]/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] "
1711
- f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] "
1711
+ f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] "
1712
1712
  "[sub=provider/model][/yellow]"
1713
1713
  )
1714
1714
  console.print(
1715
1715
  " [yellow]/rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
1716
1716
  "[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
1717
1717
  "[pack=path[,path2]] [limit=N] [steps=N] "
1718
- f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model][/yellow]"
1718
+ f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model][/yellow]"
1719
1719
  )
1720
1720
  console.print(
1721
1721
  " [yellow]/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
@@ -1741,9 +1741,9 @@ class SlashCommandHandler:
1741
1741
  console.print(" [yellow]/rlm status [run_id][/yellow]")
1742
1742
  console.print(" [yellow]/rlm abort [run_id|all][/yellow]")
1743
1743
  console.print(" [yellow]/rlm replay [run_id|latest][/yellow]")
1744
- console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm] [--json][/yellow]")
1744
+ console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json][/yellow]")
1745
1745
  console.print(
1746
- " [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] "
1746
+ " [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] "
1747
1747
  f"[children=N] [parallel=N] [budget=N] [framework={framework_opts}] "
1748
1748
  "[sub=provider/model][/yellow]"
1749
1749
  )
@@ -2135,7 +2135,7 @@ class SlashCommandHandler:
2135
2135
  task = " ".join(task_tokens).strip()
2136
2136
  if not task:
2137
2137
  show_error_message(
2138
- "Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm] "
2138
+ "Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm|trace_analysis] "
2139
2139
  "[depth=N] [children=N] [parallel=N] [budget=N] "
2140
2140
  f"[framework={framework_opts}] "
2141
2141
  "[branch=N] [sub=provider/model]"
@@ -17,7 +17,7 @@ from .exceptions import (
17
17
  )
18
18
  from .session_wrapper import MCPSessionWrapper
19
19
 
20
- __version__ = "0.1.6"
20
+ __version__ = "0.1.7"
21
21
 
22
22
  __all__ = [
23
23
  "MCPClientManager",
@@ -15,6 +15,7 @@ from .environments import (
15
15
  DSPyCodingRLMEnvironment,
16
16
  GenericRLMEnvironment,
17
17
  RLMEnvironment,
18
+ TraceAnalysisEnvironment,
18
19
  )
19
20
  from .pure_rlm_environment import PureRLMConfig, PureRLMEnvironment
20
21
 
@@ -276,6 +277,8 @@ class ActionPlannerMixin:
276
277
  )
277
278
  if isinstance(env, DSPyCodingRLMEnvironment):
278
279
  return DSPyCodingRLMEnvironment(workdir=workdir, reward_profile=self.reward_profile)
280
+ if isinstance(env, TraceAnalysisEnvironment):
281
+ return TraceAnalysisEnvironment(workdir=workdir, reward_profile=self.reward_profile)
279
282
  if isinstance(env, GenericRLMEnvironment):
280
283
  return GenericRLMEnvironment(workdir=workdir, reward_profile=self.reward_profile)
281
284
  # Fallback to generic environment in preview if an unknown env type appears.
@@ -286,6 +286,251 @@ class GenericRLMEnvironment:
286
286
  return "Execution failed without stderr."
287
287
 
288
288
 
289
class TraceAnalysisEnvironment(GenericRLMEnvironment):
    """HALO-style trace analysis environment over one-span-per-line JSONL traces.

    The planner drives a ``TraceStore`` through a small set of read-only
    actions (overview, query, count, view, search, span view).  A trace file
    can be selected either by the planner (``set_trace_path`` or a
    ``trace_path`` key on any action) or implicitly by writing
    ``trace=<path>`` / ``trace_path=<path>`` in the task text.
    """

    name = "trace_analysis"

    # Actions that need a loaded trace store before they can run.
    _STORE_ACTIONS = frozenset(
        {
            "get_dataset_overview",
            "query_traces",
            "count_traces",
            "view_trace",
            "search_trace",
            "view_spans",
        }
    )

    def __init__(
        self,
        workdir: Path | None = None,
        reward_profile: RLMRewardProfile | dict[str, Any] | None = None,
    ):
        super().__init__(workdir=workdir, reward_profile=reward_profile)
        # Path of the active (or last attempted) trace file, if any.
        self._trace_path: Path | None = None
        # Loaded TraceStore for ``_trace_path``; None when nothing is loaded.
        self._store: Any | None = None
        # Message of the last failed task-driven auto-load, shown in the prompt.
        self._load_error: str | None = None

    def system_prompt(self) -> str:
        """Return the planner system prompt describing the JSON action schema."""
        return (
            "You are an RLM planner specialized for analyzing agent execution traces.\n"
            "Return ONLY valid JSON object with keys:\n"
            "{"
            '"action": "set_trace_path" | "get_dataset_overview" | "query_traces" | '
            '"count_traces" | "view_trace" | "search_trace" | "view_spans" | "final", '
            '"trace_path": "<path to JSONL traces>", '
            '"filters": {"has_errors": true, "model_names": ["..."], "service_names": ["..."], '
            '"agent_names": ["..."], "project_id": "..."}, '
            '"trace_id": "<trace id>", '
            '"span_ids": ["<span id>"], '
            '"pattern": "<literal substring>", '
            '"limit": <integer>, '
            '"offset": <integer>, '
            '"rationale": "<brief reason>", '
            '"done": true|false, '
            '"final_response": "<required when action=final>"'
            "}\n"
            "Rules:\n"
            "- Load a trace file first if one is not already active.\n"
            "- Always begin analysis with get_dataset_overview.\n"
            "- Use query_traces to choose real trace ids; never invent trace ids.\n"
            "- For large traces, prefer search_trace followed by view_spans.\n"
            "- Identify systemic harness failures, not one-off anomalies.\n"
            "- Output JSON only."
        )

    def planner_prompt(
        self, task: str, memory: list[str], trajectory: list[dict[str, Any]], step_index: int
    ) -> str:
        """Build the per-step planner prompt, auto-loading a trace named in the task.

        If the task contains ``trace=<path>`` or ``trace_path=<path>`` and that
        file is not already the active one, it is loaded here.  A load failure
        does not raise; the error is recorded and appended to the prompt.
        """
        inferred = self._extract_trace_path(task)
        if inferred is not None and not self._is_active_path(inferred):
            try:
                self._load_store(inferred)
            except Exception as exc:
                # Best effort: remember the attempted path so the load is not
                # retried on every step, and surface the failure in the prompt.
                self._trace_path = inferred
                self._store = None
                self._load_error = f"{type(exc).__name__}: {exc}"

        base = super().planner_prompt(task, memory, trajectory, step_index)
        active = str(self._trace_path) if self._trace_path is not None else "(none)"
        overview = ""
        if self._store is not None:
            try:
                data = self._store.get_overview({})
                overview = (
                    f"\nActive trace overview: traces={data['total_traces']} "
                    f"spans={data['total_spans']} errors={data['error_trace_count']} "
                    f"sample_trace_ids={data['sample_trace_ids'][:5]}"
                )
            except Exception:
                # The overview line is optional context; omit it on any failure.
                overview = ""
        elif self._load_error:
            overview = f"\nTrace load error: {self._load_error}"
        return (
            f"{base}\n\n"
            f"Trace analysis environment.\n"
            f"Active trace path: {active}\n"
            "If the task includes trace=<path> or trace_path=<path>, use that file.\n"
            "Goal: produce a concise evidence report of repeated harness failure modes "
            "with concrete trace ids/spans and suggested harness changes."
            f"{overview}"
        )

    def execute_action(
        self,
        action: dict[str, Any],
        execution_engine: Any,
        exec_timeout: int,
        llm_connector: Any | None = None,
    ) -> EnvironmentActionResult:
        """Execute one planner action against the trace store.

        ``final`` is delegated to the parent environment.  Unknown action names
        are rejected before any store is touched.  Any exception raised while
        running a trace action is converted into a failed observation rather
        than propagated.
        """
        action_name = str(action.get("action", "")).strip().lower()
        if action_name == "final":
            return super().execute_action(
                action,
                execution_engine,
                exec_timeout,
                llm_connector=llm_connector,
            )

        # Reject unknown actions up front so the planner gets an accurate
        # error and no trace file is loaded as a side effect.
        if action_name != "set_trace_path" and action_name not in self._STORE_ACTIONS:
            return EnvironmentActionResult(
                observation={"success": False, "error": f"Unsupported action '{action_name}'."},
                reward=-0.2,
                memory_note="Planner produced unsupported trace action.",
            )

        try:
            if action_name == "set_trace_path":
                store = self._store_from_action(action, required_path=True)
                return EnvironmentActionResult(
                    observation={
                        "success": True,
                        "trace_path": str(store.trace_path),
                        "index_path": str(store.index_path),
                        "overview": store.get_overview({}),
                    },
                    reward=0.55,
                    memory_note=f"Loaded trace dataset: {store.trace_path}",
                )

            store = self._store_from_action(action, required_path=False)
            return self._run_store_action(action_name, action, store)
        except Exception as exc:
            return EnvironmentActionResult(
                observation={"success": False, "error": f"{type(exc).__name__}: {exc}"},
                reward=-0.25,
                memory_note=f"Trace analysis action failed: {type(exc).__name__}.",
            )

    def _run_store_action(
        self, action_name: str, action: dict[str, Any], store: Any
    ) -> EnvironmentActionResult:
        """Run one of ``_STORE_ACTIONS`` against an already-loaded store."""
        raw_filters = action.get("filters")
        filters = raw_filters if isinstance(raw_filters, dict) else {}

        if action_name == "get_dataset_overview":
            return self._ok(
                observation=store.get_overview(filters),
                reward=0.45,
                memory_note="Loaded trace dataset overview.",
            )
        if action_name == "query_traces":
            return self._ok(
                observation=store.query_traces(
                    filters,
                    limit=self._int_arg(action, "limit", 50, minimum=1, maximum=200),
                    offset=self._int_arg(action, "offset", 0, minimum=0, maximum=1_000_000),
                ),
                reward=0.5,
                memory_note="Queried trace summaries.",
            )
        if action_name == "count_traces":
            return self._ok(
                observation=store.count_traces(filters),
                reward=0.35,
                memory_note="Counted traces matching filters.",
            )
        if action_name == "view_trace":
            trace_id = self._required_str(action, "trace_id")
            return self._ok(
                observation=store.view_trace(trace_id),
                reward=0.65,
                memory_note=f"Viewed trace {trace_id}.",
            )
        if action_name == "search_trace":
            trace_id = self._required_str(action, "trace_id")
            pattern = self._required_str(action, "pattern")
            return self._ok(
                observation=store.search_trace(
                    trace_id,
                    pattern,
                    limit=self._int_arg(action, "limit", 100, minimum=1, maximum=500),
                ),
                reward=0.65,
                memory_note=f"Searched trace {trace_id} for {pattern!r}.",
            )
        if action_name == "view_spans":
            trace_id = self._required_str(action, "trace_id")
            span_ids = action.get("span_ids")
            if not isinstance(span_ids, list) or not span_ids:
                raise ValueError("view_spans requires non-empty span_ids list")
            return self._ok(
                observation=store.view_spans(trace_id, [str(item) for item in span_ids]),
                reward=0.7,
                memory_note=f"Viewed selected spans for trace {trace_id}.",
            )
        # Unreachable when called from execute_action, which validates the name.
        raise ValueError(f"Unsupported action '{action_name}'.")

    def doctor_checks(self) -> list[EnvironmentDoctorCheck]:
        """Return the parent's doctor checks plus a trace-analysis availability check."""
        checks = super().doctor_checks()
        checks.append(
            EnvironmentDoctorCheck(
                name="trace_analysis",
                status="pass",
                detail="Trace analysis environment is available.",
            )
        )
        return checks

    def _ok(self, *, observation: dict[str, Any], reward: float, memory_note: str) -> EnvironmentActionResult:
        """Wrap a store result as a successful action result (adds ``success: True``)."""
        payload = {"success": True, **observation}
        return EnvironmentActionResult(
            observation=payload,
            reward=reward,
            memory_note=memory_note,
        )

    def _store_from_action(self, action: dict[str, Any], *, required_path: bool) -> Any:
        """Return the store an action should use, loading a new one if it names a path.

        A non-empty ``trace_path`` (or ``path``) on the action always loads that
        file.  Otherwise the currently loaded store is reused.  ``required_path``
        only selects which error message is raised when neither is available.

        Raises:
            ValueError: if the action names no path and no store is loaded.
        """
        raw = action.get("trace_path") or action.get("path")
        if isinstance(raw, str) and raw.strip():
            return self._load_store(Path(raw.strip()).expanduser())
        if self._store is not None:
            return self._store
        if required_path:
            raise ValueError("trace_path is required")
        raise ValueError("no trace dataset loaded; pass trace_path or use set_trace_path first")

    def _anchor_trace_path(self, trace_path: Path) -> Path:
        """Return ``trace_path`` unchanged if absolute, else joined onto the workdir."""
        return trace_path if trace_path.is_absolute() else (self.workdir / trace_path)

    def _is_active_path(self, candidate: Path) -> bool:
        """Return True if ``candidate`` names the current (or last attempted) trace path.

        ``_load_store`` records the fully resolved path, so a raw comparison
        against the path written in the task would miss for relative paths and
        trigger a reload on every planner step.  Both forms are compared.
        """
        if self._trace_path is None:
            return False
        if candidate == self._trace_path:
            return True
        try:
            return self._anchor_trace_path(candidate).resolve() == self._trace_path
        except (OSError, RuntimeError, ValueError):
            # Unresolvable path: treat as not active and let the load report it.
            return False

    def _load_store(self, trace_path: Path) -> Any:
        """Load ``trace_path`` into a ``TraceStore`` and make it the active dataset.

        State is only updated after a successful load, so a failure leaves the
        previously active store untouched.
        """
        # Imported lazily, as in the original implementation.
        from ..traces import TraceStore

        anchored = self._anchor_trace_path(trace_path)
        store = TraceStore.load(anchored)
        self._trace_path = anchored.resolve()
        self._store = store
        self._load_error = None
        return store

    @staticmethod
    def _extract_trace_path(task: str) -> Path | None:
        """Return the path from a ``trace=<path>`` / ``trace_path=<path>`` token, if present."""
        match = re.search(r"(?:^|\s)(?:trace|trace_path)=([^\s]+)", task)
        if not match:
            return None
        raw = match.group(1).strip().strip("\"'")
        return Path(raw).expanduser() if raw else None

    @staticmethod
    def _required_str(action: dict[str, Any], key: str) -> str:
        """Return ``action[key]`` stripped; raise ``ValueError`` if missing, non-str or blank."""
        value = action.get(key)
        if not isinstance(value, str) or not value.strip():
            raise ValueError(f"{key} is required")
        return value.strip()

    @staticmethod
    def _int_arg(
        action: dict[str, Any],
        key: str,
        default: int,
        *,
        minimum: int,
        maximum: int,
    ) -> int:
        """Read an integer argument, falling back to ``default`` and clamping to the bounds."""
        value = action.get(key, default)
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, None, NaN or infinite input: use the default.
            parsed = default
        return max(minimum, min(maximum, parsed))
532
+
533
+
289
534
  class DSPyCodingRLMEnvironment(GenericRLMEnvironment):
290
535
  """DSPy-focused environment with file edit + tests + DSPy-aware scoring."""
291
536
 
@@ -38,6 +38,7 @@ from .environments import (
38
38
  GenericRLMEnvironment,
39
39
  RLMEnvironment,
40
40
  RLMRewardProfile,
41
+ TraceAnalysisEnvironment,
41
42
  )
42
43
  from .events import RLMEventBus
43
44
  from .frameworks import FrameworkAdapterRegistry, FrameworkEpisodeResult
@@ -279,6 +280,18 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
279
280
  workdir=self.workdir,
280
281
  reward_profile=self.reward_profile,
281
282
  ),
283
+ "trace_analysis": TraceAnalysisEnvironment(
284
+ workdir=self.workdir,
285
+ reward_profile=self.reward_profile,
286
+ ),
287
+ "trace-analysis": TraceAnalysisEnvironment(
288
+ workdir=self.workdir,
289
+ reward_profile=self.reward_profile,
290
+ ),
291
+ "traces": TraceAnalysisEnvironment(
292
+ workdir=self.workdir,
293
+ reward_profile=self.reward_profile,
294
+ ),
282
295
  "framework": DSPyCodingRLMEnvironment(
283
296
  workdir=self.workdir,
284
297
  reward_profile=self.reward_profile,
@@ -0,0 +1,6 @@
1
+ """Trace indexing and query helpers for HALO-style RLM analysis."""
2
+
3
+ from .index import TraceIndexBuilder
4
+ from .store import TraceStore
5
+
6
+ __all__ = ["TraceIndexBuilder", "TraceStore"]