rlm-code 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (305)
  1. {rlm_code-0.1.7 → rlm_code-0.1.8}/CHANGELOG.md +8 -0
  2. {rlm_code-0.1.7 → rlm_code-0.1.8}/PKG-INFO +4 -3
  3. {rlm_code-0.1.7 → rlm_code-0.1.8}/README.md +3 -2
  4. {rlm_code-0.1.7 → rlm_code-0.1.8}/pyproject.toml +1 -1
  5. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/__init__.py +1 -1
  6. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/__init__.py +1 -1
  7. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/environments.py +32 -1
  8. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/traces/store.py +226 -0
  9. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_trace_analysis.py +29 -0
  10. {rlm_code-0.1.7 → rlm_code-0.1.8}/.gitignore +0 -0
  11. {rlm_code-0.1.7 → rlm_code-0.1.8}/LICENSE +0 -0
  12. {rlm_code-0.1.7 → rlm_code-0.1.8}/NOTICE +0 -0
  13. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/__init__.py +0 -0
  14. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/agent.py +0 -0
  15. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/agents/__init__.py +0 -0
  16. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/agents/rlm_agent.py +0 -0
  17. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/callbacks/__init__.py +0 -0
  18. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/callbacks/code_execution.py +0 -0
  19. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/cli.py +0 -0
  20. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/code_executor.py +0 -0
  21. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/events.py +0 -0
  22. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/__init__.py +0 -0
  23. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/base.py +0 -0
  24. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/lazy.py +0 -0
  25. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/loader.py +0 -0
  26. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/parsers/__init__.py +0 -0
  27. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/parsers/base.py +0 -0
  28. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/parsers/pdf.py +0 -0
  29. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/parsers/text.py +0 -0
  30. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/sources/__init__.py +0 -0
  31. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/sources/base.py +0 -0
  32. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/sources/gcs.py +0 -0
  33. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/sources/local.py +0 -0
  34. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/llm.py +0 -0
  35. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/logging/__init__.py +0 -0
  36. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/logging/rlm_logger.py +0 -0
  37. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/logging/verbose.py +0 -0
  38. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/main.py +0 -0
  39. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/prompts.py +0 -0
  40. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/repl/__init__.py +0 -0
  41. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/repl/local_repl.py +0 -0
  42. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/repl/safe_builtins.py +0 -0
  43. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/templates/index.html +0 -0
  44. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/tools/__init__.py +0 -0
  45. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/types.py +0 -0
  46. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/usage.py +0 -0
  47. {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/web.py +0 -0
  48. {rlm_code-0.1.7 → rlm_code-0.1.8}/eval/packs/README.md +0 -0
  49. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/__main__.py +0 -0
  50. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/__init__.py +0 -0
  51. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/config_command.py +0 -0
  52. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/create_command.py +0 -0
  53. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/demo_command.py +0 -0
  54. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/export_command.py +0 -0
  55. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/init_command.py +0 -0
  56. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/interactive_command.py +0 -0
  57. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/mcp_command.py +0 -0
  58. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/models_command.py +0 -0
  59. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/nl_command_router.py +0 -0
  60. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/optimize_command.py +0 -0
  61. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/run_command.py +0 -0
  62. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/slash_commands.py +0 -0
  63. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/__init__.py +0 -0
  64. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/config.py +0 -0
  65. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/debug_logger.py +0 -0
  66. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/directory_utils.py +0 -0
  67. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/exceptions.py +0 -0
  68. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/logging.py +0 -0
  69. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/venv_utils.py +0 -0
  70. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/version_checker.py +0 -0
  71. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/__init__.py +0 -0
  72. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/phase2_demo.py +0 -0
  73. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/phase3_demo.py +0 -0
  74. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/phase4_demo.py +0 -0
  75. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/pure_rlm_demo.py +0 -0
  76. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/execution/__init__.py +0 -0
  77. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/execution/engine.py +0 -0
  78. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/execution/sandbox.py +0 -0
  79. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/export/__init__.py +0 -0
  80. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/export/handler.py +0 -0
  81. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/export/package_builder.py +0 -0
  82. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/generators/evaluation_generator.py +0 -0
  83. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/generators/gepa_generator.py +0 -0
  84. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/harness/__init__.py +0 -0
  85. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/harness/registry.py +0 -0
  86. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/harness/runner.py +0 -0
  87. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/main.py +0 -0
  88. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/client_manager.py +0 -0
  89. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/config.py +0 -0
  90. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/exceptions.py +0 -0
  91. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/retry.py +0 -0
  92. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/server/__init__.py +0 -0
  93. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/server/rlm_server.py +0 -0
  94. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/server/tools.py +0 -0
  95. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/session_wrapper.py +0 -0
  96. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/__init__.py +0 -0
  97. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/factory.py +0 -0
  98. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/sse_transport.py +0 -0
  99. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/stdio_transport.py +0 -0
  100. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/websocket_transport.py +0 -0
  101. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/utils.py +0 -0
  102. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/__init__.py +0 -0
  103. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/cache.py +0 -0
  104. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/code_generator.py +0 -0
  105. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/dspy_reference_loader.py +0 -0
  106. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/llm_connector.py +0 -0
  107. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/model_manager.py +0 -0
  108. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/__init__.py +0 -0
  109. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/acp_discovery.py +0 -0
  110. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/local_discovery.py +0 -0
  111. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/model_catalog.py +0 -0
  112. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/registry.py +0 -0
  113. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/streaming.py +0 -0
  114. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/task_collector.py +0 -0
  115. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/optimization/__init__.py +0 -0
  116. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/optimization/data_collector.py +0 -0
  117. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/optimization/executor.py +0 -0
  118. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/optimization/workflow_manager.py +0 -0
  119. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/__init__.py +0 -0
  120. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/context_manager.py +0 -0
  121. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/dspy_md_generator.py +0 -0
  122. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/initializer.py +0 -0
  123. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/scanner.py +0 -0
  124. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/py.typed +0 -0
  125. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/__init__.py +0 -0
  126. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/action_planner.py +0 -0
  127. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/__init__.py +0 -0
  128. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/audit.py +0 -0
  129. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/gate.py +0 -0
  130. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/handlers.py +0 -0
  131. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/policy.py +0 -0
  132. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/benchmark_manager.py +0 -0
  133. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/benchmarks.py +0 -0
  134. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/chat_session.py +0 -0
  135. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/code_interpreter.py +0 -0
  136. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/comparison.py +0 -0
  137. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/config_schema.py +0 -0
  138. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/context_store.py +0 -0
  139. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/delegation.py +0 -0
  140. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/docker_interpreter.py +0 -0
  141. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/events.py +0 -0
  142. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/__init__.py +0 -0
  143. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
  144. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/base.py +0 -0
  145. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
  146. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
  147. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
  148. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
  149. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/registry.py +0 -0
  150. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/leaderboard.py +0 -0
  151. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/memory_compaction.py +0 -0
  152. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/mock_interpreter.py +0 -0
  153. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/monty_interpreter.py +0 -0
  154. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/observability.py +0 -0
  155. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/observability_sinks.py +0 -0
  156. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/__init__.py +0 -0
  157. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/action_policies.py +0 -0
  158. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/base.py +0 -0
  159. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/compaction_policies.py +0 -0
  160. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/registry.py +0 -0
  161. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/reward_policies.py +0 -0
  162. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/termination_policies.py +0 -0
  163. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/pure_rlm_environment.py +0 -0
  164. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/repl_types.py +0 -0
  165. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/__init__.py +0 -0
  166. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/theme.py +0 -0
  167. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
  168. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
  169. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
  170. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/runner.py +0 -0
  171. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/session_replay.py +0 -0
  172. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/task_signature.py +0 -0
  173. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/termination.py +0 -0
  174. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/trajectory.py +0 -0
  175. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/visualizer.py +0 -0
  176. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/__init__.py +0 -0
  177. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/__init__.py +0 -0
  178. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
  179. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/base.py +0 -0
  180. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
  181. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
  182. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
  183. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
  184. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
  185. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
  186. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
  187. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
  188. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/registry.py +0 -0
  189. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/superbox.py +0 -0
  190. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/session/__init__.py +0 -0
  191. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/session/state_manager.py +0 -0
  192. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/.env.example +0 -0
  193. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/adapters.py +0 -0
  194. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/async_streaming.py +0 -0
  195. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/complete_programs.py +0 -0
  196. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/dspy_config_example.yaml +0 -0
  197. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/evaluation.py +0 -0
  198. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/industry_templates.py +0 -0
  199. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/optimizers.py +0 -0
  200. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/retrievers.py +0 -0
  201. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
  202. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/tests/__init__.py +0 -0
  203. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/tests/rlm/__init__.py +0 -0
  204. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/tests/rlm/test_phase2.py +0 -0
  205. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
  206. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/traces/__init__.py +0 -0
  207. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/traces/index.py +0 -0
  208. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/traces/models.py +0 -0
  209. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/__init__.py +0 -0
  210. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/agent_collab_view.py +0 -0
  211. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/animations.py +0 -0
  212. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/conversation.py +0 -0
  213. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/design_system.py +0 -0
  214. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/diff_viewer.py +0 -0
  215. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/notifications.py +0 -0
  216. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/persistent_shell.py +0 -0
  217. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/prompt_widget.py +0 -0
  218. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/prompts.py +0 -0
  219. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/pty_terminal.py +0 -0
  220. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/resizable_divider.py +0 -0
  221. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/thinking_display.py +0 -0
  222. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/tui_app.py +0 -0
  223. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/tui_utils.py +0 -0
  224. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/welcome.py +0 -0
  225. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/__init__.py +0 -0
  226. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/anti_patterns.py +0 -0
  227. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/auto_fixer.py +0 -0
  228. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/best_practices.py +0 -0
  229. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/code_validator.py +0 -0
  230. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/config_validator.py +0 -0
  231. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/exceptions.py +0 -0
  232. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/input_validator.py +0 -0
  233. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/learning_integration.py +0 -0
  234. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/models.py +0 -0
  235. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/module_validator.py +0 -0
  236. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/predictor_validator.py +0 -0
  237. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/quality_scorer.py +0 -0
  238. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/report_generator.py +0 -0
  239. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/security.py +0 -0
  240. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/security_validator.py +0 -0
  241. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/signature_validator.py +0 -0
  242. {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/validator.py +0 -0
  243. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/__init__.py +0 -0
  244. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/conftest.py +0 -0
  245. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
  246. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_adk_rlm_adapter.py +0 -0
  247. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_code_interpreter.py +0 -0
  248. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_deepagents_adapter.py +0 -0
  249. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
  250. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_extract_fallback.py +0 -0
  251. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_framework_registry_coverage.py +0 -0
  252. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_google_adk_adapter.py +0 -0
  253. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_leaderboard.py +0 -0
  254. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_mock_interpreter.py +0 -0
  255. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_monty_interpreter.py +0 -0
  256. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_observability_sinks.py +0 -0
  257. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_p0_features.py +0 -0
  258. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_phase3.py +0 -0
  259. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_phase4.py +0 -0
  260. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
  261. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
  262. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_repl_history.py +0 -0
  263. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_security_hardening.py +0 -0
  264. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_session_replay.py +0 -0
  265. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_submit.py +0 -0
  266. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_task_signature.py +0 -0
  267. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_user_tools.py +0 -0
  268. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_anti_patterns.py +0 -0
  269. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_auto_fixer.py +0 -0
  270. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_cache.py +0 -0
  271. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_execution_engine.py +0 -0
  272. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_export_import.py +0 -0
  273. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_harness_registry.py +0 -0
  274. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_harness_runner.py +0 -0
  275. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_init_command.py +0 -0
  276. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_integration.py +0 -0
  277. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_learning_integration.py +0 -0
  278. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_mcp_utils.py +0 -0
  279. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_module_validator.py +0 -0
  280. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_optimization_workflow.py +0 -0
  281. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_persistent_shell.py +0 -0
  282. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_predictor_validator.py +0 -0
  283. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_project_scanner.py +0 -0
  284. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_prompt_widget.py +0 -0
  285. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_property_validators.py +0 -0
  286. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_provider_discovery.py +0 -0
  287. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_provider_registry.py +0 -0
  288. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_quality_scorer.py +0 -0
  289. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_report_generator.py +0 -0
  290. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_retry.py +0 -0
  291. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_rlm_config.py +0 -0
  292. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_rlm_dspy_environment.py +0 -0
  293. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_rlm_observability.py +0 -0
  294. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_rlm_runner.py +0 -0
  295. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_sandbox_runtimes.py +0 -0
  296. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_security_validator.py +0 -0
  297. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_session_management.py +0 -0
  298. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_signature_validator.py +0 -0
  299. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_slash_harness_command.py +0 -0
  300. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_slash_rlm_command.py +0 -0
  301. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_slash_sandbox_command.py +0 -0
  302. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_streaming.py +0 -0
  303. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_superbox.py +0 -0
  304. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_tui_utils.py +0 -0
  305. {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_validation.py +0 -0
@@ -5,6 +5,13 @@ All notable changes to this project are documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.8] - 2026-05-01
9
+
10
+ ### Added
11
+ - AHE-style layered trace evidence corpus export from `TraceStore`.
12
+ - New `trace_analysis` action `export_evidence_corpus` for writing `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans.
13
+ - Evidence corpus tests covering direct store export and environment action export.
14
+
8
15
  ## [0.1.7] - 2026-04-30
9
16
 
10
17
  ### Added
@@ -69,4 +76,5 @@ Initial public release of **RLM Code**.
69
76
 
70
77
  [0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
71
78
  [0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
79
+ [0.1.8]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.8
72
80
  [0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlm-code
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
5
5
  Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
6
6
  Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
@@ -118,12 +118,13 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
118
118
 
119
119
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
120
120
 
121
- ## Release v0.1.7
121
+ ## Release v0.1.8
122
122
 
123
- This release adds HALO-style trace analysis as a new RLM environment.
123
+ This release extends HALO/AHE-style trace analysis with layered evidence export.
124
124
 
125
125
  - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
126
126
  - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
127
+ - AHE-style evidence corpus export with `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans
127
128
  - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
128
129
  - `/rlm` help/docs updated for `env=trace_analysis`
129
130
  - Dedicated trace analysis docs under the Core Engine section
@@ -25,12 +25,13 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
25
25
 
26
26
  RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
27
27
 
28
- ## Release v0.1.7
28
+ ## Release v0.1.8
29
29
 
30
- This release adds HALO-style trace analysis as a new RLM environment.
30
+ This release extends HALO/AHE-style trace analysis with layered evidence export.
31
31
 
32
32
  - New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
33
33
  - Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
34
+ - AHE-style evidence corpus export with `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans
34
35
  - Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
35
36
  - `/rlm` help/docs updated for `env=trace_analysis`
36
37
  - Dedicated trace analysis docs under the Core Engine section
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "rlm-code"
7
- version = "0.1.7"
7
+ version = "0.1.8"
8
8
  description = "RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -5,5 +5,5 @@ This package provides tools for creating, managing, and optimizing DSPy componen
5
5
  through natural language interactions.
6
6
  """
7
7
 
8
- __version__ = "0.1.7"
8
+ __version__ = "0.1.8"
9
9
  __author__ = "Super Agentic AI"
@@ -17,7 +17,7 @@ from .exceptions import (
17
17
  )
18
18
  from .session_wrapper import MCPSessionWrapper
19
19
 
20
- __version__ = "0.1.7"
20
+ __version__ = "0.1.8"
21
21
 
22
22
  __all__ = [
23
23
  "MCPClientManager",
@@ -306,8 +306,10 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
306
306
  "Return ONLY valid JSON object with keys:\n"
307
307
  "{"
308
308
  '"action": "set_trace_path" | "get_dataset_overview" | "query_traces" | '
309
- '"count_traces" | "view_trace" | "search_trace" | "view_spans" | "final", '
309
+ '"count_traces" | "view_trace" | "search_trace" | "view_spans" | '
310
+ '"export_evidence_corpus" | "final", '
310
311
  '"trace_path": "<path to JSONL traces>", '
312
+ '"output_dir": "<directory for exported evidence corpus>", '
311
313
  '"filters": {"has_errors": true, "model_names": ["..."], "service_names": ["..."], '
312
314
  '"agent_names": ["..."], "project_id": "..."}, '
313
315
  '"trace_id": "<trace id>", '
@@ -324,6 +326,7 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
324
326
  "- Always begin analysis with get_dataset_overview.\n"
325
327
  "- Use query_traces to choose real trace ids; never invent trace ids.\n"
326
328
  "- For large traces, prefer search_trace followed by view_spans.\n"
329
+ "- Use export_evidence_corpus when the caller needs files for MetaHarness or another coding agent.\n"
327
330
  "- Identify systemic harness failures, not one-off anomalies.\n"
328
331
  "- Output JSON only."
329
332
  )
@@ -448,6 +451,21 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
448
451
  reward=0.7,
449
452
  memory_note=f"Viewed selected spans for trace {trace_id}.",
450
453
  )
454
+ if action_name == "export_evidence_corpus":
455
+ output_dir = self._required_str(action, "output_dir")
456
+ resolved_output = Path(output_dir).expanduser()
457
+ if not resolved_output.is_absolute():
458
+ resolved_output = self.workdir / resolved_output
459
+ return self._ok(
460
+ observation=store.export_evidence_corpus(
461
+ resolved_output,
462
+ filters,
463
+ limit=self._int_arg(action, "limit", 100, minimum=1, maximum=1000),
464
+ include_raw=self._bool_arg(action, "include_raw", True),
465
+ ),
466
+ reward=0.75,
467
+ memory_note="Exported layered trace evidence corpus.",
468
+ )
451
469
  except Exception as exc:
452
470
  return EnvironmentActionResult(
453
471
  observation={"success": False, "error": f"{type(exc).__name__}: {exc}"},
@@ -530,6 +548,19 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
530
548
  parsed = default
531
549
  return max(minimum, min(maximum, parsed))
532
550
 
551
+ @staticmethod
552
+ def _bool_arg(action: dict[str, Any], key: str, default: bool) -> bool:
553
+ value = action.get(key, default)
554
+ if isinstance(value, bool):
555
+ return value
556
+ if isinstance(value, str):
557
+ normalized = value.strip().lower()
558
+ if normalized in {"1", "true", "yes", "on"}:
559
+ return True
560
+ if normalized in {"0", "false", "no", "off"}:
561
+ return False
562
+ return default
563
+
533
564
 
534
565
  class DSPyCodingRLMEnvironment(GenericRLMEnvironment):
535
566
  """DSPy-focused environment with file edit + tests + DSPy-aware scoring."""
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import json
6
6
  import re
7
7
  from collections import Counter
8
+ from datetime import UTC, datetime
8
9
  from pathlib import Path
9
10
  from typing import Any
10
11
 
@@ -16,6 +17,22 @@ SURGICAL_ATTR_CAP = 16384
16
17
  VIEW_TRACE_CHAR_BUDGET = 150_000
17
18
  OVERVIEW_SAMPLE_TRACE_IDS = 20
18
19
  NOISY_FLAT_PROJECTION_RE = re.compile(r"^(?:llm\.(?:input|output)_messages|mcp\.tools)\.\d+\.")
20
+ EVIDENCE_ATTR_CAP = 2048
21
+ TASK_ID_ATTRS = (
22
+ "inference.task_id",
23
+ "task_id",
24
+ "task.id",
25
+ "benchmark.task_id",
26
+ "appworld.task_id",
27
+ )
28
+ ISSUE_ATTRS = (
29
+ "error.message",
30
+ "exception.message",
31
+ "exception.type",
32
+ "tool.name",
33
+ "input.value",
34
+ "output.value",
35
+ )
19
36
 
20
37
 
21
38
  def _truncate_value(value: Any, cap: int) -> Any:
@@ -168,6 +185,87 @@ class TraceStore:
168
185
  "truncated": len(matches) >= limit,
169
186
  }
170
187
 
188
def export_evidence_corpus(
    self,
    output_dir: str | Path,
    filters: dict[str, Any] | None = None,
    *,
    limit: int = 100,
    include_raw: bool = True,
) -> dict[str, Any]:
    """Export a layered evidence corpus for harness-optimization agents.

    The corpus mirrors the AHE progressive-disclosure pattern:
    a compact overview, one detail file per selected trace, an index, and
    optional lightly processed raw JSONL spans for drill-down.

    Args:
        output_dir: Directory that receives ``overview.md``, ``index.json``,
            ``detail/`` and (optionally) ``raw/``. It is resolved to an
            absolute path and created if missing.
        filters: Trace filters forwarded to the row filter and the overview.
        limit: Maximum number of traces to export; negative values are
            treated as zero.
        include_raw: When true, also write one JSONL file of spans per trace.

    Returns:
        A summary dictionary with the absolute output paths, the number of
        exported traces and the list of detail file paths.

    Raises:
        KeyError: Propagated from ``_read_spans`` when a selected trace id is
            not present in the store.
    """
    out = Path(output_dir).resolve()
    detail_dir = out / "detail"
    raw_dir = out / "raw"
    detail_dir.mkdir(parents=True, exist_ok=True)
    if include_raw:
        raw_dir.mkdir(parents=True, exist_ok=True)

    rows = self._filtered_rows(filters)[: max(0, limit)]
    overview = self.get_overview(filters)
    detail_entries: list[dict[str, Any]] = []
    overview_lines = self._render_overview_markdown(overview, rows, include_raw=include_raw)
    # Sanitising trace ids is lossy, so distinct ids can map to the same file
    # stem. Track the stems already handed out (case-folded, because some
    # filesystems are case-insensitive) and disambiguate with a numeric suffix
    # so one trace never overwrites another's files.
    used_stems: set[str] = set()

    for row in rows:
        spans = self._read_spans(row.trace_id)
        base_stem = self._safe_filename(row.trace_id)
        safe_id = base_stem
        suffix = 2
        while safe_id.casefold() in used_stems:
            safe_id = f"{base_stem}-{suffix}"
            suffix += 1
        used_stems.add(safe_id.casefold())

        detail_path = detail_dir / f"{safe_id}.md"
        raw_path = raw_dir / f"{safe_id}.jsonl" if include_raw else None
        detail_path.write_text(
            self._render_detail_markdown(row, spans, raw_path=raw_path),
            encoding="utf-8",
        )
        if raw_path is not None:
            self._write_raw_trace(raw_path, spans)
        detail_entries.append(
            {
                "trace_id": row.trace_id,
                "detail_path": str(detail_path),
                "raw_path": str(raw_path) if raw_path is not None else None,
                "has_errors": row.has_errors,
                "span_count": row.span_count,
                "task_ids": self._task_ids(spans),
                "error_span_count": sum(1 for span in spans if span.status_code == "STATUS_CODE_ERROR"),
            }
        )
        # One bullet per exported trace is appended under the overview's
        # trailing "Trace Details" heading.
        overview_lines.append(
            f"- `{row.trace_id}`: {row.span_count} spans, "
            f"errors={'yes' if row.has_errors else 'no'}, detail=`detail/{safe_id}.md`"
        )

    overview_path = out / "overview.md"
    index_path = out / "index.json"
    overview_path.write_text("\n".join(overview_lines) + "\n", encoding="utf-8")
    index_payload = {
        "schema_version": "rlm-code.trace_evidence_corpus.v1",
        "created_at": datetime.now(UTC).isoformat(),
        "source_trace_path": str(self.trace_path),
        "source_index_path": str(self.index_path),
        "filters": filters or {},
        "limit": limit,
        "include_raw": include_raw,
        "overview_path": str(overview_path),
        "detail_dir": str(detail_dir),
        "raw_dir": str(raw_dir) if include_raw else None,
        "overview": overview,
        "traces": detail_entries,
    }
    index_path.write_text(json.dumps(index_payload, indent=2, sort_keys=True), encoding="utf-8")
    return {
        "output_dir": str(out),
        "overview_path": str(overview_path),
        "index_path": str(index_path),
        "detail_dir": str(detail_dir),
        "raw_dir": str(raw_dir) if include_raw else None,
        "trace_count": len(detail_entries),
        "detail_paths": [entry["detail_path"] for entry in detail_entries],
    }
268
+
171
269
  def _read_spans(self, trace_id: str) -> list[SpanRecord]:
172
270
  if trace_id not in self.rows_by_id:
173
271
  raise KeyError(trace_id)
@@ -219,3 +317,131 @@ class TraceStore:
219
317
  "total_output_tokens": row.total_output_tokens,
220
318
  "project_id": row.project_id,
221
319
  }
320
+
321
+ @staticmethod
322
+ def _render_overview_markdown(
323
+ overview: dict[str, Any],
324
+ rows: list[TraceIndexRow],
325
+ *,
326
+ include_raw: bool,
327
+ ) -> list[str]:
328
+ lines = [
329
+ "# Trace Evidence Overview",
330
+ "",
331
+ "Generated by `rlm-code` trace analysis.",
332
+ "",
333
+ "## Dataset",
334
+ "",
335
+ f"- Traces selected: {len(rows)}",
336
+ f"- Total matching traces: {overview['total_traces']}",
337
+ f"- Total matching spans: {overview['total_spans']}",
338
+ f"- Error traces: {overview['error_trace_count']}",
339
+ f"- Services: {', '.join(overview['service_names']) or '-'}",
340
+ f"- Models: {', '.join(overview['model_names']) or '-'}",
341
+ f"- Agents: {', '.join(overview['agent_names']) or '-'}",
342
+ f"- Input tokens: {overview['total_input_tokens']}",
343
+ f"- Output tokens: {overview['total_output_tokens']}",
344
+ f"- Raw span files included: {'yes' if include_raw else 'no'}",
345
+ "",
346
+ "## Trace Details",
347
+ "",
348
+ ]
349
+ return lines
350
+
351
def _render_detail_markdown(
    self,
    row: TraceIndexRow,
    spans: list[SpanRecord],
    *,
    raw_path: Path | None,
) -> str:
    """Render the Markdown detail page for a single trace.

    Args:
        row: Index row supplying the trace-level summary fields.
        spans: All spans of the trace.
        raw_path: Path of the companion raw JSONL file, or ``None`` when raw
            spans are not exported; only its file name is printed.

    Returns:
        Markdown text with a summary, the ten most common span names, every
        error span, and at most the first twenty tool-like spans.
    """
    task_ids = self._task_ids(spans)
    failing = [item for item in spans if item.status_code == "STATUS_CODE_ERROR"]
    tool_like = [item for item in spans if self._looks_like_tool_span(item)]
    name_counts = Counter(item.name for item in spans).most_common(10)

    def listing(values) -> str:
        # Empty collections are shown as a dash placeholder.
        return ", ".join(values) or "-"

    def evidence(selected) -> list[str]:
        # A section with nothing to show gets an explicit "None" bullet.
        if not selected:
            return ["- None"]
        rendered: list[str] = []
        for item in selected:
            rendered += self._render_span_evidence(item)
        return rendered

    page = [
        f"# Trace Detail: {row.trace_id}",
        "",
        "## Summary",
        "",
        f"- Trace id: `{row.trace_id}`",
        f"- Spans: {row.span_count}",
        f"- Has errors: {'yes' if row.has_errors else 'no'}",
        f"- Error spans: {len(failing)}",
        f"- Task ids: {listing(task_ids)}",
        f"- Services: {listing(row.service_names)}",
        f"- Models: {listing(row.model_names)}",
        f"- Agents: {listing(row.agent_names)}",
        f"- Start: {row.start_time or '-'}",
        f"- End: {row.end_time or '-'}",
    ]
    if raw_path is not None:
        page.append(f"- Raw spans: `{raw_path.name}`")

    page += ["", "## Span Name Counts", ""]
    page += [f"- `{name}`: {count}" for name, count in name_counts]
    page += ["", "## Error Spans", ""]
    page += evidence(failing)
    page += ["", "## Tool-Like Spans", ""]
    page += evidence(tool_like[:20])
    return "\n".join(page) + "\n"
395
+
396
@staticmethod
def _render_span_evidence(span: SpanRecord) -> list[str]:
    """Render one span as a Markdown sub-section of evidence.

    The heading uses the span name, falling back to the span id when the
    name is falsy. Only attributes listed in ``ISSUE_ATTRS`` are shown, each
    passed through ``_truncate_value`` with ``EVIDENCE_ATTR_CAP``.

    Returns:
        Markdown lines for the span, terminated by an empty line.
    """
    section = [
        f"### `{span.name or span.span_id}`",
        "",
        f"- Span id: `{span.span_id}`",
        f"- Parent span id: `{span.parent_span_id or '-'}`",
        f"- Status: {span.status_code}",
    ]
    evidence = [
        (key, _truncate_value(span.attributes[key], EVIDENCE_ATTR_CAP))
        for key in ISSUE_ATTRS
        if key in span.attributes
    ]
    if evidence:
        section.append("- Evidence attributes:")
        section.extend(f" - `{key}`: `{value}`" for key, value in evidence)
    section.append("")
    return section
415
+
416
@staticmethod
def _write_raw_trace(path: Path, spans: list[SpanRecord]) -> None:
    """Write ``spans`` to ``path`` as UTF-8 JSON Lines, one span per line.

    Each span is rendered with ``_render_span`` using ``SURGICAL_ATTR_CAP``
    and serialised with sorted keys. An existing file is overwritten.
    """
    with path.open("w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(_render_span(record, SURGICAL_ATTR_CAP), sort_keys=True) + "\n"
            for record in spans
        )
422
+
423
@staticmethod
def _task_ids(spans: list[SpanRecord]) -> list[str]:
    """Collect the distinct task ids mentioned by a trace's spans.

    Every attribute key in ``TASK_ID_ATTRS`` is checked on every span. Only
    string values count; they are stripped of surrounding whitespace and
    blank results are dropped.

    Returns:
        The unique task ids in sorted order.
    """
    candidates = (
        record.attributes.get(attr_key)
        for record in spans
        for attr_key in TASK_ID_ATTRS
    )
    collected = {item.strip() for item in candidates if isinstance(item, str)}
    # Whitespace-only values strip down to the empty string; they are not ids.
    collected.discard("")
    return sorted(collected)
432
+
433
+ @staticmethod
434
+ def _looks_like_tool_span(span: SpanRecord) -> bool:
435
+ name = span.name.lower()
436
+ return (
437
+ "tool" in name
438
+ or "function" in name
439
+ or "tool.name" in span.attributes
440
+ or "input.value" in span.attributes
441
+ or "output.value" in span.attributes
442
+ )
443
+
444
+ @staticmethod
445
+ def _safe_filename(value: str) -> str:
446
+ safe = re.sub(r"[^A-Za-z0-9_.-]+", "_", value).strip("._")
447
+ return safe or "trace"
@@ -23,6 +23,7 @@ def _write_trace_fixture(path: Path) -> None:
23
23
  "inference.llm.model_name": "gpt-test",
24
24
  "inference.llm.input_tokens": 10,
25
25
  "inference.llm.output_tokens": 5,
26
+ "inference.task_id": "task-ok",
26
27
  },
27
28
  },
28
29
  {
@@ -39,6 +40,7 @@ def _write_trace_fixture(path: Path) -> None:
39
40
  "inference.project_id": "demo",
40
41
  "inference.agent_name": "Root",
41
42
  "inference.llm.model_name": "gpt-test",
43
+ "inference.task_id": "task-error",
42
44
  "error.message": "hallucinated tool call spotify__login",
43
45
  },
44
46
  },
@@ -53,6 +55,7 @@ def _write_trace_fixture(path: Path) -> None:
53
55
  "status": {"code": "STATUS_CODE_ERROR"},
54
56
  "resource": {"attributes": {"service.name": "demo-agent"}},
55
57
  "attributes": {
58
+ "inference.task_id": "task-error",
56
59
  "tool.name": "spotify__login",
57
60
  "input.value": "{\"extra_argument\": true}",
58
61
  "output.value": "Unknown tool argument: extra_argument",
@@ -84,6 +87,19 @@ def test_trace_store_indexes_and_queries_jsonl(tmp_path: Path) -> None:
84
87
  selected = store.view_spans("trace-error", ["span-tool-error"])
85
88
  assert selected["spans"][0]["name"] == "function.spotify__login"
86
89
 
90
+ exported = store.export_evidence_corpus(tmp_path / "evidence", {"has_errors": True})
91
+ assert exported["trace_count"] == 1
92
+ overview_text = (tmp_path / "evidence" / "overview.md").read_text(encoding="utf-8")
93
+ assert "Trace Evidence Overview" in overview_text
94
+ assert "`trace-error`" in overview_text
95
+ detail_text = (tmp_path / "evidence" / "detail" / "trace-error.md").read_text(encoding="utf-8")
96
+ assert "task-error" in detail_text
97
+ assert "spotify__login" in detail_text
98
+ assert (tmp_path / "evidence" / "raw" / "trace-error.jsonl").exists()
99
+ index_data = json.loads((tmp_path / "evidence" / "index.json").read_text(encoding="utf-8"))
100
+ assert index_data["schema_version"] == "rlm-code.trace_evidence_corpus.v1"
101
+ assert index_data["traces"][0]["task_ids"] == ["task-error"]
102
+
87
103
 
88
104
  def test_trace_analysis_environment_actions(tmp_path: Path) -> None:
89
105
  trace_path = tmp_path / "traces.jsonl"
@@ -113,3 +129,16 @@ def test_trace_analysis_environment_actions(tmp_path: Path) -> None:
113
129
  )
114
130
  assert searched.observation["success"] is True
115
131
  assert searched.observation["match_count"] == 1
132
+
133
+ exported = env.execute_action(
134
+ {
135
+ "action": "export_evidence_corpus",
136
+ "output_dir": "trace-evidence",
137
+ "filters": {"has_errors": True},
138
+ },
139
+ execution_engine=None,
140
+ exec_timeout=1,
141
+ )
142
+ assert exported.observation["success"] is True
143
+ assert exported.observation["trace_count"] == 1
144
+ assert (tmp_path / "trace-evidence" / "overview.md").exists()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes