rlm-code 0.1.7__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rlm_code-0.1.7 → rlm_code-0.1.8}/CHANGELOG.md +8 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/PKG-INFO +4 -3
- {rlm_code-0.1.7 → rlm_code-0.1.8}/README.md +3 -2
- {rlm_code-0.1.7 → rlm_code-0.1.8}/pyproject.toml +1 -1
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/__init__.py +1 -1
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/__init__.py +1 -1
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/environments.py +32 -1
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/traces/store.py +226 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_trace_analysis.py +29 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/.gitignore +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/LICENSE +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/NOTICE +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/agent.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/agents/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/agents/rlm_agent.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/callbacks/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/callbacks/code_execution.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/cli.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/code_executor.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/events.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/lazy.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/loader.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/parsers/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/parsers/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/parsers/pdf.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/parsers/text.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/sources/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/sources/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/sources/gcs.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/files/sources/local.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/llm.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/logging/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/logging/rlm_logger.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/logging/verbose.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/main.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/prompts.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/repl/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/repl/local_repl.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/repl/safe_builtins.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/templates/index.html +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/tools/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/types.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/usage.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/adk_rlm/web.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/eval/packs/README.md +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/__main__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/config_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/create_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/demo_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/export_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/init_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/interactive_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/mcp_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/models_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/nl_command_router.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/optimize_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/run_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/commands/slash_commands.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/config.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/debug_logger.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/directory_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/exceptions.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/logging.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/venv_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/core/version_checker.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/phase2_demo.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/phase3_demo.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/phase4_demo.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/examples/pure_rlm_demo.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/execution/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/execution/engine.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/execution/sandbox.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/export/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/export/handler.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/export/package_builder.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/generators/evaluation_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/generators/gepa_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/harness/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/harness/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/harness/runner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/main.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/client_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/config.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/exceptions.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/retry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/server/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/server/rlm_server.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/server/tools.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/session_wrapper.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/factory.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/sse_transport.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/stdio_transport.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/transports/websocket_transport.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/mcp/utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/cache.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/code_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/dspy_reference_loader.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/llm_connector.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/model_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/acp_discovery.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/local_discovery.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/model_catalog.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/providers/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/streaming.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/models/task_collector.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/optimization/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/optimization/data_collector.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/optimization/executor.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/optimization/workflow_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/context_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/dspy_md_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/initializer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/project/scanner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/py.typed +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/action_planner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/audit.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/gate.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/handlers.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/approval/policy.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/benchmark_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/benchmarks.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/chat_session.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/code_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/comparison.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/config_schema.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/context_store.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/delegation.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/docker_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/events.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/frameworks/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/leaderboard.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/memory_compaction.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/mock_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/monty_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/observability.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/observability_sinks.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/action_policies.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/compaction_policies.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/reward_policies.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/policies/termination_policies.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/pure_rlm_environment.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/repl_types.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/theme.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/runner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/session_replay.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/task_signature.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/termination.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/trajectory.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/rlm/visualizer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/runtimes/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/sandbox/superbox.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/session/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/session/state_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/.env.example +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/adapters.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/async_streaming.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/complete_programs.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/dspy_config_example.yaml +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/evaluation.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/industry_templates.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/optimizers.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/retrievers.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/tests/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/tests/rlm/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/tests/rlm/test_phase2.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/traces/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/traces/index.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/traces/models.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/agent_collab_view.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/animations.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/conversation.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/design_system.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/diff_viewer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/notifications.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/persistent_shell.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/prompt_widget.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/prompts.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/pty_terminal.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/resizable_divider.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/thinking_display.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/tui_app.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/tui_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/ui/welcome.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/anti_patterns.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/auto_fixer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/best_practices.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/code_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/config_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/exceptions.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/input_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/learning_integration.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/models.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/module_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/predictor_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/quality_scorer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/report_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/security.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/security_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/signature_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/rlm_code/validation/validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/conftest.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_code_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_deepagents_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_extract_fallback.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_framework_registry_coverage.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_google_adk_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_leaderboard.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_mock_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_monty_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_observability_sinks.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_p0_features.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_phase3.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_phase4.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_repl_history.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_security_hardening.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_session_replay.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_submit.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_task_signature.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/rlm/test_user_tools.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_anti_patterns.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_auto_fixer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_cache.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_execution_engine.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_export_import.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_harness_registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_harness_runner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_init_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_integration.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_learning_integration.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_mcp_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_module_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_optimization_workflow.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_persistent_shell.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_predictor_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_project_scanner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_prompt_widget.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_property_validators.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_provider_discovery.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_provider_registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_quality_scorer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_report_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_retry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_rlm_config.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_rlm_dspy_environment.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_rlm_observability.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_rlm_runner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_sandbox_runtimes.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_security_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_session_management.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_signature_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_slash_harness_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_slash_rlm_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_slash_sandbox_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_streaming.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_superbox.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_tui_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.8}/tests/test_validation.py +0 -0
|
@@ -5,6 +5,13 @@ All notable changes to this project are documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.8] - 2026-05-01
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- AHE-style layered trace evidence corpus export from `TraceStore`.
|
|
12
|
+
- New `trace_analysis` action `export_evidence_corpus` for writing `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans.
|
|
13
|
+
- Evidence corpus tests covering direct store export and environment action export.
|
|
14
|
+
|
|
8
15
|
## [0.1.7] - 2026-04-30
|
|
9
16
|
|
|
10
17
|
### Added
|
|
@@ -69,4 +76,5 @@ Initial public release of **RLM Code**.
|
|
|
69
76
|
|
|
70
77
|
[0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
|
|
71
78
|
[0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
|
|
79
|
+
[0.1.8]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.8
|
|
72
80
|
[0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlm-code
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
|
|
5
5
|
Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
|
|
6
6
|
Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
|
|
@@ -118,12 +118,13 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
118
118
|
|
|
119
119
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
120
120
|
|
|
121
|
-
## Release v0.1.7
|
|
121
|
+
## Release v0.1.8
|
|
122
122
|
|
|
123
|
-
This release
|
|
123
|
+
This release extends HALO/AHE-style trace analysis with layered evidence export.
|
|
124
124
|
|
|
125
125
|
- New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
|
|
126
126
|
- Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
|
|
127
|
+
- AHE-style evidence corpus export with `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans
|
|
127
128
|
- Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
|
|
128
129
|
- `/rlm` help/docs updated for `env=trace_analysis`
|
|
129
130
|
- Dedicated trace analysis docs under the Core Engine section
|
|
@@ -25,12 +25,13 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
25
25
|
|
|
26
26
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
27
27
|
|
|
28
|
-
## Release v0.1.7
|
|
28
|
+
## Release v0.1.8
|
|
29
29
|
|
|
30
|
-
This release
|
|
30
|
+
This release extends HALO/AHE-style trace analysis with layered evidence export.
|
|
31
31
|
|
|
32
32
|
- New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
|
|
33
33
|
- Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
|
|
34
|
+
- AHE-style evidence corpus export with `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans
|
|
34
35
|
- Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
|
|
35
36
|
- `/rlm` help/docs updated for `env=trace_analysis`
|
|
36
37
|
- Dedicated trace analysis docs under the Core Engine section
|
|
@@ -306,8 +306,10 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
|
|
|
306
306
|
"Return ONLY valid JSON object with keys:\n"
|
|
307
307
|
"{"
|
|
308
308
|
'"action": "set_trace_path" | "get_dataset_overview" | "query_traces" | '
|
|
309
|
-
'"count_traces" | "view_trace" | "search_trace" | "view_spans" | "final", '
|
|
309
|
+
'"count_traces" | "view_trace" | "search_trace" | "view_spans" | '
|
|
310
|
+
'"export_evidence_corpus" | "final", '
|
|
310
311
|
'"trace_path": "<path to JSONL traces>", '
|
|
312
|
+
'"output_dir": "<directory for exported evidence corpus>", '
|
|
311
313
|
'"filters": {"has_errors": true, "model_names": ["..."], "service_names": ["..."], '
|
|
312
314
|
'"agent_names": ["..."], "project_id": "..."}, '
|
|
313
315
|
'"trace_id": "<trace id>", '
|
|
@@ -324,6 +326,7 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
|
|
|
324
326
|
"- Always begin analysis with get_dataset_overview.\n"
|
|
325
327
|
"- Use query_traces to choose real trace ids; never invent trace ids.\n"
|
|
326
328
|
"- For large traces, prefer search_trace followed by view_spans.\n"
|
|
329
|
+
"- Use export_evidence_corpus when the caller needs files for MetaHarness or another coding agent.\n"
|
|
327
330
|
"- Identify systemic harness failures, not one-off anomalies.\n"
|
|
328
331
|
"- Output JSON only."
|
|
329
332
|
)
|
|
@@ -448,6 +451,21 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
|
|
|
448
451
|
reward=0.7,
|
|
449
452
|
memory_note=f"Viewed selected spans for trace {trace_id}.",
|
|
450
453
|
)
|
|
454
|
+
if action_name == "export_evidence_corpus":
|
|
455
|
+
output_dir = self._required_str(action, "output_dir")
|
|
456
|
+
resolved_output = Path(output_dir).expanduser()
|
|
457
|
+
if not resolved_output.is_absolute():
|
|
458
|
+
resolved_output = self.workdir / resolved_output
|
|
459
|
+
return self._ok(
|
|
460
|
+
observation=store.export_evidence_corpus(
|
|
461
|
+
resolved_output,
|
|
462
|
+
filters,
|
|
463
|
+
limit=self._int_arg(action, "limit", 100, minimum=1, maximum=1000),
|
|
464
|
+
include_raw=self._bool_arg(action, "include_raw", True),
|
|
465
|
+
),
|
|
466
|
+
reward=0.75,
|
|
467
|
+
memory_note="Exported layered trace evidence corpus.",
|
|
468
|
+
)
|
|
451
469
|
except Exception as exc:
|
|
452
470
|
return EnvironmentActionResult(
|
|
453
471
|
observation={"success": False, "error": f"{type(exc).__name__}: {exc}"},
|
|
@@ -530,6 +548,19 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
|
|
|
530
548
|
parsed = default
|
|
531
549
|
return max(minimum, min(maximum, parsed))
|
|
532
550
|
|
|
551
|
+
@staticmethod
|
|
552
|
+
def _bool_arg(action: dict[str, Any], key: str, default: bool) -> bool:
|
|
553
|
+
value = action.get(key, default)
|
|
554
|
+
if isinstance(value, bool):
|
|
555
|
+
return value
|
|
556
|
+
if isinstance(value, str):
|
|
557
|
+
normalized = value.strip().lower()
|
|
558
|
+
if normalized in {"1", "true", "yes", "on"}:
|
|
559
|
+
return True
|
|
560
|
+
if normalized in {"0", "false", "no", "off"}:
|
|
561
|
+
return False
|
|
562
|
+
return default
|
|
563
|
+
|
|
533
564
|
|
|
534
565
|
class DSPyCodingRLMEnvironment(GenericRLMEnvironment):
|
|
535
566
|
"""DSPy-focused environment with file edit + tests + DSPy-aware scoring."""
|
|
@@ -5,6 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import json
|
|
6
6
|
import re
|
|
7
7
|
from collections import Counter
|
|
8
|
+
from datetime import UTC, datetime
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
@@ -16,6 +17,22 @@ SURGICAL_ATTR_CAP = 16384
|
|
|
16
17
|
VIEW_TRACE_CHAR_BUDGET = 150_000
|
|
17
18
|
OVERVIEW_SAMPLE_TRACE_IDS = 20
|
|
18
19
|
NOISY_FLAT_PROJECTION_RE = re.compile(r"^(?:llm\.(?:input|output)_messages|mcp\.tools)\.\d+\.")
|
|
20
|
+
EVIDENCE_ATTR_CAP = 2048
|
|
21
|
+
TASK_ID_ATTRS = (
|
|
22
|
+
"inference.task_id",
|
|
23
|
+
"task_id",
|
|
24
|
+
"task.id",
|
|
25
|
+
"benchmark.task_id",
|
|
26
|
+
"appworld.task_id",
|
|
27
|
+
)
|
|
28
|
+
ISSUE_ATTRS = (
|
|
29
|
+
"error.message",
|
|
30
|
+
"exception.message",
|
|
31
|
+
"exception.type",
|
|
32
|
+
"tool.name",
|
|
33
|
+
"input.value",
|
|
34
|
+
"output.value",
|
|
35
|
+
)
|
|
19
36
|
|
|
20
37
|
|
|
21
38
|
def _truncate_value(value: Any, cap: int) -> Any:
|
|
@@ -168,6 +185,87 @@ class TraceStore:
|
|
|
168
185
|
"truncated": len(matches) >= limit,
|
|
169
186
|
}
|
|
170
187
|
|
|
188
|
+
def export_evidence_corpus(
    self,
    output_dir: str | Path,
    filters: dict[str, Any] | None = None,
    *,
    limit: int = 100,
    include_raw: bool = True,
) -> dict[str, Any]:
    """Export a layered evidence corpus for harness-optimization agents.

    The corpus mirrors the AHE progressive-disclosure pattern:
    a compact overview, one detail file per selected trace, an index, and
    optional lightly processed raw JSONL spans for drill-down.

    Files written under ``output_dir`` (created if missing):
    ``overview.md``, ``index.json``, ``detail/<name>.md`` and, when
    ``include_raw`` is true, ``raw/<name>.jsonl``.

    Args:
        output_dir: Destination directory; resolved to an absolute path.
        filters: Forwarded unchanged to ``_filtered_rows`` and
            ``get_overview``.
        limit: Maximum number of traces to export; negative values export
            nothing.
        include_raw: Whether to also write one raw JSONL file per trace.

    Returns:
        A summary dict with ``output_dir``, ``overview_path``,
        ``index_path``, ``detail_dir``, ``raw_dir`` (``None`` when raw
        export is disabled), ``trace_count`` and ``detail_paths``.
    """
    out = Path(output_dir).resolve()
    detail_dir = out / "detail"
    raw_dir = out / "raw"
    detail_dir.mkdir(parents=True, exist_ok=True)
    if include_raw:
        raw_dir.mkdir(parents=True, exist_ok=True)

    rows = self._filtered_rows(filters)[: max(0, limit)]
    overview = self.get_overview(filters)
    detail_entries: list[dict[str, Any]] = []
    detail_lines = self._render_overview_markdown(overview, rows, include_raw=include_raw)
    # Filenames already handed out in this export. Distinct trace ids can
    # sanitise to the same name (e.g. "a/b" and "a:b"), which would make a
    # later trace silently overwrite an earlier one. Compared lower-cased so
    # the corpus is also safe on case-insensitive filesystems.
    used_names: set[str] = set()

    for row in rows:
        spans = self._read_spans(row.trace_id)
        base_id = self._safe_filename(row.trace_id)
        safe_id = base_id
        suffix = 2
        while safe_id.lower() in used_names:
            safe_id = f"{base_id}_{suffix}"
            suffix += 1
        used_names.add(safe_id.lower())

        detail_path = detail_dir / f"{safe_id}.md"
        raw_path = raw_dir / f"{safe_id}.jsonl" if include_raw else None
        detail_path.write_text(
            self._render_detail_markdown(row, spans, raw_path=raw_path),
            encoding="utf-8",
        )
        if raw_path is not None:
            self._write_raw_trace(raw_path, spans)
        detail_entries.append(
            {
                "trace_id": row.trace_id,
                "detail_path": str(detail_path),
                "raw_path": str(raw_path) if raw_path is not None else None,
                "has_errors": row.has_errors,
                "span_count": row.span_count,
                "task_ids": self._task_ids(spans),
                "error_span_count": sum(1 for span in spans if span.status_code == "STATUS_CODE_ERROR"),
            }
        )
        # One bullet per trace in overview.md, pointing at its detail file
        # relative to the corpus root.
        detail_lines.append(
            f"- `{row.trace_id}`: {row.span_count} spans, "
            f"errors={'yes' if row.has_errors else 'no'}, detail=`detail/{safe_id}.md`"
        )

    overview_path = out / "overview.md"
    index_path = out / "index.json"
    overview_path.write_text("\n".join(detail_lines) + "\n", encoding="utf-8")
    index_payload = {
        "schema_version": "rlm-code.trace_evidence_corpus.v1",
        "created_at": datetime.now(UTC).isoformat(),
        "source_trace_path": str(self.trace_path),
        "source_index_path": str(self.index_path),
        "filters": filters or {},
        "limit": limit,
        "include_raw": include_raw,
        "overview_path": str(overview_path),
        "detail_dir": str(detail_dir),
        "raw_dir": str(raw_dir) if include_raw else None,
        "overview": overview,
        "traces": detail_entries,
    }
    index_path.write_text(json.dumps(index_payload, indent=2, sort_keys=True), encoding="utf-8")
    return {
        "output_dir": str(out),
        "overview_path": str(overview_path),
        "index_path": str(index_path),
        "detail_dir": str(detail_dir),
        "raw_dir": str(raw_dir) if include_raw else None,
        "trace_count": len(detail_entries),
        "detail_paths": [entry["detail_path"] for entry in detail_entries],
    }
|
|
268
|
+
|
|
171
269
|
def _read_spans(self, trace_id: str) -> list[SpanRecord]:
|
|
172
270
|
if trace_id not in self.rows_by_id:
|
|
173
271
|
raise KeyError(trace_id)
|
|
@@ -219,3 +317,131 @@ class TraceStore:
|
|
|
219
317
|
"total_output_tokens": row.total_output_tokens,
|
|
220
318
|
"project_id": row.project_id,
|
|
221
319
|
}
|
|
320
|
+
|
|
321
|
+
@staticmethod
|
|
322
|
+
def _render_overview_markdown(
|
|
323
|
+
overview: dict[str, Any],
|
|
324
|
+
rows: list[TraceIndexRow],
|
|
325
|
+
*,
|
|
326
|
+
include_raw: bool,
|
|
327
|
+
) -> list[str]:
|
|
328
|
+
lines = [
|
|
329
|
+
"# Trace Evidence Overview",
|
|
330
|
+
"",
|
|
331
|
+
"Generated by `rlm-code` trace analysis.",
|
|
332
|
+
"",
|
|
333
|
+
"## Dataset",
|
|
334
|
+
"",
|
|
335
|
+
f"- Traces selected: {len(rows)}",
|
|
336
|
+
f"- Total matching traces: {overview['total_traces']}",
|
|
337
|
+
f"- Total matching spans: {overview['total_spans']}",
|
|
338
|
+
f"- Error traces: {overview['error_trace_count']}",
|
|
339
|
+
f"- Services: {', '.join(overview['service_names']) or '-'}",
|
|
340
|
+
f"- Models: {', '.join(overview['model_names']) or '-'}",
|
|
341
|
+
f"- Agents: {', '.join(overview['agent_names']) or '-'}",
|
|
342
|
+
f"- Input tokens: {overview['total_input_tokens']}",
|
|
343
|
+
f"- Output tokens: {overview['total_output_tokens']}",
|
|
344
|
+
f"- Raw span files included: {'yes' if include_raw else 'no'}",
|
|
345
|
+
"",
|
|
346
|
+
"## Trace Details",
|
|
347
|
+
"",
|
|
348
|
+
]
|
|
349
|
+
return lines
|
|
350
|
+
|
|
351
|
+
def _render_detail_markdown(
    self,
    row: TraceIndexRow,
    spans: list[SpanRecord],
    *,
    raw_path: Path | None,
) -> str:
    """Render the Markdown detail page for a single trace.

    Args:
        row: Index row supplying the summary fields of the trace.
        spans: All spans of the trace.
        raw_path: Location of the raw JSONL export, or ``None`` when raw
            export is disabled; only its file name is mentioned.

    Returns:
        The full document text, newline-terminated, with a summary, the
        ten most common span names, every error span, and at most twenty
        tool-like spans.
    """

    def joined(values: list[str]) -> str:
        # An empty list is shown as a dash rather than a blank.
        return ", ".join(values) or "-"

    def evidence_section(title: str, selected: list[SpanRecord]) -> list[str]:
        # A heading followed by per-span evidence, or a "None" placeholder.
        section = ["", title, ""]
        if not selected:
            section.append("- None")
            return section
        for item in selected:
            section.extend(self._render_span_evidence(item))
        return section

    task_ids = self._task_ids(spans)
    failed = [item for item in spans if item.status_code == "STATUS_CODE_ERROR"]
    tool_like = [item for item in spans if self._looks_like_tool_span(item)]
    name_counts = Counter(item.name for item in spans).most_common(10)
    error_flag = "yes" if row.has_errors else "no"

    document = [
        f"# Trace Detail: {row.trace_id}",
        "",
        "## Summary",
        "",
        f"- Trace id: `{row.trace_id}`",
        f"- Spans: {row.span_count}",
        f"- Has errors: {error_flag}",
        f"- Error spans: {len(failed)}",
        f"- Task ids: {joined(task_ids)}",
        f"- Services: {joined(row.service_names)}",
        f"- Models: {joined(row.model_names)}",
        f"- Agents: {joined(row.agent_names)}",
        f"- Start: {row.start_time or '-'}",
        f"- End: {row.end_time or '-'}",
    ]
    if raw_path is not None:
        document.append(f"- Raw spans: `{raw_path.name}`")

    document += ["", "## Span Name Counts", ""]
    document += [f"- `{name}`: {count}" for name, count in name_counts]
    document += evidence_section("## Error Spans", failed)
    # Tool-like spans are capped at twenty to keep the page compact.
    document += evidence_section("## Tool-Like Spans", tool_like[:20])
    return "\n".join(document) + "\n"
|
|
395
|
+
|
|
396
|
+
@staticmethod
def _render_span_evidence(span: SpanRecord) -> list[str]:
    """Render one span as a Markdown subsection of evidence.

    The heading uses the span name, falling back to the span id when the
    name is falsy. Attributes listed in ``ISSUE_ATTRS`` that are present
    on the span are appended as nested bullets, each value truncated via
    ``_truncate_value`` with ``EVIDENCE_ATTR_CAP``.

    Returns:
        The Markdown lines for the span, ending with a blank line.
    """
    heading = span.name or span.span_id
    parent = span.parent_span_id or "-"
    rendered = [
        f"### `{heading}`",
        "",
        f"- Span id: `{span.span_id}`",
        f"- Parent span id: `{parent}`",
        f"- Status: {span.status_code}",
    ]

    bullets = []
    for key in ISSUE_ATTRS:
        if key not in span.attributes:
            continue
        value = _truncate_value(span.attributes[key], EVIDENCE_ATTR_CAP)
        bullets.append(f" - `{key}`: `{value}`")

    # The sub-list header is only emitted when there is something under it.
    if bullets:
        rendered.append("- Evidence attributes:")
        rendered.extend(bullets)
    rendered.append("")
    return rendered
|
|
415
|
+
|
|
416
|
+
@staticmethod
def _write_raw_trace(path: Path, spans: list[SpanRecord]) -> None:
    """Write ``spans`` to ``path`` as JSON Lines, one span per line.

    Each span is rendered through ``_render_span`` with
    ``SURGICAL_ATTR_CAP`` and serialised with sorted keys. The file is
    opened in write mode, so existing content is replaced.
    """
    with path.open("w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(_render_span(record, SURGICAL_ATTR_CAP), sort_keys=True) + "\n"
            for record in spans
        )
|
|
422
|
+
|
|
423
|
+
@staticmethod
def _task_ids(spans: list[SpanRecord]) -> list[str]:
    """Collect the distinct task identifiers attached to ``spans``.

    Every key in ``TASK_ID_ATTRS`` is looked up on every span; only
    non-blank string values count, with surrounding whitespace removed.

    Returns:
        The unique identifiers in sorted order.
    """
    collected: set[str] = set()
    for record in spans:
        for attr_name in TASK_ID_ATTRS:
            candidate = record.attributes.get(attr_name)
            if not isinstance(candidate, str):
                continue
            cleaned = candidate.strip()
            if cleaned:
                collected.add(cleaned)
    return sorted(collected)
|
|
432
|
+
|
|
433
|
+
@staticmethod
|
|
434
|
+
def _looks_like_tool_span(span: SpanRecord) -> bool:
|
|
435
|
+
name = span.name.lower()
|
|
436
|
+
return (
|
|
437
|
+
"tool" in name
|
|
438
|
+
or "function" in name
|
|
439
|
+
or "tool.name" in span.attributes
|
|
440
|
+
or "input.value" in span.attributes
|
|
441
|
+
or "output.value" in span.attributes
|
|
442
|
+
)
|
|
443
|
+
|
|
444
|
+
@staticmethod
|
|
445
|
+
def _safe_filename(value: str) -> str:
|
|
446
|
+
safe = re.sub(r"[^A-Za-z0-9_.-]+", "_", value).strip("._")
|
|
447
|
+
return safe or "trace"
|
|
@@ -23,6 +23,7 @@ def _write_trace_fixture(path: Path) -> None:
|
|
|
23
23
|
"inference.llm.model_name": "gpt-test",
|
|
24
24
|
"inference.llm.input_tokens": 10,
|
|
25
25
|
"inference.llm.output_tokens": 5,
|
|
26
|
+
"inference.task_id": "task-ok",
|
|
26
27
|
},
|
|
27
28
|
},
|
|
28
29
|
{
|
|
@@ -39,6 +40,7 @@ def _write_trace_fixture(path: Path) -> None:
|
|
|
39
40
|
"inference.project_id": "demo",
|
|
40
41
|
"inference.agent_name": "Root",
|
|
41
42
|
"inference.llm.model_name": "gpt-test",
|
|
43
|
+
"inference.task_id": "task-error",
|
|
42
44
|
"error.message": "hallucinated tool call spotify__login",
|
|
43
45
|
},
|
|
44
46
|
},
|
|
@@ -53,6 +55,7 @@ def _write_trace_fixture(path: Path) -> None:
|
|
|
53
55
|
"status": {"code": "STATUS_CODE_ERROR"},
|
|
54
56
|
"resource": {"attributes": {"service.name": "demo-agent"}},
|
|
55
57
|
"attributes": {
|
|
58
|
+
"inference.task_id": "task-error",
|
|
56
59
|
"tool.name": "spotify__login",
|
|
57
60
|
"input.value": "{\"extra_argument\": true}",
|
|
58
61
|
"output.value": "Unknown tool argument: extra_argument",
|
|
@@ -84,6 +87,19 @@ def test_trace_store_indexes_and_queries_jsonl(tmp_path: Path) -> None:
|
|
|
84
87
|
selected = store.view_spans("trace-error", ["span-tool-error"])
|
|
85
88
|
assert selected["spans"][0]["name"] == "function.spotify__login"
|
|
86
89
|
|
|
90
|
+
exported = store.export_evidence_corpus(tmp_path / "evidence", {"has_errors": True})
|
|
91
|
+
assert exported["trace_count"] == 1
|
|
92
|
+
overview_text = (tmp_path / "evidence" / "overview.md").read_text(encoding="utf-8")
|
|
93
|
+
assert "Trace Evidence Overview" in overview_text
|
|
94
|
+
assert "`trace-error`" in overview_text
|
|
95
|
+
detail_text = (tmp_path / "evidence" / "detail" / "trace-error.md").read_text(encoding="utf-8")
|
|
96
|
+
assert "task-error" in detail_text
|
|
97
|
+
assert "spotify__login" in detail_text
|
|
98
|
+
assert (tmp_path / "evidence" / "raw" / "trace-error.jsonl").exists()
|
|
99
|
+
index_data = json.loads((tmp_path / "evidence" / "index.json").read_text(encoding="utf-8"))
|
|
100
|
+
assert index_data["schema_version"] == "rlm-code.trace_evidence_corpus.v1"
|
|
101
|
+
assert index_data["traces"][0]["task_ids"] == ["task-error"]
|
|
102
|
+
|
|
87
103
|
|
|
88
104
|
def test_trace_analysis_environment_actions(tmp_path: Path) -> None:
|
|
89
105
|
trace_path = tmp_path / "traces.jsonl"
|
|
@@ -113,3 +129,16 @@ def test_trace_analysis_environment_actions(tmp_path: Path) -> None:
|
|
|
113
129
|
)
|
|
114
130
|
assert searched.observation["success"] is True
|
|
115
131
|
assert searched.observation["match_count"] == 1
|
|
132
|
+
|
|
133
|
+
exported = env.execute_action(
|
|
134
|
+
{
|
|
135
|
+
"action": "export_evidence_corpus",
|
|
136
|
+
"output_dir": "trace-evidence",
|
|
137
|
+
"filters": {"has_errors": True},
|
|
138
|
+
},
|
|
139
|
+
execution_engine=None,
|
|
140
|
+
exec_timeout=1,
|
|
141
|
+
)
|
|
142
|
+
assert exported.observation["success"] is True
|
|
143
|
+
assert exported.observation["trace_count"] == 1
|
|
144
|
+
assert (tmp_path / "trace-evidence" / "overview.md").exists()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|