rlm-code 0.1.6__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rlm_code-0.1.6 → rlm_code-0.1.7}/CHANGELOG.md +14 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/PKG-INFO +65 -9
- {rlm_code-0.1.6 → rlm_code-0.1.7}/README.md +64 -8
- {rlm_code-0.1.6 → rlm_code-0.1.7}/pyproject.toml +1 -1
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/__init__.py +1 -1
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/slash_commands.py +8 -8
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/__init__.py +1 -1
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/action_planner.py +3 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/environments.py +245 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/runner.py +13 -0
- rlm_code-0.1.7/rlm_code/traces/__init__.py +6 -0
- rlm_code-0.1.7/rlm_code/traces/index.py +170 -0
- rlm_code-0.1.7/rlm_code/traces/models.py +103 -0
- rlm_code-0.1.7/rlm_code/traces/store.py +221 -0
- rlm_code-0.1.7/tests/test_trace_analysis.py +115 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/.gitignore +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/LICENSE +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/NOTICE +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/agent.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/agents/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/agents/rlm_agent.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/callbacks/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/callbacks/code_execution.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/cli.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/code_executor.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/events.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/base.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/lazy.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/loader.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/parsers/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/parsers/base.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/parsers/pdf.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/parsers/text.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/sources/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/sources/base.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/sources/gcs.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/files/sources/local.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/llm.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/logging/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/logging/rlm_logger.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/logging/verbose.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/main.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/prompts.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/repl/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/repl/local_repl.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/repl/safe_builtins.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/templates/index.html +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/tools/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/types.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/usage.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/adk_rlm/web.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/eval/packs/README.md +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/__main__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/config_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/create_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/demo_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/export_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/init_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/interactive_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/mcp_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/models_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/nl_command_router.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/optimize_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/commands/run_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/config.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/debug_logger.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/directory_utils.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/exceptions.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/logging.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/venv_utils.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/core/version_checker.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/phase2_demo.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/phase3_demo.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/phase4_demo.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/examples/pure_rlm_demo.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/execution/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/execution/engine.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/execution/sandbox.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/export/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/export/handler.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/export/package_builder.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/generators/evaluation_generator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/generators/gepa_generator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/harness/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/harness/registry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/harness/runner.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/main.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/client_manager.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/config.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/exceptions.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/retry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/server/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/server/rlm_server.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/server/tools.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/session_wrapper.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/factory.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/sse_transport.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/stdio_transport.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/transports/websocket_transport.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/mcp/utils.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/cache.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/code_generator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/dspy_reference_loader.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/llm_connector.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/model_manager.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/acp_discovery.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/local_discovery.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/model_catalog.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/providers/registry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/streaming.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/models/task_collector.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/optimization/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/optimization/data_collector.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/optimization/executor.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/optimization/workflow_manager.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/context_manager.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/dspy_md_generator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/initializer.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/project/scanner.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/py.typed +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/audit.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/gate.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/handlers.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/approval/policy.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/benchmark_manager.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/benchmarks.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/chat_session.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/code_interpreter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/comparison.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/config_schema.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/context_store.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/delegation.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/docker_interpreter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/events.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/base.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/frameworks/registry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/leaderboard.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/memory_compaction.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/mock_interpreter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/monty_interpreter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/observability.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/observability_sinks.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/action_policies.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/base.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/compaction_policies.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/registry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/reward_policies.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/policies/termination_policies.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/pure_rlm_environment.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/repl_types.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/theme.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/session_replay.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/task_signature.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/termination.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/trajectory.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/rlm/visualizer.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/base.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/runtimes/registry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/sandbox/superbox.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/session/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/session/state_manager.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/.env.example +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/adapters.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/async_streaming.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/complete_programs.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/dspy_config_example.yaml +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/evaluation.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/industry_templates.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/optimizers.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/retrievers.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/tests/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/tests/rlm/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/tests/rlm/test_phase2.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/agent_collab_view.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/animations.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/conversation.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/design_system.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/diff_viewer.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/notifications.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/persistent_shell.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/prompt_widget.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/prompts.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/pty_terminal.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/resizable_divider.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/thinking_display.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/tui_app.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/tui_utils.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/ui/welcome.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/anti_patterns.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/auto_fixer.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/best_practices.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/code_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/config_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/exceptions.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/input_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/learning_integration.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/models.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/module_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/predictor_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/quality_scorer.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/report_generator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/security.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/security_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/signature_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/rlm_code/validation/validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/__init__.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/conftest.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_code_interpreter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_deepagents_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_extract_fallback.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_framework_registry_coverage.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_google_adk_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_leaderboard.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_mock_interpreter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_monty_interpreter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_observability_sinks.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_p0_features.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_phase3.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_phase4.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_repl_history.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_security_hardening.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_session_replay.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_submit.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_task_signature.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/rlm/test_user_tools.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_anti_patterns.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_auto_fixer.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_cache.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_execution_engine.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_export_import.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_harness_registry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_harness_runner.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_init_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_integration.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_learning_integration.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_mcp_utils.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_module_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_optimization_workflow.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_persistent_shell.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_predictor_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_project_scanner.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_prompt_widget.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_property_validators.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_provider_discovery.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_provider_registry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_quality_scorer.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_report_generator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_retry.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_rlm_config.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_rlm_dspy_environment.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_rlm_observability.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_rlm_runner.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_sandbox_runtimes.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_security_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_session_management.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_signature_validator.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_slash_harness_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_slash_rlm_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_slash_sandbox_command.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_streaming.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_superbox.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_tui_utils.py +0 -0
- {rlm_code-0.1.6 → rlm_code-0.1.7}/tests/test_validation.py +0 -0
|
@@ -5,6 +5,19 @@ All notable changes to this project are documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.7] - 2026-04-30
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- HALO-style `trace_analysis` RLM environment for diagnosing agent harness failures from one-span-per-line JSONL traces.
|
|
12
|
+
- Trace sidecar indexing with dataset rollups for trace counts, span counts, error traces, services, models, agents, token totals, and sample trace ids.
|
|
13
|
+
- Bounded trace inspection actions: `get_dataset_overview`, `query_traces`, `count_traces`, `view_trace`, `search_trace`, and `view_spans`.
|
|
14
|
+
- Large-trace safeguards: per-attribute truncation, oversized trace summaries, and higher-cap selected-span reads.
|
|
15
|
+
- Tests for trace indexing, querying, searching, selected-span viewing, and trace environment actions.
|
|
16
|
+
- Trace analysis documentation under the Core Engine docs.
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
- `/rlm` command help now advertises `env=trace_analysis` for run, chat, and doctor workflows.
|
|
20
|
+
|
|
8
21
|
## [0.1.6] - 2026-02-20
|
|
9
22
|
|
|
10
23
|
### Added
|
|
@@ -56,3 +69,4 @@ Initial public release of **RLM Code**.
|
|
|
56
69
|
|
|
57
70
|
[0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
|
|
58
71
|
[0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
|
|
72
|
+
[0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlm-code
|
|
3
|
-
Version: 0.1.6
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
|
|
5
5
|
Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
|
|
6
6
|
Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
|
|
@@ -118,20 +118,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
118
118
|
|
|
119
119
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
120
120
|
|
|
121
|
-
## Release v0.1.6
|
|
121
|
+
## Release v0.1.7
|
|
122
122
|
|
|
123
|
-
This release adds
|
|
123
|
+
This release adds HALO-style trace analysis as a new RLM environment.
|
|
124
124
|
|
|
125
|
-
- New harness
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
-
|
|
129
|
-
- Dedicated
|
|
125
|
+
- New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
|
|
126
|
+
- Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
|
|
127
|
+
- Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
|
|
128
|
+
- `/rlm` help/docs updated for `env=trace_analysis`
|
|
129
|
+
- Dedicated trace analysis docs under the Core Engine section
|
|
130
130
|
|
|
131
131
|
Example:
|
|
132
132
|
|
|
133
133
|
```text
|
|
134
|
-
/
|
|
134
|
+
/rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
|
|
135
135
|
```
|
|
136
136
|
|
|
137
137
|
## Documentation
|
|
@@ -287,6 +287,62 @@ Notes:
|
|
|
287
287
|
- In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
|
|
288
288
|
- In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
|
|
289
289
|
|
|
290
|
+
### 8. CodeMode with UTCP and Cloudflare MCP
|
|
291
|
+
|
|
292
|
+
Use these server entries in your project `rlm_config.yaml`:
|
|
293
|
+
|
|
294
|
+
```yaml
|
|
295
|
+
mcp_servers:
|
|
296
|
+
utcp-codemode:
|
|
297
|
+
name: utcp-codemode
|
|
298
|
+
description: "Local CodeMode MCP bridge"
|
|
299
|
+
enabled: true
|
|
300
|
+
auto_connect: false
|
|
301
|
+
timeout_seconds: 30
|
|
302
|
+
retry_attempts: 3
|
|
303
|
+
transport:
|
|
304
|
+
type: stdio
|
|
305
|
+
command: npx
|
|
306
|
+
args:
|
|
307
|
+
- "@utcp/code-mode-mcp"
|
|
308
|
+
|
|
309
|
+
cloudflare-codemode:
|
|
310
|
+
name: cloudflare-codemode
|
|
311
|
+
description: "Cloudflare MCP via remote bridge"
|
|
312
|
+
enabled: true
|
|
313
|
+
auto_connect: false
|
|
314
|
+
timeout_seconds: 30
|
|
315
|
+
retry_attempts: 3
|
|
316
|
+
transport:
|
|
317
|
+
type: stdio
|
|
318
|
+
command: npx
|
|
319
|
+
args:
|
|
320
|
+
- "mcp-remote"
|
|
321
|
+
- "https://mcp.cloudflare.com/mcp"
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
UTCP path (native CodeMode in current release):
|
|
325
|
+
|
|
326
|
+
```text
|
|
327
|
+
/mcp-connect utcp-codemode
|
|
328
|
+
/mcp-tools utcp-codemode
|
|
329
|
+
/harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
Cloudflare path (recommended strategy today):
|
|
333
|
+
|
|
334
|
+
```text
|
|
335
|
+
/mcp-connect cloudflare-codemode
|
|
336
|
+
/mcp-tools cloudflare-codemode
|
|
337
|
+
/harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Notes:
|
|
341
|
+
|
|
342
|
+
- On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
|
|
343
|
+
- In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
|
|
344
|
+
- If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
|
|
345
|
+
|
|
290
346
|
## How the RLM Loop Works
|
|
291
347
|
|
|
292
348
|
Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
|
|
@@ -25,20 +25,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
25
25
|
|
|
26
26
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
27
27
|
|
|
28
|
-
## Release v0.1.6
|
|
28
|
+
## Release v0.1.7
|
|
29
29
|
|
|
30
|
-
This release adds
|
|
30
|
+
This release adds HALO-style trace analysis as a new RLM environment.
|
|
31
31
|
|
|
32
|
-
- New harness
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
-
|
|
36
|
-
- Dedicated
|
|
32
|
+
- New `trace_analysis` environment for diagnosing agent harness failures from OTel-shaped JSONL traces
|
|
33
|
+
- Sidecar trace indexing with dataset overview, query, count, search, full-trace view, and selected-span view actions
|
|
34
|
+
- Bounded payload handling for large traces, including oversized summaries and higher-cap surgical span reads
|
|
35
|
+
- `/rlm` help/docs updated for `env=trace_analysis`
|
|
36
|
+
- Dedicated trace analysis docs under the Core Engine section
|
|
37
37
|
|
|
38
38
|
Example:
|
|
39
39
|
|
|
40
40
|
```text
|
|
41
|
-
/
|
|
41
|
+
/rlm run "Find systemic harness failures trace=./traces.jsonl" env=trace_analysis steps=6
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
## Documentation
|
|
@@ -194,6 +194,62 @@ Notes:
|
|
|
194
194
|
- In Local/BYOK connection modes, likely coding prompts in chat can auto-route to harness.
|
|
195
195
|
- In ACP mode, auto-routing is intentionally off; use `/harness run ...` explicitly.
|
|
196
196
|
|
|
197
|
+
### 8. CodeMode with UTCP and Cloudflare MCP
|
|
198
|
+
|
|
199
|
+
Use these server entries in your project `rlm_config.yaml`:
|
|
200
|
+
|
|
201
|
+
```yaml
|
|
202
|
+
mcp_servers:
|
|
203
|
+
utcp-codemode:
|
|
204
|
+
name: utcp-codemode
|
|
205
|
+
description: "Local CodeMode MCP bridge"
|
|
206
|
+
enabled: true
|
|
207
|
+
auto_connect: false
|
|
208
|
+
timeout_seconds: 30
|
|
209
|
+
retry_attempts: 3
|
|
210
|
+
transport:
|
|
211
|
+
type: stdio
|
|
212
|
+
command: npx
|
|
213
|
+
args:
|
|
214
|
+
- "@utcp/code-mode-mcp"
|
|
215
|
+
|
|
216
|
+
cloudflare-codemode:
|
|
217
|
+
name: cloudflare-codemode
|
|
218
|
+
description: "Cloudflare MCP via remote bridge"
|
|
219
|
+
enabled: true
|
|
220
|
+
auto_connect: false
|
|
221
|
+
timeout_seconds: 30
|
|
222
|
+
retry_attempts: 3
|
|
223
|
+
transport:
|
|
224
|
+
type: stdio
|
|
225
|
+
command: npx
|
|
226
|
+
args:
|
|
227
|
+
- "mcp-remote"
|
|
228
|
+
- "https://mcp.cloudflare.com/mcp"
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
UTCP path (native CodeMode in current release):
|
|
232
|
+
|
|
233
|
+
```text
|
|
234
|
+
/mcp-connect utcp-codemode
|
|
235
|
+
/mcp-tools utcp-codemode
|
|
236
|
+
/harness run "analyze this repo, find TODO/FIXME, and create report.json" steps=3 mcp=on strategy=codemode mcp_server=utcp-codemode
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Cloudflare path (recommended strategy today):
|
|
240
|
+
|
|
241
|
+
```text
|
|
242
|
+
/mcp-connect cloudflare-codemode
|
|
243
|
+
/mcp-tools cloudflare-codemode
|
|
244
|
+
/harness run "list available tools and run one safe read-only action, then summarize in 3 bullets" steps=3 mcp=on strategy=tool_call mcp_server=cloudflare-codemode
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Notes:
|
|
248
|
+
|
|
249
|
+
- On first Cloudflare connect, `mcp-remote` may ask for interactive authentication.
|
|
250
|
+
- In this release, `strategy=codemode` expects the `search_tools` + `call_tool_chain` bridge contract.
|
|
251
|
+
- If a remote MCP server exposes a different tool contract, use `strategy=tool_call`.
|
|
252
|
+
|
|
197
253
|
## How the RLM Loop Works
|
|
198
254
|
|
|
199
255
|
Traditional LLM usage: paste your document into the prompt, ask a question, hope the model doesn't lose details in the middle.
|
|
@@ -1684,7 +1684,7 @@ class SlashCommandHandler:
|
|
|
1684
1684
|
Manage RLM runs.
|
|
1685
1685
|
|
|
1686
1686
|
Usage:
|
|
1687
|
-
/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1687
|
+
/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model]
|
|
1688
1688
|
/rlm bench [list|preset=name] [mode=native|harness|direct-llm] [strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] [pack=path[,path2]] [limit=N] [steps=N] [timeout=N] [branch=N] [framework=<see /rlm frameworks>] [env=generic|dspy|pure_rlm] [sub=provider/model]
|
|
1689
1689
|
/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N]
|
|
1690
1690
|
/rlm bench validate [candidate=<id|path|latest>] [baseline=<id|path|previous>] [min_reward_delta=N] [min_completion_delta=N] [max_steps_increase=N] [--json]
|
|
@@ -1696,8 +1696,8 @@ class SlashCommandHandler:
|
|
|
1696
1696
|
/rlm status [run_id]
|
|
1697
1697
|
/rlm abort [run_id|all]
|
|
1698
1698
|
/rlm replay [run_id|latest]
|
|
1699
|
-
/rlm doctor [env=generic|dspy|pure_rlm] [--json]
|
|
1700
|
-
/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
|
|
1699
|
+
/rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json]
|
|
1700
|
+
/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] [children=N] [parallel=N] [budget=N] [framework=<see /rlm frameworks>] [sub=provider/model]
|
|
1701
1701
|
/rlm chat status [session=name]
|
|
1702
1702
|
/rlm chat reset [session=name]
|
|
1703
1703
|
/rlm observability
|
|
@@ -1708,14 +1708,14 @@ class SlashCommandHandler:
|
|
|
1708
1708
|
console.print("[bold cyan]🧠 RLM Commands[/bold cyan]")
|
|
1709
1709
|
console.print(
|
|
1710
1710
|
" [yellow]/rlm run <task> [steps=N] [timeout=N] [branch=N] [depth=N] [children=N] "
|
|
1711
|
-
f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] "
|
|
1711
|
+
f"[parallel=N] [budget=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] "
|
|
1712
1712
|
"[sub=provider/model][/yellow]"
|
|
1713
1713
|
)
|
|
1714
1714
|
console.print(
|
|
1715
1715
|
" [yellow]/rlm bench [list|preset=name] [mode=native|harness|direct-llm] "
|
|
1716
1716
|
"[strategy=tool_call|codemode] [mcp=on|off] [mcp_server=name] "
|
|
1717
1717
|
"[pack=path[,path2]] [limit=N] [steps=N] "
|
|
1718
|
-
f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm] [sub=provider/model][/yellow]"
|
|
1718
|
+
f"[timeout=N] [branch=N] [framework={framework_opts}] [env=generic|dspy|pure_rlm|trace_analysis] [sub=provider/model][/yellow]"
|
|
1719
1719
|
)
|
|
1720
1720
|
console.print(
|
|
1721
1721
|
" [yellow]/rlm bench compare [candidate=<id|path|latest>] [baseline=<id|path|previous>] "
|
|
@@ -1741,9 +1741,9 @@ class SlashCommandHandler:
|
|
|
1741
1741
|
console.print(" [yellow]/rlm status [run_id][/yellow]")
|
|
1742
1742
|
console.print(" [yellow]/rlm abort [run_id|all][/yellow]")
|
|
1743
1743
|
console.print(" [yellow]/rlm replay [run_id|latest][/yellow]")
|
|
1744
|
-
console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm] [--json][/yellow]")
|
|
1744
|
+
console.print(" [yellow]/rlm doctor [env=generic|dspy|pure_rlm|trace_analysis] [--json][/yellow]")
|
|
1745
1745
|
console.print(
|
|
1746
|
-
" [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm] [branch=N] [depth=N] "
|
|
1746
|
+
" [yellow]/rlm chat <message> [session=name] [env=generic|dspy|pure_rlm|trace_analysis] [branch=N] [depth=N] "
|
|
1747
1747
|
f"[children=N] [parallel=N] [budget=N] [framework={framework_opts}] "
|
|
1748
1748
|
"[sub=provider/model][/yellow]"
|
|
1749
1749
|
)
|
|
@@ -2135,7 +2135,7 @@ class SlashCommandHandler:
|
|
|
2135
2135
|
task = " ".join(task_tokens).strip()
|
|
2136
2136
|
if not task:
|
|
2137
2137
|
show_error_message(
|
|
2138
|
-
"Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm] "
|
|
2138
|
+
"Usage: /rlm run <task> [steps=N] [timeout=N] [env=generic|dspy|pure_rlm|trace_analysis] "
|
|
2139
2139
|
"[depth=N] [children=N] [parallel=N] [budget=N] "
|
|
2140
2140
|
f"[framework={framework_opts}] "
|
|
2141
2141
|
"[branch=N] [sub=provider/model]"
|
|
@@ -15,6 +15,7 @@ from .environments import (
|
|
|
15
15
|
DSPyCodingRLMEnvironment,
|
|
16
16
|
GenericRLMEnvironment,
|
|
17
17
|
RLMEnvironment,
|
|
18
|
+
TraceAnalysisEnvironment,
|
|
18
19
|
)
|
|
19
20
|
from .pure_rlm_environment import PureRLMConfig, PureRLMEnvironment
|
|
20
21
|
|
|
@@ -276,6 +277,8 @@ class ActionPlannerMixin:
|
|
|
276
277
|
)
|
|
277
278
|
if isinstance(env, DSPyCodingRLMEnvironment):
|
|
278
279
|
return DSPyCodingRLMEnvironment(workdir=workdir, reward_profile=self.reward_profile)
|
|
280
|
+
if isinstance(env, TraceAnalysisEnvironment):
|
|
281
|
+
return TraceAnalysisEnvironment(workdir=workdir, reward_profile=self.reward_profile)
|
|
279
282
|
if isinstance(env, GenericRLMEnvironment):
|
|
280
283
|
return GenericRLMEnvironment(workdir=workdir, reward_profile=self.reward_profile)
|
|
281
284
|
# Fallback to generic environment in preview if an unknown env type appears.
|
|
@@ -286,6 +286,251 @@ class GenericRLMEnvironment:
|
|
|
286
286
|
return "Execution failed without stderr."
|
|
287
287
|
|
|
288
288
|
|
|
289
|
+
class TraceAnalysisEnvironment(GenericRLMEnvironment):
    """HALO-style trace analysis environment over one-span-per-line JSONL traces.

    The planner drives this environment with JSON actions: ``set_trace_path``,
    ``get_dataset_overview``, ``query_traces``, ``count_traces``, ``view_trace``,
    ``search_trace``, ``view_spans`` and ``final``. Every action except ``final``
    is answered from a ``TraceStore`` loaded for the active trace file; ``final``
    is delegated to ``GenericRLMEnvironment.execute_action``.
    """

    name = "trace_analysis"

    # Actions answered from the trace store. Anything else (apart from "final")
    # is rejected before a store is resolved or loaded.
    _TRACE_ACTIONS = frozenset(
        {
            "set_trace_path",
            "get_dataset_overview",
            "query_traces",
            "count_traces",
            "view_trace",
            "search_trace",
            "view_spans",
        }
    )

    def __init__(
        self,
        workdir: Path | None = None,
        reward_profile: RLMRewardProfile | dict[str, Any] | None = None,
    ):
        """Create the environment with no trace dataset loaded yet."""
        super().__init__(workdir=workdir, reward_profile=reward_profile)
        # Resolved path of the dataset most recently loaded (or attempted from the task).
        self._trace_path: Path | None = None
        # Loaded TraceStore, or None when nothing is loaded / the last task load failed.
        self._store: Any | None = None
        # "<ExceptionType>: <message>" of the last failed task-path load, else None.
        self._load_error: str | None = None

    def system_prompt(self) -> str:
        """Return the planner system prompt describing the JSON action schema and rules."""
        return (
            "You are an RLM planner specialized for analyzing agent execution traces.\n"
            "Return ONLY valid JSON object with keys:\n"
            "{"
            '"action": "set_trace_path" | "get_dataset_overview" | "query_traces" | '
            '"count_traces" | "view_trace" | "search_trace" | "view_spans" | "final", '
            '"trace_path": "<path to JSONL traces>", '
            '"filters": {"has_errors": true, "model_names": ["..."], "service_names": ["..."], '
            '"agent_names": ["..."], "project_id": "..."}, '
            '"trace_id": "<trace id>", '
            '"span_ids": ["<span id>"], '
            '"pattern": "<literal substring>", '
            '"limit": <integer>, '
            '"offset": <integer>, '
            '"rationale": "<brief reason>", '
            '"done": true|false, '
            '"final_response": "<required when action=final>"'
            "}\n"
            "Rules:\n"
            "- Load a trace file first if one is not already active.\n"
            "- Always begin analysis with get_dataset_overview.\n"
            "- Use query_traces to choose real trace ids; never invent trace ids.\n"
            "- For large traces, prefer search_trace followed by view_spans.\n"
            "- Identify systemic harness failures, not one-off anomalies.\n"
            "- Output JSON only."
        )

    def planner_prompt(
        self, task: str, memory: list[str], trajectory: list[dict[str, Any]], step_index: int
    ) -> str:
        """Build the per-step planner prompt, loading a trace path named in the task.

        If ``task`` contains ``trace=<path>`` or ``trace_path=<path>`` and that
        path is not the active one, the dataset is loaded here. A load failure
        never raises: it is recorded and reported in the returned prompt.
        """
        inferred = self._extract_trace_path(task)
        if inferred is not None:
            try:
                # Normalize the same way _load_store does, so an already-loaded
                # relative or symlinked path is not reloaded on every step.
                target = self._absolute_trace_path(inferred).resolve()
            except (OSError, RuntimeError, ValueError):
                target = inferred
            if target != self._trace_path:
                try:
                    self._load_store(inferred)
                except Exception as exc:
                    # Keep prompt construction alive and remember the failed path
                    # so it is not retried on every step; the error itself is
                    # surfaced to the planner in the prompt below.
                    self._trace_path = target
                    self._store = None
                    self._load_error = f"{type(exc).__name__}: {exc}"

        base = super().planner_prompt(task, memory, trajectory, step_index)
        active = str(self._trace_path) if self._trace_path is not None else "(none)"
        overview = ""
        if self._store is not None:
            try:
                data = self._store.get_overview({})
                overview = (
                    f"\nActive trace overview: traces={data['total_traces']} "
                    f"spans={data['total_spans']} errors={data['error_trace_count']} "
                    f"sample_trace_ids={data['sample_trace_ids'][:5]}"
                )
            except Exception:
                # The overview is a best-effort hint; the prompt is still usable without it.
                overview = ""
        load_note = ""
        if self._store is None and self._load_error:
            load_note = f"\nTrace load failed: {self._load_error}"
        return (
            f"{base}\n\n"
            f"Trace analysis environment.\n"
            f"Active trace path: {active}\n"
            "If the task includes trace=<path> or trace_path=<path>, use that file.\n"
            "Goal: produce a concise evidence report of repeated harness failure modes "
            "with concrete trace ids/spans and suggested harness changes."
            f"{overview}"
            f"{load_note}"
        )

    def execute_action(
        self,
        action: dict[str, Any],
        execution_engine: Any,
        exec_timeout: int,
        llm_connector: Any | None = None,
    ) -> EnvironmentActionResult:
        """Execute one planner action and return its observation, reward and memory note.

        ``final`` is delegated to the parent class. Unsupported action names are
        rejected before any store is resolved. Any exception raised while serving
        a trace action is converted into a ``success: False`` observation with a
        negative reward instead of propagating.
        """
        action_name = str(action.get("action", "")).strip().lower()
        if action_name == "final":
            return super().execute_action(
                action,
                execution_engine,
                exec_timeout,
                llm_connector=llm_connector,
            )

        # Reject unknown actions first: otherwise they would either report a
        # misleading "no trace dataset loaded" error or load a store as a side effect.
        if action_name not in self._TRACE_ACTIONS:
            return self._unsupported(action_name)

        try:
            if action_name == "set_trace_path":
                store = self._store_from_action(action, required_path=True)
                return EnvironmentActionResult(
                    observation={
                        "success": True,
                        "trace_path": str(store.trace_path),
                        "index_path": str(store.index_path),
                        "overview": store.get_overview({}),
                    },
                    reward=0.55,
                    memory_note=f"Loaded trace dataset: {store.trace_path}",
                )

            store = self._store_from_action(action, required_path=False)
            filters = action.get("filters") if isinstance(action.get("filters"), dict) else {}

            if action_name == "get_dataset_overview":
                return self._ok(
                    observation=store.get_overview(filters),
                    reward=0.45,
                    memory_note="Loaded trace dataset overview.",
                )
            if action_name == "query_traces":
                return self._ok(
                    observation=store.query_traces(
                        filters,
                        limit=self._int_arg(action, "limit", 50, minimum=1, maximum=200),
                        offset=self._int_arg(action, "offset", 0, minimum=0, maximum=1_000_000),
                    ),
                    reward=0.5,
                    memory_note="Queried trace summaries.",
                )
            if action_name == "count_traces":
                return self._ok(
                    observation=store.count_traces(filters),
                    reward=0.35,
                    memory_note="Counted traces matching filters.",
                )
            if action_name == "view_trace":
                trace_id = self._required_str(action, "trace_id")
                return self._ok(
                    observation=store.view_trace(trace_id),
                    reward=0.65,
                    memory_note=f"Viewed trace {trace_id}.",
                )
            if action_name == "search_trace":
                trace_id = self._required_str(action, "trace_id")
                pattern = self._required_str(action, "pattern")
                return self._ok(
                    observation=store.search_trace(
                        trace_id,
                        pattern,
                        limit=self._int_arg(action, "limit", 100, minimum=1, maximum=500),
                    ),
                    reward=0.65,
                    memory_note=f"Searched trace {trace_id} for {pattern!r}.",
                )
            if action_name == "view_spans":
                trace_id = self._required_str(action, "trace_id")
                span_ids = action.get("span_ids")
                if not isinstance(span_ids, list) or not span_ids:
                    raise ValueError("view_spans requires non-empty span_ids list")
                return self._ok(
                    observation=store.view_spans(trace_id, [str(item) for item in span_ids]),
                    reward=0.7,
                    memory_note=f"Viewed selected spans for trace {trace_id}.",
                )
        except Exception as exc:
            # Boundary between planner-supplied input and the store: report the
            # failure back to the planner as a structured observation.
            return EnvironmentActionResult(
                observation={"success": False, "error": f"{type(exc).__name__}: {exc}"},
                reward=-0.25,
                memory_note=f"Trace analysis action failed: {type(exc).__name__}.",
            )

        # Safety net: only reachable if _TRACE_ACTIONS names an action with no branch above.
        return self._unsupported(action_name)

    def doctor_checks(self) -> list[EnvironmentDoctorCheck]:
        """Return the parent's doctor checks plus a passing ``trace_analysis`` check."""
        checks = super().doctor_checks()
        checks.append(
            EnvironmentDoctorCheck(
                name="trace_analysis",
                status="pass",
                detail="Trace analysis environment is available.",
            )
        )
        return checks

    def _unsupported(self, action_name: str) -> EnvironmentActionResult:
        """Build the failure result for an action name this environment does not serve."""
        return EnvironmentActionResult(
            observation={"success": False, "error": f"Unsupported action '{action_name}'."},
            reward=-0.2,
            memory_note="Planner produced unsupported trace action.",
        )

    def _ok(self, *, observation: dict[str, Any], reward: float, memory_note: str) -> EnvironmentActionResult:
        """Wrap a store result as a successful action result.

        ``success: True`` is placed first, so a ``success`` key already present
        in ``observation`` takes precedence over it.
        """
        payload = {"success": True, **observation}
        return EnvironmentActionResult(
            observation=payload,
            reward=reward,
            memory_note=memory_note,
        )

    def _store_from_action(self, action: dict[str, Any], *, required_path: bool) -> Any:
        """Return the store an action should run against.

        A non-blank ``trace_path`` (or ``path``) in ``action`` loads that dataset
        and makes it active. Otherwise the active store is reused, unless
        ``required_path`` is true.

        Raises:
            ValueError: if ``required_path`` is true and the action names no path,
                or if no path is given and no dataset is loaded.
        """
        raw = action.get("trace_path") or action.get("path")
        if isinstance(raw, str) and raw.strip():
            return self._load_store(Path(raw.strip()).expanduser())
        # Check the requirement before falling back to the active store, so
        # set_trace_path without a path is an error rather than a rewarded no-op.
        if required_path:
            raise ValueError("trace_path is required")
        if self._store is not None:
            return self._store
        raise ValueError("no trace dataset loaded; pass trace_path or use set_trace_path first")

    def _absolute_trace_path(self, trace_path: Path) -> Path:
        """Return ``trace_path`` unchanged if absolute, else joined onto ``self.workdir``."""
        return trace_path if trace_path.is_absolute() else (self.workdir / trace_path)

    def _load_store(self, trace_path: Path) -> Any:
        """Load ``trace_path`` through ``TraceStore.load`` and make it the active dataset.

        State is only updated after a successful load; if ``TraceStore.load``
        raises, the previously active dataset stays in place.
        """
        # Imported at call time rather than module level, as in the original code
        # (presumably to avoid an import cycle or import cost; confirm before moving).
        from ..traces import TraceStore

        resolved = self._absolute_trace_path(trace_path)
        store = TraceStore.load(resolved)
        self._trace_path = resolved.resolve()
        self._store = store
        self._load_error = None
        return store

    @staticmethod
    def _extract_trace_path(task: str) -> Path | None:
        """Return the path from the first ``trace=<path>`` / ``trace_path=<path>`` token in ``task``.

        The token must start the string or follow whitespace, and the value ends
        at the next whitespace. Surrounding quotes are stripped and ``~`` is
        expanded. Returns ``None`` when no token is present or the value is empty.
        """
        match = re.search(r"(?:^|\s)(?:trace|trace_path)=([^\s]+)", task)
        if not match:
            return None
        raw = match.group(1).strip().strip("\"'")
        return Path(raw).expanduser() if raw else None

    @staticmethod
    def _required_str(action: dict[str, Any], key: str) -> str:
        """Return ``action[key]`` stripped of surrounding whitespace.

        Raises:
            ValueError: if the value is missing, not a string, or blank.
        """
        value = action.get(key)
        if not isinstance(value, str) or not value.strip():
            raise ValueError(f"{key} is required")
        return value.strip()

    @staticmethod
    def _int_arg(
        action: dict[str, Any],
        key: str,
        default: int,
        *,
        minimum: int,
        maximum: int,
    ) -> int:
        """Read ``action[key]`` as an int clamped to ``[minimum, maximum]``.

        A missing key or a value ``int()`` cannot convert falls back to
        ``default``, which is clamped as well.
        """
        value = action.get(key, default)
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError):
            # None / containers, non-numeric strings or NaN, and infinities respectively.
            parsed = default
        return max(minimum, min(maximum, parsed))
|
|
532
|
+
|
|
533
|
+
|
|
289
534
|
class DSPyCodingRLMEnvironment(GenericRLMEnvironment):
|
|
290
535
|
"""DSPy-focused environment with file edit + tests + DSPy-aware scoring."""
|
|
291
536
|
|
|
@@ -38,6 +38,7 @@ from .environments import (
|
|
|
38
38
|
GenericRLMEnvironment,
|
|
39
39
|
RLMEnvironment,
|
|
40
40
|
RLMRewardProfile,
|
|
41
|
+
TraceAnalysisEnvironment,
|
|
41
42
|
)
|
|
42
43
|
from .events import RLMEventBus
|
|
43
44
|
from .frameworks import FrameworkAdapterRegistry, FrameworkEpisodeResult
|
|
@@ -279,6 +280,18 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
279
280
|
workdir=self.workdir,
|
|
280
281
|
reward_profile=self.reward_profile,
|
|
281
282
|
),
|
|
283
|
+
"trace_analysis": TraceAnalysisEnvironment(
|
|
284
|
+
workdir=self.workdir,
|
|
285
|
+
reward_profile=self.reward_profile,
|
|
286
|
+
),
|
|
287
|
+
"trace-analysis": TraceAnalysisEnvironment(
|
|
288
|
+
workdir=self.workdir,
|
|
289
|
+
reward_profile=self.reward_profile,
|
|
290
|
+
),
|
|
291
|
+
"traces": TraceAnalysisEnvironment(
|
|
292
|
+
workdir=self.workdir,
|
|
293
|
+
reward_profile=self.reward_profile,
|
|
294
|
+
),
|
|
282
295
|
"framework": DSPyCodingRLMEnvironment(
|
|
283
296
|
workdir=self.workdir,
|
|
284
297
|
reward_profile=self.reward_profile,
|