rlm-code 0.1.8__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rlm_code-0.1.8 → rlm_code-0.1.9}/.gitignore +1 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/CHANGELOG.md +12 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/PKG-INFO +9 -10
- {rlm_code-0.1.8 → rlm_code-0.1.9}/README.md +8 -9
- {rlm_code-0.1.8 → rlm_code-0.1.9}/pyproject.toml +1 -1
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/__init__.py +1 -1
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/__init__.py +1 -1
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/runner.py +97 -1
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/session_replay.py +34 -6
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/visualizer.py +23 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/tui_app.py +87 -6
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_session_replay.py +56 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_rlm_runner.py +33 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/LICENSE +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/NOTICE +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/agent.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/agents/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/agents/rlm_agent.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/callbacks/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/callbacks/code_execution.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/cli.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/code_executor.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/events.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/base.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/lazy.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/loader.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/parsers/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/parsers/base.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/parsers/pdf.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/parsers/text.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/sources/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/sources/base.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/sources/gcs.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/files/sources/local.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/llm.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/logging/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/logging/rlm_logger.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/logging/verbose.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/main.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/prompts.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/repl/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/repl/local_repl.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/repl/safe_builtins.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/templates/index.html +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/tools/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/types.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/usage.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/adk_rlm/web.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/eval/packs/README.md +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/__main__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/config_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/create_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/demo_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/export_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/init_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/interactive_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/mcp_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/models_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/nl_command_router.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/optimize_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/run_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/commands/slash_commands.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/config.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/debug_logger.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/directory_utils.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/exceptions.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/logging.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/venv_utils.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/core/version_checker.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/phase2_demo.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/phase3_demo.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/phase4_demo.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/examples/pure_rlm_demo.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/execution/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/execution/engine.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/execution/sandbox.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/export/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/export/handler.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/export/package_builder.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/generators/evaluation_generator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/generators/gepa_generator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/harness/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/harness/registry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/harness/runner.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/main.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/client_manager.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/config.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/exceptions.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/retry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/server/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/server/rlm_server.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/server/tools.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/session_wrapper.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/factory.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/sse_transport.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/stdio_transport.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/transports/websocket_transport.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/mcp/utils.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/cache.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/code_generator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/dspy_reference_loader.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/llm_connector.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/model_manager.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/acp_discovery.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/local_discovery.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/model_catalog.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/providers/registry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/streaming.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/models/task_collector.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/optimization/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/optimization/data_collector.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/optimization/executor.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/optimization/workflow_manager.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/context_manager.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/dspy_md_generator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/initializer.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/project/scanner.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/py.typed +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/action_planner.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/audit.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/gate.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/handlers.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/approval/policy.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/benchmark_manager.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/benchmarks.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/chat_session.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/code_interpreter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/comparison.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/config_schema.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/context_store.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/delegation.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/docker_interpreter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/environments.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/events.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/base.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/registry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/leaderboard.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/memory_compaction.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/mock_interpreter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/monty_interpreter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/observability.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/observability_sinks.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/action_policies.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/base.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/compaction_policies.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/registry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/reward_policies.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/policies/termination_policies.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/pure_rlm_environment.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/repl_types.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/theme.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/task_signature.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/termination.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/rlm/trajectory.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/base.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/registry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/sandbox/superbox.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/session/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/session/state_manager.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/.env.example +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/adapters.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/async_streaming.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/complete_programs.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/dspy_config_example.yaml +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/evaluation.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/industry_templates.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/optimizers.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/retrievers.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/tests/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/tests/rlm/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/tests/rlm/test_phase2.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/traces/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/traces/index.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/traces/models.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/traces/store.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/agent_collab_view.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/animations.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/conversation.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/design_system.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/diff_viewer.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/notifications.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/persistent_shell.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/prompt_widget.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/prompts.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/pty_terminal.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/resizable_divider.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/thinking_display.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/tui_utils.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/ui/welcome.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/anti_patterns.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/auto_fixer.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/best_practices.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/code_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/config_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/exceptions.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/input_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/learning_integration.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/models.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/module_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/predictor_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/quality_scorer.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/report_generator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/security.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/security_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/signature_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/rlm_code/validation/validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/__init__.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/conftest.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_code_interpreter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_deepagents_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_extract_fallback.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_framework_registry_coverage.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_google_adk_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_leaderboard.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_mock_interpreter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_monty_interpreter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_observability_sinks.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_p0_features.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_phase3.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_phase4.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_repl_history.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_security_hardening.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_submit.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_task_signature.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/rlm/test_user_tools.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_anti_patterns.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_auto_fixer.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_cache.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_execution_engine.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_export_import.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_harness_registry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_harness_runner.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_init_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_integration.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_learning_integration.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_mcp_utils.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_module_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_optimization_workflow.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_persistent_shell.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_predictor_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_project_scanner.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_prompt_widget.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_property_validators.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_provider_discovery.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_provider_registry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_quality_scorer.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_report_generator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_retry.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_rlm_config.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_rlm_dspy_environment.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_rlm_observability.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_sandbox_runtimes.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_security_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_session_management.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_signature_validator.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_slash_harness_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_slash_rlm_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_slash_sandbox_command.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_streaming.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_superbox.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_trace_analysis.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_tui_utils.py +0 -0
- {rlm_code-0.1.8 → rlm_code-0.1.9}/tests/test_validation.py +0 -0
|
@@ -5,6 +5,17 @@ All notable changes to this project are documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.9] - 2026-06-26
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Pure RLM runner context initialization from explicit workspace file references in the task, with compact repository snapshot fallback.
|
|
12
|
+
- Context-load events for Pure RLM runs, including loaded file names and total context characters.
|
|
13
|
+
- Runner JSONL replay coverage for action code, observations, success state, token counts, and cumulative reward.
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- TUI trajectory and replay views now show Pure RLM signals including REPL code, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables.
|
|
17
|
+
- Run visualization now includes richer Pure RLM previews for completed runs.
|
|
18
|
+
|
|
8
19
|
## [0.1.8] - 2026-05-01
|
|
9
20
|
|
|
10
21
|
### Added
|
|
@@ -76,5 +87,6 @@ Initial public release of **RLM Code**.
|
|
|
76
87
|
|
|
77
88
|
[0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
|
|
78
89
|
[0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
|
|
90
|
+
[0.1.9]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.9
|
|
79
91
|
[0.1.8]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.8
|
|
80
92
|
[0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlm-code
|
|
3
|
-
Version: 0.1.8
|
|
3
|
+
Version: 0.1.9
|
|
4
4
|
Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
|
|
5
5
|
Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
|
|
6
6
|
Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
|
|
@@ -118,21 +118,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
118
118
|
|
|
119
119
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
120
120
|
|
|
121
|
-
## Release v0.1.
|
|
121
|
+
## Release v0.1.9
|
|
122
122
|
|
|
123
|
-
This release
|
|
123
|
+
This release improves Pure RLM repository runs and makes completed trajectories more inspectable from the TUI and replay views.
|
|
124
124
|
|
|
125
|
-
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
-
|
|
129
|
-
-
|
|
130
|
-
- Dedicated trace analysis docs under the Core Engine section
|
|
125
|
+
- Pure RLM runs now initialize `context` from explicit workspace files mentioned in the task, with a compact repository snapshot fallback
|
|
126
|
+
- Runner events now record context-load metadata for Pure RLM runs
|
|
127
|
+
- Legacy runner JSONL step events replay with action code, observations, success, token counts, and cumulative reward
|
|
128
|
+
- Run visualization now includes REPL code previews, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables
|
|
129
|
+
- TUI trajectory and replay views now surface Pure RLM signals directly for completed runs
|
|
131
130
|
|
|
132
131
|
Example:
|
|
133
132
|
|
|
134
133
|
```text
|
|
135
|
-
/rlm run "
|
|
134
|
+
/rlm run "Validate pure_rlm_environment.py and cite context, REPL, llm_query, and FINAL evidence" env=pure_rlm steps=6
|
|
136
135
|
```
|
|
137
136
|
|
|
138
137
|
## Documentation
|
|
@@ -25,21 +25,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
25
25
|
|
|
26
26
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
27
27
|
|
|
28
|
-
## Release v0.1.
|
|
28
|
+
## Release v0.1.9
|
|
29
29
|
|
|
30
|
-
This release
|
|
30
|
+
This release improves Pure RLM repository runs and makes completed trajectories more inspectable from the TUI and replay views.
|
|
31
31
|
|
|
32
|
-
-
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
-
|
|
36
|
-
-
|
|
37
|
-
- Dedicated trace analysis docs under the Core Engine section
|
|
32
|
+
- Pure RLM runs now initialize `context` from explicit workspace files mentioned in the task, with a compact repository snapshot fallback
|
|
33
|
+
- Runner events now record context-load metadata for Pure RLM runs
|
|
34
|
+
- Legacy runner JSONL step events replay with action code, observations, success, token counts, and cumulative reward
|
|
35
|
+
- Run visualization now includes REPL code previews, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables
|
|
36
|
+
- TUI trajectory and replay views now surface Pure RLM signals directly for completed runs
|
|
38
37
|
|
|
39
38
|
Example:
|
|
40
39
|
|
|
41
40
|
```text
|
|
42
|
-
/rlm run "
|
|
41
|
+
/rlm run "Validate pure_rlm_environment.py and cite context, REPL, llm_query, and FINAL evidence" env=pure_rlm steps=6
|
|
43
42
|
```
|
|
44
43
|
|
|
45
44
|
## Documentation
|
|
@@ -9,6 +9,7 @@ from __future__ import annotations
|
|
|
9
9
|
|
|
10
10
|
import hashlib
|
|
11
11
|
import json
|
|
12
|
+
import re
|
|
12
13
|
import threading
|
|
13
14
|
import time
|
|
14
15
|
from dataclasses import asdict, dataclass, is_dataclass
|
|
@@ -29,7 +30,7 @@ from .benchmark_manager import (
|
|
|
29
30
|
)
|
|
30
31
|
from .benchmarks import RLMBenchmarkCase, load_benchmark_packs
|
|
31
32
|
from .chat_session import ChatSessionMixin
|
|
32
|
-
from .context_store import LazyFileContext
|
|
33
|
+
from .context_store import ContextRef, LazyFileContext
|
|
33
34
|
from .delegation import DelegationMixin
|
|
34
35
|
from .environments import (
|
|
35
36
|
DSPyCodingRLMEnvironment,
|
|
@@ -467,6 +468,93 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
467
468
|
allow_unsafe_exec=(selected_backend == "exec" and self._pure_rlm_allow_unsafe_exec),
|
|
468
469
|
)
|
|
469
470
|
|
|
471
|
+
def _extract_task_file_refs(self, task: str, limit: int = 12) -> list[ContextRef]:
    """Find explicit workspace file references mentioned in a task string.

    The task text is scanned for path-like tokens (optional ``dir/``
    segments followed by a file name) that end in one of the recognised
    extensions: py, md, toml, yaml, yml, json, txt, js, jsx, ts, tsx.

    Args:
        task: Free-form task text that may mention file paths.
        limit: Maximum number of references to return. A non-positive
            limit yields an empty list.

    Returns:
        De-duplicated ``ContextRef`` objects in order of first appearance,
        at most ``limit`` of them.
    """
    if limit <= 0:
        return []

    # The trailing ``(?!\w)`` forces the whole extension to match: without
    # it the ordered alternation captures "app.tsx" as "app.ts" and
    # "data.jsonl" as "data.json". A following "." is still allowed so a
    # sentence-ending period after a file name does not block the match.
    pattern = (
        r"(?<![\w.-])(?:[\w.-]+/)*[\w.-]+"
        r"\.(?:py|md|toml|yaml|yml|json|txt|js|jsx|ts|tsx)(?!\w)"
    )

    seen: set[str] = set()
    refs: list[ContextRef] = []
    for match in re.finditer(pattern, task):
        normalized = match.group(0)
        # Drop redundant "./" prefixes only. Stripping bare leading dots
        # would turn "./src/a.py" into "/src/a.py" and ".github/ci.yml"
        # into "github/ci.yml".
        while normalized.startswith("./"):
            normalized = normalized[2:]
        if not normalized or normalized in seen:
            continue
        seen.add(normalized)
        refs.append(ContextRef(path=normalized))
        if len(refs) >= limit:
            break
    return refs
|
|
488
|
+
|
|
489
|
+
def _build_pure_rlm_initial_context(self, task: str) -> dict[str, str]:
    """Assemble the initial ``context`` mapping for a Pure RLM run.

    Files explicitly named in ``task`` are preferred. When the task names
    none, up to 12 references discovered by the context store are used
    instead. Each selected reference is read (capped at 12000 characters)
    and kept only if the read produced non-empty text.

    If nothing could be loaded, the result is a single ``"_workspace"``
    entry that names the working directory and lists up to 80 discovered
    file paths.

    Args:
        task: The task text for the run.

    Returns:
        A mapping of reference path to loaded snippet, or the
        ``"_workspace"`` fallback mapping.
    """
    selected = self._extract_task_file_refs(task) or self.context_store.discover(limit=12)

    loaded: dict[str, str] = {}
    for item in selected:
        text = self.context_store.read(item, max_chars=12000)
        if not text:
            continue
        loaded[item.path] = text
    if loaded:
        return loaded

    # Nothing readable: hand the model a file listing instead of snippets.
    listing = "\n".join(entry.path for entry in self.context_store.discover(limit=80))
    summary = (
        f"Workspace: {self.workdir}\n"
        "No explicit file snippets were loaded. Available files:\n"
    ) + listing
    return {"_workspace": summary.strip()}
|
|
520
|
+
|
|
521
|
+
def _initialize_pure_rlm_run_context(
    self,
    env: RLMEnvironment,
    task: str,
    *,
    run_id: str,
    run_path: Path,
) -> int:
    """Seed ``context`` for a Pure RLM run and record that it was loaded.

    Does nothing unless ``env.name`` is ``"pure_rlm"`` and the environment
    exposes ``initialize_context``. Otherwise it builds the initial
    context, passes it to the environment together with the task as the
    ``query`` variable, appends a ``"context"`` event to the run log, and
    emits a ``"context_load"`` runtime event.

    Args:
        env: Environment the run executes in.
        task: The task text for the run.
        run_id: Identifier stored on both events.
        run_path: Run log location passed to ``_append_event``.

    Returns:
        The number of entries in the initialized context, or 0 when the
        environment was not initialized.
    """
    supports_context = env.name == "pure_rlm" and hasattr(env, "initialize_context")
    if not supports_context:
        return 0

    seeded = self._build_pure_rlm_initial_context(task)
    env.initialize_context(
        seeded,
        description="Workspace files selected for this Pure RLM run",
        additional_vars={"query": task},
    )

    # Persist the event first, then notify live listeners with a summary.
    record = {
        "type": "context",
        "run_id": run_id,
        "environment": env.name,
        "timestamp": self._utc_now(),
        "context_files": list(seeded),
        "context_chars": sum(map(len, seeded.values())),
    }
    self._append_event(run_path, record)

    summary = {
        "run_id": run_id,
        "files": len(seeded),
        "chars": record["context_chars"],
    }
    self._emit_runtime_event("context_load", summary)
    return len(seeded)
|
|
557
|
+
|
|
470
558
|
def run_task(
|
|
471
559
|
self,
|
|
472
560
|
task: str,
|
|
@@ -596,6 +684,12 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
596
684
|
final_response = ""
|
|
597
685
|
cancelled = False
|
|
598
686
|
trajectory: list[dict[str, Any]] = []
|
|
687
|
+
context_files = self._initialize_pure_rlm_run_context(
|
|
688
|
+
env,
|
|
689
|
+
cleaned_task,
|
|
690
|
+
run_id=run_id,
|
|
691
|
+
run_path=run_path,
|
|
692
|
+
)
|
|
599
693
|
usage_start = self._usage_snapshot()
|
|
600
694
|
self.observability.on_run_start(
|
|
601
695
|
run_id,
|
|
@@ -616,6 +710,7 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
616
710
|
"parent_run_id": _parent_run_id,
|
|
617
711
|
"pure_rlm_backend": self._pure_rlm_backend if env.name == "pure_rlm" else None,
|
|
618
712
|
"pure_rlm_strict": strict_pure_mode if env.name == "pure_rlm" else None,
|
|
713
|
+
"context_files": context_files if env.name == "pure_rlm" else None,
|
|
619
714
|
},
|
|
620
715
|
)
|
|
621
716
|
self._emit_runtime_event(
|
|
@@ -627,6 +722,7 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
627
722
|
"framework": native_framework,
|
|
628
723
|
"depth": _depth,
|
|
629
724
|
"parent_run_id": _parent_run_id,
|
|
725
|
+
"context_files": context_files if env.name == "pure_rlm" else None,
|
|
630
726
|
},
|
|
631
727
|
)
|
|
632
728
|
|
|
@@ -1035,14 +1035,30 @@ def _convert_legacy_step(data: dict[str, Any]) -> SessionEvent:
|
|
|
1035
1035
|
step_type = data.get("type", "")
|
|
1036
1036
|
|
|
1037
1037
|
if step_type == "step":
|
|
1038
|
+
observation = data.get("observation", {})
|
|
1039
|
+
observation_dict = observation if isinstance(observation, dict) else {}
|
|
1040
|
+
action = data.get("action", {})
|
|
1041
|
+
action_dict = action if isinstance(action, dict) else {}
|
|
1042
|
+
success = observation_dict.get("success")
|
|
1043
|
+
if success is None:
|
|
1044
|
+
success = not bool(observation_dict.get("error") or observation_dict.get("stderr"))
|
|
1045
|
+
usage = data.get("usage", {})
|
|
1046
|
+
usage_dict = usage if isinstance(usage, dict) else {}
|
|
1038
1047
|
return SessionEvent(
|
|
1039
1048
|
event_type=SessionEventType.STEP_END,
|
|
1040
1049
|
timestamp=data.get("timestamp", _utc_now()),
|
|
1041
|
-
step=data.get("step", 0),
|
|
1050
|
+
step=int(data.get("step", 0) or 0),
|
|
1042
1051
|
data={
|
|
1043
|
-
"
|
|
1044
|
-
"
|
|
1052
|
+
"step": int(data.get("step", 0) or 0),
|
|
1053
|
+
"timestamp": data.get("timestamp", _utc_now()),
|
|
1054
|
+
"action": action_dict,
|
|
1055
|
+
"observation": observation_dict,
|
|
1045
1056
|
"reward": data.get("reward", 0.0),
|
|
1057
|
+
"success": bool(success),
|
|
1058
|
+
"tokens_used": int(
|
|
1059
|
+
usage_dict.get("prompt_tokens", 0) or 0
|
|
1060
|
+
)
|
|
1061
|
+
+ int(usage_dict.get("completion_tokens", 0) or 0),
|
|
1046
1062
|
},
|
|
1047
1063
|
run_id=data.get("run_id", ""),
|
|
1048
1064
|
depth=data.get("depth", 0),
|
|
@@ -1125,12 +1141,18 @@ def _build_snapshot_from_events(
|
|
|
1125
1141
|
|
|
1126
1142
|
elif event.event_type == SessionEventType.STEP_END:
|
|
1127
1143
|
# Build StepState from accumulated data
|
|
1144
|
+
if "step" not in current_step_data:
|
|
1145
|
+
current_step_data = {
|
|
1146
|
+
"step": int(event.data.get("step", event.step) or 0),
|
|
1147
|
+
"timestamp": str(event.data.get("timestamp", event.timestamp) or ""),
|
|
1148
|
+
}
|
|
1128
1149
|
if "step" in current_step_data:
|
|
1129
1150
|
# Merge any additional data from STEP_END event
|
|
1130
1151
|
if "action" in event.data:
|
|
1131
1152
|
action = event.data["action"]
|
|
1132
1153
|
current_step_data.setdefault("action_type", action.get("action", ""))
|
|
1133
1154
|
current_step_data.setdefault("action_code", action.get("code", ""))
|
|
1155
|
+
current_step_data.setdefault("action_rationale", action.get("reasoning", ""))
|
|
1134
1156
|
current_step_data.setdefault("raw_action", action)
|
|
1135
1157
|
if "observation" in event.data:
|
|
1136
1158
|
obs = event.data["observation"]
|
|
@@ -1138,12 +1160,16 @@ def _build_snapshot_from_events(
|
|
|
1138
1160
|
current_step_data.setdefault("error", obs.get("error", obs.get("stderr", "")))
|
|
1139
1161
|
current_step_data.setdefault("raw_observation", obs)
|
|
1140
1162
|
if "reward" in event.data:
|
|
1163
|
+
reward = float(event.data.get("reward", 0.0) or 0.0)
|
|
1164
|
+
cumulative = event.data.get("cumulative_reward")
|
|
1165
|
+
if cumulative is None:
|
|
1166
|
+
cumulative = total_reward + reward
|
|
1141
1167
|
current_step_data.setdefault("reward", event.data["reward"])
|
|
1142
|
-
current_step_data.setdefault(
|
|
1143
|
-
"cumulative_reward", event.data.get("cumulative_reward", 0.0)
|
|
1144
|
-
)
|
|
1168
|
+
current_step_data.setdefault("cumulative_reward", cumulative)
|
|
1145
1169
|
if "success" in event.data:
|
|
1146
1170
|
current_step_data.setdefault("success", event.data["success"])
|
|
1171
|
+
if "tokens_used" in event.data:
|
|
1172
|
+
current_step_data.setdefault("tokens_used", event.data["tokens_used"])
|
|
1147
1173
|
|
|
1148
1174
|
step_state = StepState(
|
|
1149
1175
|
step=current_step_data.get("step", 0),
|
|
@@ -1163,6 +1189,8 @@ def _build_snapshot_from_events(
|
|
|
1163
1189
|
raw_observation=current_step_data.get("raw_observation", {}),
|
|
1164
1190
|
)
|
|
1165
1191
|
steps.append(step_state)
|
|
1192
|
+
total_reward = float(step_state.cumulative_reward)
|
|
1193
|
+
total_tokens += int(step_state.tokens_used or 0)
|
|
1166
1194
|
current_step_data = {}
|
|
1167
1195
|
|
|
1168
1196
|
elif event.event_type == SessionEventType.MEMORY_UPDATE:
|
|
@@ -62,6 +62,16 @@ def build_run_visualization(
|
|
|
62
62
|
"success": observation_dict.get("success") if "success" in observation_dict else None,
|
|
63
63
|
"path": str(observation_dict.get("path") or ""),
|
|
64
64
|
"children_executed": int(observation_dict.get("children_executed") or 0),
|
|
65
|
+
"planner_preview": _clip_text(str(step.get("planner_raw") or ""), limit=260),
|
|
66
|
+
"code_preview": _clip_text(_action_code(step), limit=260),
|
|
67
|
+
"stdout_preview": _clip_text(str(observation_dict.get("stdout") or ""), limit=260),
|
|
68
|
+
"stderr_preview": _clip_text(str(observation_dict.get("stderr") or ""), limit=180),
|
|
69
|
+
"llm_calls_made": int(observation_dict.get("llm_calls_made") or 0),
|
|
70
|
+
"code_blocks_executed": int(observation_dict.get("code_blocks_executed") or 0),
|
|
71
|
+
"final_detected": bool(observation_dict.get("final_detected", False)),
|
|
72
|
+
"repl_variables": list(observation_dict.get("repl_variables") or [])[:20]
|
|
73
|
+
if isinstance(observation_dict.get("repl_variables"), list)
|
|
74
|
+
else [],
|
|
65
75
|
}
|
|
66
76
|
error = _extract_error(step)
|
|
67
77
|
if error:
|
|
@@ -190,6 +200,19 @@ def _action_name(step: dict[str, Any]) -> str:
|
|
|
190
200
|
return "unknown"
|
|
191
201
|
|
|
192
202
|
|
|
203
|
+
def _action_code(step: dict[str, Any]) -> str:
    """Return the code attached to a step's action, or ``""`` if there is none.

    A non-blank string stored under ``action["code"]`` takes precedence and
    is returned unchanged. Otherwise, when ``action["_code_blocks"]`` is a
    list, its entries are stringified and the non-blank ones are joined with
    a blank line between them.

    Args:
        step: A step record; only its ``"action"`` entry is read.

    Returns:
        The code text, or an empty string when the action is not a dict or
        carries no usable code.
    """
    action = step.get("action")
    if not isinstance(action, dict):
        return ""

    code = action.get("code")
    if isinstance(code, str) and code.strip():
        return code

    blocks = action.get("_code_blocks")
    if not isinstance(blocks, list):
        return ""

    # Skip ``None`` placeholders so they are not rendered as the literal text
    # "None", and stringify each remaining block once rather than twice.
    rendered = (str(block) for block in blocks if block is not None)
    return "\n\n".join(text for text in rendered if text.strip())
|
|
214
|
+
|
|
215
|
+
|
|
193
216
|
def _extract_error(step: dict[str, Any]) -> str:
|
|
194
217
|
observation = step.get("observation")
|
|
195
218
|
if not isinstance(observation, dict):
|
|
@@ -2403,14 +2403,40 @@ def run_textual_tui(config_manager: ConfigManager) -> None:
|
|
|
2403
2403
|
if not timeline:
|
|
2404
2404
|
target.update("[dim]No steps recorded in this run.[/dim]")
|
|
2405
2405
|
return
|
|
2406
|
-
lines = [
|
|
2406
|
+
lines = [
|
|
2407
|
+
f"[bold cyan]Trajectory[/bold cyan] [dim]{viz.get('run_id', '')}[/dim]",
|
|
2408
|
+
"[bold cyan]Step Action Reward Success RLM signals[/bold cyan]",
|
|
2409
|
+
]
|
|
2407
2410
|
for entry in timeline:
|
|
2408
2411
|
step = entry.get("step", "?")
|
|
2409
2412
|
action = str(entry.get("action", "?"))[:14].ljust(14)
|
|
2410
2413
|
reward = entry.get("reward", 0.0)
|
|
2411
2414
|
cum = entry.get("cumulative_reward", 0.0)
|
|
2412
|
-
|
|
2413
|
-
|
|
2415
|
+
success = entry.get("success")
|
|
2416
|
+
if success is None:
|
|
2417
|
+
ok = "[dim]-[/dim]"
|
|
2418
|
+
else:
|
|
2419
|
+
ok = "[green]Y[/green]" if success else "[red]N[/red]"
|
|
2420
|
+
signals: list[str] = []
|
|
2421
|
+
if entry.get("code_blocks_executed"):
|
|
2422
|
+
signals.append(f"code={entry.get('code_blocks_executed')}")
|
|
2423
|
+
if entry.get("llm_calls_made"):
|
|
2424
|
+
signals.append(f"llm={entry.get('llm_calls_made')}")
|
|
2425
|
+
if entry.get("final_detected"):
|
|
2426
|
+
signals.append("[green]FINAL[/green]")
|
|
2427
|
+
variables = entry.get("repl_variables") or []
|
|
2428
|
+
if variables:
|
|
2429
|
+
preview_vars = ", ".join(str(item) for item in variables[:5])
|
|
2430
|
+
signals.append(f"vars={preview_vars}")
|
|
2431
|
+
signal_text = " ".join(signals) if signals else "[dim]-[/dim]"
|
|
2432
|
+
lines.append(f" {step:<4} {action} {reward:+.3f} ({cum:.3f}) {ok} {signal_text}")
|
|
2433
|
+
|
|
2434
|
+
code_preview = str(entry.get("code_preview") or "").strip()
|
|
2435
|
+
stdout_preview = str(entry.get("stdout_preview") or "").strip()
|
|
2436
|
+
if code_preview:
|
|
2437
|
+
lines.append(f" [magenta]code[/magenta] {code_preview}")
|
|
2438
|
+
if stdout_preview:
|
|
2439
|
+
lines.append(f" [blue]out [/blue] {stdout_preview}")
|
|
2414
2440
|
target.update("\n".join(lines))
|
|
2415
2441
|
|
|
2416
2442
|
def _apply_view_mode(self) -> None:
|
|
@@ -2842,21 +2868,76 @@ def run_textual_tui(config_manager: ConfigManager) -> None:
|
|
|
2842
2868
|
if self._session_replayer is None:
|
|
2843
2869
|
return
|
|
2844
2870
|
try:
|
|
2871
|
+
state = None
|
|
2845
2872
|
if button_id == "replay_start_btn":
|
|
2846
2873
|
self._session_replayer.goto_start()
|
|
2847
2874
|
elif button_id == "replay_back_btn":
|
|
2848
|
-
self._session_replayer.step_backward()
|
|
2875
|
+
state = self._session_replayer.step_backward()
|
|
2849
2876
|
elif button_id == "replay_fwd_btn":
|
|
2850
|
-
self._session_replayer.step_forward()
|
|
2877
|
+
state = self._session_replayer.step_forward()
|
|
2851
2878
|
elif button_id == "replay_end_btn":
|
|
2852
2879
|
self._session_replayer.goto_end()
|
|
2880
|
+
state = self._session_replayer.get_current_state()
|
|
2853
2881
|
# Update position display
|
|
2854
2882
|
cur = self._session_replayer.current_step
|
|
2855
2883
|
total = self._session_replayer.total_steps
|
|
2856
2884
|
self.query_one("#replay_position", Static).update(f"Step {cur}/{total}")
|
|
2885
|
+
if state is None:
|
|
2886
|
+
state = self._session_replayer.get_current_state()
|
|
2887
|
+
self._render_replay_step_detail(state)
|
|
2857
2888
|
except Exception:
|
|
2858
2889
|
pass
|
|
2859
2890
|
|
|
2891
|
+
def _render_replay_step_detail(self, state: Any | None) -> None:
|
|
2892
|
+
"""Render the current replay step with pure-RLM-specific details."""
|
|
2893
|
+
try:
|
|
2894
|
+
target = self.query_one("#replay_step_detail", Static)
|
|
2895
|
+
except Exception:
|
|
2896
|
+
return
|
|
2897
|
+
if state is None:
|
|
2898
|
+
target.update("[dim]Replay is at the start or end of the run.[/dim]")
|
|
2899
|
+
return
|
|
2900
|
+
|
|
2901
|
+
raw_observation = getattr(state, "raw_observation", {}) or {}
|
|
2902
|
+
raw_action = getattr(state, "raw_action", {}) or {}
|
|
2903
|
+
lines = [
|
|
2904
|
+
f"[bold cyan]Step {getattr(state, 'step', '?')}[/bold cyan] "
|
|
2905
|
+
f"action=[bold]{getattr(state, 'action_type', '') or raw_action.get('action', '')}[/bold] "
|
|
2906
|
+
f"reward={float(getattr(state, 'reward', 0.0) or 0.0):+.3f}",
|
|
2907
|
+
]
|
|
2908
|
+
code = str(getattr(state, "action_code", "") or raw_action.get("code", "") or "").strip()
|
|
2909
|
+
if code:
|
|
2910
|
+
lines.append("")
|
|
2911
|
+
lines.append("[magenta]REPL code[/magenta]")
|
|
2912
|
+
lines.append(code[:1800])
|
|
2913
|
+
|
|
2914
|
+
stdout = str(getattr(state, "output", "") or raw_observation.get("stdout", "") or "").strip()
|
|
2915
|
+
stderr = str(getattr(state, "error", "") or raw_observation.get("stderr", "") or "").strip()
|
|
2916
|
+
if stdout:
|
|
2917
|
+
lines.append("")
|
|
2918
|
+
lines.append("[blue]Observation stdout[/blue]")
|
|
2919
|
+
lines.append(stdout[:1800])
|
|
2920
|
+
if stderr:
|
|
2921
|
+
lines.append("")
|
|
2922
|
+
lines.append("[red]Observation stderr[/red]")
|
|
2923
|
+
lines.append(stderr[:1000])
|
|
2924
|
+
|
|
2925
|
+
signals: list[str] = []
|
|
2926
|
+
if raw_observation.get("code_blocks_executed"):
|
|
2927
|
+
signals.append(f"code_blocks={raw_observation.get('code_blocks_executed')}")
|
|
2928
|
+
if raw_observation.get("llm_calls_made"):
|
|
2929
|
+
signals.append(f"llm_calls={raw_observation.get('llm_calls_made')}")
|
|
2930
|
+
if raw_observation.get("final_detected"):
|
|
2931
|
+
signals.append("FINAL detected")
|
|
2932
|
+
variables = raw_observation.get("repl_variables")
|
|
2933
|
+
if isinstance(variables, list) and variables:
|
|
2934
|
+
signals.append("vars=" + ", ".join(str(item) for item in variables[:12]))
|
|
2935
|
+
if signals:
|
|
2936
|
+
lines.append("")
|
|
2937
|
+
lines.append("[green]RLM signals[/green] " + " ".join(signals))
|
|
2938
|
+
|
|
2939
|
+
target.update("\n".join(lines))
|
|
2940
|
+
|
|
2860
2941
|
def _refresh_research_dashboard(self, run_path: Path) -> None:
|
|
2861
2942
|
"""Populate the Research dashboard from a completed run trace."""
|
|
2862
2943
|
try:
|
|
@@ -2904,7 +2985,7 @@ def run_textual_tui(config_manager: ConfigManager) -> None:
|
|
|
2904
2985
|
chart.values = [pt.get("cumulative_reward", 0.0) for pt in reward_curve]
|
|
2905
2986
|
|
|
2906
2987
|
self.query_one("#replay_step_detail", Static).update(
|
|
2907
|
-
"[dim]Use < > buttons to step through the run.[/dim]"
|
|
2988
|
+
"[dim]Use < > buttons to step through the run. Each step will show REPL code, observations, and pure-RLM signals.[/dim]"
|
|
2908
2989
|
)
|
|
2909
2990
|
self._set_research_sub_view("replay")
|
|
2910
2991
|
except Exception as exc:
|
|
@@ -761,6 +761,62 @@ class TestLoadSession:
|
|
|
761
761
|
replayer = load_session(jsonl_path)
|
|
762
762
|
assert replayer.total_steps >= 1
|
|
763
763
|
|
|
764
|
+
def test_load_runner_jsonl_step_events(self):
    """A runner trace with one ``step`` and one ``final`` record replays with usable state.

    Writes the two records as JSON lines, loads them with ``load_session``
    and checks the snapshot totals plus the action, output and raw
    observation of the single replayed step.
    """
    # Fields both records carry, in the order they are serialized.
    shared = {
        "run_id": "run_demo",
        "environment": "pure_rlm",
        "task": "Validate pure RLM",
    }
    token_usage = {"prompt_tokens": 10, "completion_tokens": 5}
    step_record = {
        "type": "step",
        **shared,
        "timestamp": "2026-06-25T10:00:01+00:00",
        "step": 1,
        "action": {
            "action": "run_repl",
            "code": "print(context.keys())",
            "reasoning": "Inspect context",
        },
        "observation": {
            "success": True,
            "stdout": "dict_keys(['a.py'])",
            "llm_calls_made": 1,
            "code_blocks_executed": 1,
            "repl_variables": ["context", "answer"],
        },
        "reward": 0.4,
        "usage": dict(token_usage),
    }
    final_record = {
        "type": "final",
        **shared,
        "timestamp": "2026-06-25T10:00:02+00:00",
        "completed": True,
        "steps": 1,
        "total_reward": 0.4,
        "final_response": "Yes",
        "usage": dict(token_usage),
    }

    with tempfile.TemporaryDirectory() as workdir:
        trace_file = Path(workdir) / "runner.jsonl"
        trace_file.write_text(
            "".join(json.dumps(record) + "\n" for record in (step_record, final_record))
        )

        replayer = load_session(trace_file)

        assert replayer.total_steps == 1
        snapshot = replayer.snapshot
        assert snapshot.completed is True
        assert snapshot.final_answer == "Yes"

        first_step = replayer.step_forward()
        assert first_step is not None
        assert first_step.action_type == "run_repl"
        assert first_step.action_code == "print(context.keys())"
        assert first_step.output == "dict_keys(['a.py'])"
        assert first_step.raw_observation["llm_calls_made"] == 1
|
|
819
|
+
|
|
764
820
|
|
|
765
821
|
class TestCreateRecorder:
|
|
766
822
|
"""Tests for create_recorder convenience function."""
|
|
@@ -1414,6 +1414,39 @@ def test_rlm_pure_strict_blocks_delegate_actions(tmp_path):
|
|
|
1414
1414
|
assert "delegate action is disabled" in str(observation)
|
|
1415
1415
|
|
|
1416
1416
|
|
|
1417
|
+
def test_rlm_pure_run_initializes_context_from_task_files(tmp_path):
    """A ``pure_rlm`` run exposes workspace files as ``context`` and logs a context event.

    The scripted model reply returns the keys of ``context`` as the final
    answer, so the test can check that ``demo_module.py`` was loaded, that a
    ``context`` event lists it, and that the step observation reports the
    final answer being detected with ``context`` among the REPL variables.
    """
    (tmp_path / "demo_module.py").write_text("VALUE = 42\n", encoding="utf-8")

    scripted_reply = (
        '```repl\nfinal_answer = list(context.keys())\nFINAL_VAR("final_answer")\n```'
    )
    runner = RLMRunner(
        llm_connector=_FakeConnector(responses=[scripted_reply]),
        execution_engine=_ConfigurableExecutionEngine(pure_rlm_backend="exec"),
        run_dir=tmp_path / "runs",
        workdir=tmp_path,
    )

    outcome = runner.run_task(
        "Inspect demo_module.py",
        max_steps=1,
        exec_timeout=5,
        environment="pure_rlm",
    )

    assert outcome.completed is True
    assert "demo_module.py" in outcome.final_response

    recorded = runner.load_run_events(outcome.run_id)

    def first_of(kind):
        # First recorded event of the given type; StopIteration if absent.
        return next(item for item in recorded if item.get("type") == kind)

    assert first_of("context")["context_files"] == ["demo_module.py"]

    step_observation = first_of("step").get("observation", {})
    assert step_observation.get("final_detected") is True
    assert "context" in step_observation.get("repl_variables", [])
|
|
1448
|
+
|
|
1449
|
+
|
|
1417
1450
|
def test_rlm_runner_blocks_exec_without_unsafe_opt_in(tmp_path):
|
|
1418
1451
|
engine = _ConfigurableExecutionEngine(
|
|
1419
1452
|
pure_rlm_backend="exec",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|