rlm-code 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rlm_code-0.1.7 → rlm_code-0.1.9}/.gitignore +1 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/CHANGELOG.md +20 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/PKG-INFO +9 -9
- {rlm_code-0.1.7 → rlm_code-0.1.9}/README.md +8 -8
- {rlm_code-0.1.7 → rlm_code-0.1.9}/pyproject.toml +1 -1
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/__init__.py +1 -1
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/__init__.py +1 -1
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/environments.py +32 -1
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/runner.py +97 -1
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/session_replay.py +34 -6
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/visualizer.py +23 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/traces/store.py +226 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/tui_app.py +87 -6
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_session_replay.py +56 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_rlm_runner.py +33 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_trace_analysis.py +29 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/LICENSE +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/NOTICE +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/agent.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/agents/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/agents/rlm_agent.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/callbacks/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/callbacks/code_execution.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/cli.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/code_executor.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/events.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/lazy.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/loader.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/parsers/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/parsers/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/parsers/pdf.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/parsers/text.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/sources/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/sources/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/sources/gcs.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/files/sources/local.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/llm.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/logging/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/logging/rlm_logger.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/logging/verbose.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/main.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/prompts.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/repl/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/repl/local_repl.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/repl/safe_builtins.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/templates/index.html +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/tools/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/types.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/usage.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/adk_rlm/web.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/eval/packs/README.md +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/__main__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/config_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/create_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/demo_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/export_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/init_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/interactive_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/mcp_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/models_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/nl_command_router.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/optimize_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/run_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/commands/slash_commands.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/config.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/debug_logger.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/directory_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/exceptions.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/logging.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/venv_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/core/version_checker.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/phase2_demo.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/phase3_demo.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/phase4_demo.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/examples/pure_rlm_demo.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/execution/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/execution/engine.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/execution/sandbox.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/export/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/export/handler.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/export/package_builder.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/generators/evaluation_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/generators/gepa_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/harness/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/harness/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/harness/runner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/main.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/client_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/config.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/exceptions.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/retry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/server/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/server/rlm_server.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/server/tools.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/session_wrapper.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/factory.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/sse_transport.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/stdio_transport.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/transports/websocket_transport.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/mcp/utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/cache.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/code_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/dspy_reference_loader.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/llm_connector.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/model_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/acp_discovery.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/local_discovery.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/model_catalog.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/providers/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/streaming.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/models/task_collector.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/optimization/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/optimization/data_collector.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/optimization/executor.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/optimization/workflow_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/context_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/dspy_md_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/initializer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/project/scanner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/py.typed +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/action_planner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/audit.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/gate.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/handlers.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/approval/policy.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/benchmark_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/benchmarks.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/chat_session.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/code_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/comparison.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/config_schema.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/context_store.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/delegation.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/docker_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/events.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/deepagents_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/google_adk_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/frameworks/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/leaderboard.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/memory_compaction.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/mock_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/monty_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/observability.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/observability_sinks.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/action_policies.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/compaction_policies.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/reward_policies.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/policies/termination_policies.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/pure_rlm_environment.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/repl_types.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/theme.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/animated.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/research_tui/widgets/panels.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/task_signature.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/termination.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/rlm/trajectory.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/apple_container_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/base.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/daytona_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/e2b_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/cloud/modal_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/command_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/docker_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/local_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/monty_runtime.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/runtimes/registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/sandbox/superbox.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/session/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/session/state_manager.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/.env.example +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/adapters.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/async_streaming.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/complete_programs.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/dspy_config_example.yaml +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/evaluation.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/industry_templates.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/optimizers.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/retrievers.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/templates/rlm_benchmarks_example.yaml +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/tests/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/tests/rlm/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/tests/rlm/test_phase2.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/tests/rlm/test_pure_rlm.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/traces/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/traces/index.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/traces/models.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/agent_collab_view.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/animations.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/conversation.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/design_system.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/diff_viewer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/notifications.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/persistent_shell.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/prompt_widget.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/prompts.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/pty_terminal.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/resizable_divider.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/thinking_display.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/tui_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/ui/welcome.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/anti_patterns.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/auto_fixer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/best_practices.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/code_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/config_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/exceptions.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/input_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/learning_integration.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/models.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/module_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/predictor_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/quality_scorer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/report_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/security.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/security_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/signature_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/rlm_code/validation/validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/__init__.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/conftest.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/fixtures/rlm_ci_baseline_generic_smoke.json +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_adk_rlm_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_code_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_deepagents_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_dspy_rlm_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_extract_fallback.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_framework_registry_coverage.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_google_adk_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_leaderboard.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_mock_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_monty_interpreter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_observability_sinks.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_p0_features.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_phase3.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_phase4.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_pure_rlm_runtime_modes.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_pydantic_ai_adapter.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_repl_history.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_security_hardening.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_submit.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_task_signature.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/rlm/test_user_tools.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_anti_patterns.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_auto_fixer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_cache.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_execution_engine.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_export_import.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_harness_registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_harness_runner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_init_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_integration.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_learning_integration.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_mcp_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_module_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_optimization_workflow.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_persistent_shell.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_predictor_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_project_scanner.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_prompt_widget.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_property_validators.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_provider_discovery.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_provider_registry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_quality_scorer.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_report_generator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_retry.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_rlm_config.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_rlm_dspy_environment.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_rlm_observability.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_sandbox_runtimes.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_security_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_session_management.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_signature_validator.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_slash_harness_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_slash_rlm_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_slash_sandbox_command.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_streaming.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_superbox.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_tui_utils.py +0 -0
- {rlm_code-0.1.7 → rlm_code-0.1.9}/tests/test_validation.py +0 -0
|
@@ -5,6 +5,24 @@ All notable changes to this project are documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.9] - 2026-06-26
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Pure RLM runner context initialization from explicit workspace file references in the task, with compact repository snapshot fallback.
|
|
12
|
+
- Context-load events for Pure RLM runs, including loaded file names and total context characters.
|
|
13
|
+
- Runner JSONL replay coverage for action code, observations, success state, token counts, and cumulative reward.
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- TUI trajectory and replay views now show Pure RLM signals including REPL code, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables.
|
|
17
|
+
- Run visualization now includes richer Pure RLM previews for completed runs.
|
|
18
|
+
|
|
19
|
+
## [0.1.8] - 2026-05-01
|
|
20
|
+
|
|
21
|
+
### Added
|
|
22
|
+
- AHE-style layered trace evidence corpus export from `TraceStore`.
|
|
23
|
+
- New `trace_analysis` action `export_evidence_corpus` for writing `overview.md`, per-trace detail reports, `index.json`, and optional processed raw JSONL spans.
|
|
24
|
+
- Evidence corpus tests covering direct store export and environment action export.
|
|
25
|
+
|
|
8
26
|
## [0.1.7] - 2026-04-30
|
|
9
27
|
|
|
10
28
|
### Added
|
|
@@ -69,4 +87,6 @@ Initial public release of **RLM Code**.
|
|
|
69
87
|
|
|
70
88
|
[0.1.5]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.5
|
|
71
89
|
[0.1.6]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.6
|
|
90
|
+
[0.1.9]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.9
|
|
91
|
+
[0.1.8]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.8
|
|
72
92
|
[0.1.7]: https://github.com/SuperagenticAI/rlm-code/releases/tag/v0.1.7
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rlm-code
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.9
|
|
4
4
|
Summary: RLM Code: Research Playground & Evaluation OS for Recursive Language Model Agentic Systems
|
|
5
5
|
Project-URL: Homepage, https://github.com/SuperagenticAI/rlm-code
|
|
6
6
|
Project-URL: Documentation, https://superagenticai.github.io/rlm-code/
|
|
@@ -118,20 +118,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
118
118
|
|
|
119
119
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
120
120
|
|
|
121
|
-
## Release v0.1.7
|
|
121
|
+
## Release v0.1.9
|
|
122
122
|
|
|
123
|
-
This release
|
|
123
|
+
This release improves Pure RLM repository runs and makes completed trajectories more inspectable from the TUI and replay views.
|
|
124
124
|
|
|
125
|
-
-
|
|
126
|
-
-
|
|
127
|
-
-
|
|
128
|
-
-
|
|
129
|
-
-
|
|
125
|
+
- Pure RLM runs now initialize `context` from explicit workspace files mentioned in the task, with a compact repository snapshot fallback
|
|
126
|
+
- Runner events now record context-load metadata for Pure RLM runs
|
|
127
|
+
- Legacy runner JSONL step events replay with action code, observations, success, token counts, and cumulative reward
|
|
128
|
+
- Run visualization now includes REPL code previews, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables
|
|
129
|
+
- TUI trajectory and replay views now surface Pure RLM signals directly for completed runs
|
|
130
130
|
|
|
131
131
|
Example:
|
|
132
132
|
|
|
133
133
|
```text
|
|
134
|
-
/rlm run "
|
|
134
|
+
/rlm run "Validate pure_rlm_environment.py and cite context, REPL, llm_query, and FINAL evidence" env=pure_rlm steps=6
|
|
135
135
|
```
|
|
136
136
|
|
|
137
137
|
## Documentation
|
|
@@ -25,20 +25,20 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
|
|
|
25
25
|
|
|
26
26
|
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
|
|
27
27
|
|
|
28
|
-
## Release v0.1.7
|
|
28
|
+
## Release v0.1.9
|
|
29
29
|
|
|
30
|
-
This release
|
|
30
|
+
This release improves Pure RLM repository runs and makes completed trajectories more inspectable from the TUI and replay views.
|
|
31
31
|
|
|
32
|
-
-
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
-
|
|
36
|
-
-
|
|
32
|
+
- Pure RLM runs now initialize `context` from explicit workspace files mentioned in the task, with a compact repository snapshot fallback
|
|
33
|
+
- Runner events now record context-load metadata for Pure RLM runs
|
|
34
|
+
- Legacy runner JSONL step events replay with action code, observations, success, token counts, and cumulative reward
|
|
35
|
+
- Run visualization now includes REPL code previews, stdout/stderr previews, `llm_query` counts, executed code blocks, finalization status, and REPL variables
|
|
36
|
+
- TUI trajectory and replay views now surface Pure RLM signals directly for completed runs
|
|
37
37
|
|
|
38
38
|
Example:
|
|
39
39
|
|
|
40
40
|
```text
|
|
41
|
-
/rlm run "
|
|
41
|
+
/rlm run "Validate pure_rlm_environment.py and cite context, REPL, llm_query, and FINAL evidence" env=pure_rlm steps=6
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
## Documentation
|
|
@@ -306,8 +306,10 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
|
|
|
306
306
|
"Return ONLY valid JSON object with keys:\n"
|
|
307
307
|
"{"
|
|
308
308
|
'"action": "set_trace_path" | "get_dataset_overview" | "query_traces" | '
|
|
309
|
-
'"count_traces" | "view_trace" | "search_trace" | "view_spans" | "final", '
|
|
309
|
+
'"count_traces" | "view_trace" | "search_trace" | "view_spans" | '
|
|
310
|
+
'"export_evidence_corpus" | "final", '
|
|
310
311
|
'"trace_path": "<path to JSONL traces>", '
|
|
312
|
+
'"output_dir": "<directory for exported evidence corpus>", '
|
|
311
313
|
'"filters": {"has_errors": true, "model_names": ["..."], "service_names": ["..."], '
|
|
312
314
|
'"agent_names": ["..."], "project_id": "..."}, '
|
|
313
315
|
'"trace_id": "<trace id>", '
|
|
@@ -324,6 +326,7 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
|
|
|
324
326
|
"- Always begin analysis with get_dataset_overview.\n"
|
|
325
327
|
"- Use query_traces to choose real trace ids; never invent trace ids.\n"
|
|
326
328
|
"- For large traces, prefer search_trace followed by view_spans.\n"
|
|
329
|
+
"- Use export_evidence_corpus when the caller needs files for MetaHarness or another coding agent.\n"
|
|
327
330
|
"- Identify systemic harness failures, not one-off anomalies.\n"
|
|
328
331
|
"- Output JSON only."
|
|
329
332
|
)
|
|
@@ -448,6 +451,21 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
|
|
|
448
451
|
reward=0.7,
|
|
449
452
|
memory_note=f"Viewed selected spans for trace {trace_id}.",
|
|
450
453
|
)
|
|
454
|
+
if action_name == "export_evidence_corpus":
|
|
455
|
+
output_dir = self._required_str(action, "output_dir")
|
|
456
|
+
resolved_output = Path(output_dir).expanduser()
|
|
457
|
+
if not resolved_output.is_absolute():
|
|
458
|
+
resolved_output = self.workdir / resolved_output
|
|
459
|
+
return self._ok(
|
|
460
|
+
observation=store.export_evidence_corpus(
|
|
461
|
+
resolved_output,
|
|
462
|
+
filters,
|
|
463
|
+
limit=self._int_arg(action, "limit", 100, minimum=1, maximum=1000),
|
|
464
|
+
include_raw=self._bool_arg(action, "include_raw", True),
|
|
465
|
+
),
|
|
466
|
+
reward=0.75,
|
|
467
|
+
memory_note="Exported layered trace evidence corpus.",
|
|
468
|
+
)
|
|
451
469
|
except Exception as exc:
|
|
452
470
|
return EnvironmentActionResult(
|
|
453
471
|
observation={"success": False, "error": f"{type(exc).__name__}: {exc}"},
|
|
@@ -530,6 +548,19 @@ class TraceAnalysisEnvironment(GenericRLMEnvironment):
|
|
|
530
548
|
parsed = default
|
|
531
549
|
return max(minimum, min(maximum, parsed))
|
|
532
550
|
|
|
551
|
+
@staticmethod
|
|
552
|
+
def _bool_arg(action: dict[str, Any], key: str, default: bool) -> bool:
|
|
553
|
+
value = action.get(key, default)
|
|
554
|
+
if isinstance(value, bool):
|
|
555
|
+
return value
|
|
556
|
+
if isinstance(value, str):
|
|
557
|
+
normalized = value.strip().lower()
|
|
558
|
+
if normalized in {"1", "true", "yes", "on"}:
|
|
559
|
+
return True
|
|
560
|
+
if normalized in {"0", "false", "no", "off"}:
|
|
561
|
+
return False
|
|
562
|
+
return default
|
|
563
|
+
|
|
533
564
|
|
|
534
565
|
class DSPyCodingRLMEnvironment(GenericRLMEnvironment):
|
|
535
566
|
"""DSPy-focused environment with file edit + tests + DSPy-aware scoring."""
|
|
@@ -9,6 +9,7 @@ from __future__ import annotations
|
|
|
9
9
|
|
|
10
10
|
import hashlib
|
|
11
11
|
import json
|
|
12
|
+
import re
|
|
12
13
|
import threading
|
|
13
14
|
import time
|
|
14
15
|
from dataclasses import asdict, dataclass, is_dataclass
|
|
@@ -29,7 +30,7 @@ from .benchmark_manager import (
|
|
|
29
30
|
)
|
|
30
31
|
from .benchmarks import RLMBenchmarkCase, load_benchmark_packs
|
|
31
32
|
from .chat_session import ChatSessionMixin
|
|
32
|
-
from .context_store import LazyFileContext
|
|
33
|
+
from .context_store import ContextRef, LazyFileContext
|
|
33
34
|
from .delegation import DelegationMixin
|
|
34
35
|
from .environments import (
|
|
35
36
|
DSPyCodingRLMEnvironment,
|
|
@@ -467,6 +468,93 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
467
468
|
allow_unsafe_exec=(selected_backend == "exec" and self._pure_rlm_allow_unsafe_exec),
|
|
468
469
|
)
|
|
469
470
|
|
|
471
|
+
def _extract_task_file_refs(self, task: str, limit: int = 12) -> list[ContextRef]:
    """Find explicit workspace file references mentioned in a task string.

    A reference is a bare or slash-separated path ending in one of the
    recognised extensions (py, md, toml, yaml, yml, json, txt, js, jsx, ts,
    tsx). Matches are de-duplicated and returned in order of first appearance.

    Args:
        task: Free-form task text to scan.
        limit: Maximum number of references to return; values <= 0 yield an
            empty list.

    Returns:
        Up to ``limit`` ``ContextRef`` objects, one per distinct path.
    """
    if limit <= 0:
        return []

    # The trailing negative lookahead forces the extension to be complete.
    # Without it the alternation stops at the first prefix that fits, so
    # "app.jsx" was captured as "app.js" and "x.pyc" as "x.py".
    candidates = re.findall(
        r"(?<![\w.-])(?:[\w.-]+/)*[\w.-]+"
        r"\.(?:py|md|toml|yaml|yml|json|txt|js|jsx|ts|tsx)(?![\w-])",
        task,
    )
    seen: set[str] = set()
    refs: list[ContextRef] = []
    for candidate in candidates:
        # A match can only contain word characters, '.', '-' and '/', and it
        # always ends with the extension, so there is no trailing punctuation
        # to trim. Only a redundant "./" prefix is dropped; leading dots that
        # belong to the path (".github/...", "../x.py") must be preserved.
        normalized = candidate
        while normalized.startswith("./"):
            normalized = normalized[2:]
        if not normalized or normalized in seen:
            continue
        seen.add(normalized)
        refs.append(ContextRef(path=normalized))
        if len(refs) >= limit:
            break
    return refs
|
|
488
|
+
|
|
489
|
+
def _build_pure_rlm_initial_context(self, task: str) -> dict[str, str]:
|
|
490
|
+
"""
|
|
491
|
+
Build a small real-code context for Pure RLM runs.
|
|
492
|
+
|
|
493
|
+
The direct PureRLMEnvironment API expects context to be initialized
|
|
494
|
+
explicitly. Runner/TUI users expect `/rlm run ... env=pure_rlm` to
|
|
495
|
+
start with useful workspace data, so we seed `context` with explicit
|
|
496
|
+
files named in the task, falling back to a compact repository snapshot.
|
|
497
|
+
"""
|
|
498
|
+
refs = self._extract_task_file_refs(task)
|
|
499
|
+
if not refs:
|
|
500
|
+
refs = self.context_store.discover(limit=12)
|
|
501
|
+
|
|
502
|
+
context: dict[str, str] = {}
|
|
503
|
+
for ref in refs:
|
|
504
|
+
snippet = self.context_store.read(ref, max_chars=12000)
|
|
505
|
+
if snippet:
|
|
506
|
+
context[ref.path] = snippet
|
|
507
|
+
|
|
508
|
+
if context:
|
|
509
|
+
return context
|
|
510
|
+
|
|
511
|
+
discovered = self.context_store.discover(limit=80)
|
|
512
|
+
tree = "\n".join(ref.path for ref in discovered)
|
|
513
|
+
return {
|
|
514
|
+
"_workspace": (
|
|
515
|
+
f"Workspace: {self.workdir}\n"
|
|
516
|
+
"No explicit file snippets were loaded. Available files:\n"
|
|
517
|
+
f"{tree}"
|
|
518
|
+
).strip()
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
def _initialize_pure_rlm_run_context(
    self,
    env: RLMEnvironment,
    task: str,
    *,
    run_id: str,
    run_path: Path,
) -> int:
    """Seed a Pure RLM environment with workspace context and record it.

    Does nothing and returns 0 unless ``env`` is named ``"pure_rlm"`` and
    exposes ``initialize_context``. Otherwise the context built for ``task``
    is handed to the environment, a ``"context"`` event is appended to
    ``run_path``, a ``"context_load"`` runtime event is emitted, and the
    number of context entries is returned.
    """
    is_pure_rlm = env.name == "pure_rlm"
    if not (is_pure_rlm and hasattr(env, "initialize_context")):
        return 0

    seeded = self._build_pure_rlm_initial_context(task)
    env.initialize_context(
        seeded,
        description="Workspace files selected for this Pure RLM run",
        additional_vars={"query": task},
    )

    # Persist what was loaded so the run log shows the starting context.
    record = {
        "type": "context",
        "run_id": run_id,
        "environment": env.name,
        "timestamp": self._utc_now(),
        "context_files": list(seeded),
        "context_chars": sum(map(len, seeded.values())),
    }
    self._append_event(run_path, record)

    file_count = len(seeded)
    self._emit_runtime_event(
        "context_load",
        {"run_id": run_id, "files": file_count, "chars": record["context_chars"]},
    )
    return file_count
|
|
557
|
+
|
|
470
558
|
def run_task(
|
|
471
559
|
self,
|
|
472
560
|
task: str,
|
|
@@ -596,6 +684,12 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
596
684
|
final_response = ""
|
|
597
685
|
cancelled = False
|
|
598
686
|
trajectory: list[dict[str, Any]] = []
|
|
687
|
+
context_files = self._initialize_pure_rlm_run_context(
|
|
688
|
+
env,
|
|
689
|
+
cleaned_task,
|
|
690
|
+
run_id=run_id,
|
|
691
|
+
run_path=run_path,
|
|
692
|
+
)
|
|
599
693
|
usage_start = self._usage_snapshot()
|
|
600
694
|
self.observability.on_run_start(
|
|
601
695
|
run_id,
|
|
@@ -616,6 +710,7 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
616
710
|
"parent_run_id": _parent_run_id,
|
|
617
711
|
"pure_rlm_backend": self._pure_rlm_backend if env.name == "pure_rlm" else None,
|
|
618
712
|
"pure_rlm_strict": strict_pure_mode if env.name == "pure_rlm" else None,
|
|
713
|
+
"context_files": context_files if env.name == "pure_rlm" else None,
|
|
619
714
|
},
|
|
620
715
|
)
|
|
621
716
|
self._emit_runtime_event(
|
|
@@ -627,6 +722,7 @@ class RLMRunner(BenchmarkManagerMixin, ChatSessionMixin, DelegationMixin, Action
|
|
|
627
722
|
"framework": native_framework,
|
|
628
723
|
"depth": _depth,
|
|
629
724
|
"parent_run_id": _parent_run_id,
|
|
725
|
+
"context_files": context_files if env.name == "pure_rlm" else None,
|
|
630
726
|
},
|
|
631
727
|
)
|
|
632
728
|
|
|
@@ -1035,14 +1035,30 @@ def _convert_legacy_step(data: dict[str, Any]) -> SessionEvent:
|
|
|
1035
1035
|
step_type = data.get("type", "")
|
|
1036
1036
|
|
|
1037
1037
|
if step_type == "step":
|
|
1038
|
+
observation = data.get("observation", {})
|
|
1039
|
+
observation_dict = observation if isinstance(observation, dict) else {}
|
|
1040
|
+
action = data.get("action", {})
|
|
1041
|
+
action_dict = action if isinstance(action, dict) else {}
|
|
1042
|
+
success = observation_dict.get("success")
|
|
1043
|
+
if success is None:
|
|
1044
|
+
success = not bool(observation_dict.get("error") or observation_dict.get("stderr"))
|
|
1045
|
+
usage = data.get("usage", {})
|
|
1046
|
+
usage_dict = usage if isinstance(usage, dict) else {}
|
|
1038
1047
|
return SessionEvent(
|
|
1039
1048
|
event_type=SessionEventType.STEP_END,
|
|
1040
1049
|
timestamp=data.get("timestamp", _utc_now()),
|
|
1041
|
-
step=data.get("step", 0),
|
|
1050
|
+
step=int(data.get("step", 0) or 0),
|
|
1042
1051
|
data={
|
|
1043
|
-
"
|
|
1044
|
-
"
|
|
1052
|
+
"step": int(data.get("step", 0) or 0),
|
|
1053
|
+
"timestamp": data.get("timestamp", _utc_now()),
|
|
1054
|
+
"action": action_dict,
|
|
1055
|
+
"observation": observation_dict,
|
|
1045
1056
|
"reward": data.get("reward", 0.0),
|
|
1057
|
+
"success": bool(success),
|
|
1058
|
+
"tokens_used": int(
|
|
1059
|
+
usage_dict.get("prompt_tokens", 0) or 0
|
|
1060
|
+
)
|
|
1061
|
+
+ int(usage_dict.get("completion_tokens", 0) or 0),
|
|
1046
1062
|
},
|
|
1047
1063
|
run_id=data.get("run_id", ""),
|
|
1048
1064
|
depth=data.get("depth", 0),
|
|
@@ -1125,12 +1141,18 @@ def _build_snapshot_from_events(
|
|
|
1125
1141
|
|
|
1126
1142
|
elif event.event_type == SessionEventType.STEP_END:
|
|
1127
1143
|
# Build StepState from accumulated data
|
|
1144
|
+
if "step" not in current_step_data:
|
|
1145
|
+
current_step_data = {
|
|
1146
|
+
"step": int(event.data.get("step", event.step) or 0),
|
|
1147
|
+
"timestamp": str(event.data.get("timestamp", event.timestamp) or ""),
|
|
1148
|
+
}
|
|
1128
1149
|
if "step" in current_step_data:
|
|
1129
1150
|
# Merge any additional data from STEP_END event
|
|
1130
1151
|
if "action" in event.data:
|
|
1131
1152
|
action = event.data["action"]
|
|
1132
1153
|
current_step_data.setdefault("action_type", action.get("action", ""))
|
|
1133
1154
|
current_step_data.setdefault("action_code", action.get("code", ""))
|
|
1155
|
+
current_step_data.setdefault("action_rationale", action.get("reasoning", ""))
|
|
1134
1156
|
current_step_data.setdefault("raw_action", action)
|
|
1135
1157
|
if "observation" in event.data:
|
|
1136
1158
|
obs = event.data["observation"]
|
|
@@ -1138,12 +1160,16 @@ def _build_snapshot_from_events(
|
|
|
1138
1160
|
current_step_data.setdefault("error", obs.get("error", obs.get("stderr", "")))
|
|
1139
1161
|
current_step_data.setdefault("raw_observation", obs)
|
|
1140
1162
|
if "reward" in event.data:
|
|
1163
|
+
reward = float(event.data.get("reward", 0.0) or 0.0)
|
|
1164
|
+
cumulative = event.data.get("cumulative_reward")
|
|
1165
|
+
if cumulative is None:
|
|
1166
|
+
cumulative = total_reward + reward
|
|
1141
1167
|
current_step_data.setdefault("reward", event.data["reward"])
|
|
1142
|
-
current_step_data.setdefault(
|
|
1143
|
-
"cumulative_reward", event.data.get("cumulative_reward", 0.0)
|
|
1144
|
-
)
|
|
1168
|
+
current_step_data.setdefault("cumulative_reward", cumulative)
|
|
1145
1169
|
if "success" in event.data:
|
|
1146
1170
|
current_step_data.setdefault("success", event.data["success"])
|
|
1171
|
+
if "tokens_used" in event.data:
|
|
1172
|
+
current_step_data.setdefault("tokens_used", event.data["tokens_used"])
|
|
1147
1173
|
|
|
1148
1174
|
step_state = StepState(
|
|
1149
1175
|
step=current_step_data.get("step", 0),
|
|
@@ -1163,6 +1189,8 @@ def _build_snapshot_from_events(
|
|
|
1163
1189
|
raw_observation=current_step_data.get("raw_observation", {}),
|
|
1164
1190
|
)
|
|
1165
1191
|
steps.append(step_state)
|
|
1192
|
+
total_reward = float(step_state.cumulative_reward)
|
|
1193
|
+
total_tokens += int(step_state.tokens_used or 0)
|
|
1166
1194
|
current_step_data = {}
|
|
1167
1195
|
|
|
1168
1196
|
elif event.event_type == SessionEventType.MEMORY_UPDATE:
|
|
@@ -62,6 +62,16 @@ def build_run_visualization(
|
|
|
62
62
|
"success": observation_dict.get("success") if "success" in observation_dict else None,
|
|
63
63
|
"path": str(observation_dict.get("path") or ""),
|
|
64
64
|
"children_executed": int(observation_dict.get("children_executed") or 0),
|
|
65
|
+
"planner_preview": _clip_text(str(step.get("planner_raw") or ""), limit=260),
|
|
66
|
+
"code_preview": _clip_text(_action_code(step), limit=260),
|
|
67
|
+
"stdout_preview": _clip_text(str(observation_dict.get("stdout") or ""), limit=260),
|
|
68
|
+
"stderr_preview": _clip_text(str(observation_dict.get("stderr") or ""), limit=180),
|
|
69
|
+
"llm_calls_made": int(observation_dict.get("llm_calls_made") or 0),
|
|
70
|
+
"code_blocks_executed": int(observation_dict.get("code_blocks_executed") or 0),
|
|
71
|
+
"final_detected": bool(observation_dict.get("final_detected", False)),
|
|
72
|
+
"repl_variables": list(observation_dict.get("repl_variables") or [])[:20]
|
|
73
|
+
if isinstance(observation_dict.get("repl_variables"), list)
|
|
74
|
+
else [],
|
|
65
75
|
}
|
|
66
76
|
error = _extract_error(step)
|
|
67
77
|
if error:
|
|
@@ -190,6 +200,19 @@ def _action_name(step: dict[str, Any]) -> str:
|
|
|
190
200
|
return "unknown"
|
|
191
201
|
|
|
192
202
|
|
|
203
|
+
def _action_code(step: dict[str, Any]) -> str:
|
|
204
|
+
action = step.get("action")
|
|
205
|
+
if not isinstance(action, dict):
|
|
206
|
+
return ""
|
|
207
|
+
code = action.get("code")
|
|
208
|
+
if isinstance(code, str) and code.strip():
|
|
209
|
+
return code
|
|
210
|
+
blocks = action.get("_code_blocks")
|
|
211
|
+
if isinstance(blocks, list):
|
|
212
|
+
return "\n\n".join(str(block) for block in blocks if str(block).strip())
|
|
213
|
+
return ""
|
|
214
|
+
|
|
215
|
+
|
|
193
216
|
def _extract_error(step: dict[str, Any]) -> str:
|
|
194
217
|
observation = step.get("observation")
|
|
195
218
|
if not isinstance(observation, dict):
|