RubyGems - roast-ai - Versions diffs - 0.4.1 → 0.4.3 - Mend

roast-ai 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/CHANGELOG.md +43 -0
data/Gemfile +0 -1
data/Gemfile.lock +48 -23
data/README.md +228 -29
data/examples/coding_agent_with_model.yml +20 -0
data/examples/coding_agent_with_retries.yml +30 -0
data/examples/grading/rb_test_runner +1 -1
data/lib/roast/errors.rb +3 -0
data/lib/roast/helpers/metadata_access.rb +39 -0
data/lib/roast/helpers/timeout_handler.rb +1 -1
data/lib/roast/tools/coding_agent.rb +99 -27
data/lib/roast/tools/grep.rb +4 -0
data/lib/roast/version.rb +1 -1
data/lib/roast/workflow/agent_step.rb +57 -4
data/lib/roast/workflow/base_workflow.rb +4 -2
data/lib/roast/workflow/command_executor.rb +3 -1
data/lib/roast/workflow/configuration_parser.rb +2 -0
data/lib/roast/workflow/each_step.rb +5 -3
data/lib/roast/workflow/input_step.rb +2 -0
data/lib/roast/workflow/interpolator.rb +23 -1
data/lib/roast/workflow/metadata_manager.rb +47 -0
data/lib/roast/workflow/output_handler.rb +1 -0
data/lib/roast/workflow/replay_handler.rb +8 -0
data/lib/roast/workflow/shell_script_step.rb +115 -0
data/lib/roast/workflow/sqlite_state_repository.rb +17 -17
data/lib/roast/workflow/state_manager.rb +8 -0
data/lib/roast/workflow/step_executor_coordinator.rb +43 -8
data/lib/roast/workflow/step_executor_with_reporting.rb +2 -2
data/lib/roast/workflow/step_loader.rb +55 -9
data/lib/roast/workflow/workflow_executor.rb +3 -4
data/lib/roast/workflow/workflow_initializer.rb +95 -4
data/lib/roast/workflow/workflow_runner.rb +2 -2
data/lib/roast.rb +2 -0
data/roast.gemspec +3 -2
metadata +36 -18
data/lib/roast/workflow/step_orchestrator.rb +0 -48

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b31e95a8d65d0b1fa798c0baed8bcbf9249cc67fca752154865d914d01ffbd8d
-  data.tar.gz: 21205a0a50e2301dda0936e67b02d05af94085b0e383f859aec282827649ebb3
+  metadata.gz: 22264361960a52b5d21b2dcc715a77c23ad8601170dbf02daa531416b2180035
+  data.tar.gz: 581b96d84ca59f702e591f18b0ae1e10823fccaaef6ad44757f38b607d146d94
 SHA512:
-  metadata.gz: 80b5ceada69faff53f5893e9378e628474b103c3d6b60a8701be80b1bfe11cb6e4bde252670d890a2ae7e460d90d956daed7813d251cf6d84399ab70d321b0d8
-  data.tar.gz: 620e7d46fc6f7e9b10a71807f213ba101cb2bbd9a3bad073a153057bd67bc842cf74e4ffaa179596a2ed3655dc7a864c366034a37f4a720d2fd6faafa86f5541
+  metadata.gz: a1316b637cb27f19c467faebc6e21853cdac1ce69439db50e56a1bc7c8d69240f0eb7ae51e4a856d2ac0be3bf2f406b21d5a5e51131e7bcee2a5298b8b9799ae
+  data.tar.gz: d41214b70dd0d1755d7c8b49673ccc4b0296d911e97ebbc18c72c08ecf80b916277518bf0087d0286fc32a471c807f225a3262c5049381f42999ace482e196b6

data/.gitignore CHANGED Viewed

@@ -41,3 +41,4 @@ bin/thor
 gemfiles/*.lock
 bin/claude-swarm
+*.gem

data/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,49 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.4.3] - 2025-07-10
+### Changed
+- **Updated to raix-openai-eight gem** - Upgraded from `raix` to `raix-openai-eight` gem which supports OpenAI Ruby client v8.1
+## [0.4.2] - 2025-06-20
+### Added
+- **Multiline bash command support** (#289)
+  - Enhanced CommandExecutor to properly handle commands spanning multiple lines
+  - Enables sophisticated bash scripts in workflow steps
+  - Maintains backward compatibility with single-line commands
+- **Comprehensive shell security enhancements** (#289)
+  - Smart interpolation that detects shell commands and escapes dangerous characters
+  - Protection against shell injection for all major metacharacters:
+    - Backslashes (`\`) to prevent path injection
+    - Double quotes (`"`) to prevent breaking quoted contexts
+    - Dollar signs (`$`) to prevent variable expansion
+    - Backticks (`` ` ``) to prevent command substitution
+  - Context-aware escaping only in shell commands, preserving text elsewhere
+- **Early detection for missing Raix configuration** (#292)
+  - Provides helpful error messages when Raix is not properly initialized
+  - Shows example configuration for both OpenAI and OpenRouter
+  - Prevents cryptic "undefined method 'chat' for nil" errors
+- **Exit early feature for input steps** (#291)
+  - Pressing Ctrl-C during input steps now exits cleanly
+  - No more confusing stack traces when canceling input
+- **Default --dangerously-skip-permissions flag for CodingAgent** (#290)
+  - Avoids permission prompts during automated workflows
+  - Improves workflow automation experience
+### Fixed
+- Test isolation issue causing CI failures (#289)
+- Flaky test in StepExecutorRegistryTest due to executor registration conflicts (#289)
+- Shell command interpolation security vulnerabilities (#289)
+- Missing dependency declarations (cli-kit, sqlite3) (#292)
+### Changed
+- Updated cli-kit dependency to ~> 5.0 for better error handling
+- Updated sqlite3 dependency to ~> 2.6 to resolve version conflicts
+[0.4.2]: https://github.com/Shopify/roast/compare/v0.4.1...v0.4.2
 ## [0.4.1] - 2025-06-18
 ### Added

data/Gemfile CHANGED Viewed

@@ -18,6 +18,5 @@ gem "rubocop-shopify", require: false
 gem "vcr", require: false
 gem "webmock", require: false
 gem "minitest-rg"
-gem "sqlite3", "~> 1.7"
 gem "claude_swarm"

data/Gemfile.lock CHANGED Viewed

@@ -1,15 +1,16 @@
 PATH
   remote: .
   specs:
-    roast-ai (0.4.1)
+    roast-ai (0.4.3)
       activesupport (>= 7.0)
+      cli-kit (~> 5.0)
       cli-ui (= 2.3.0)
       diff-lcs (~> 1.5)
-      faraday-retry
       json-schema
       open_router (~> 0.3)
-      raix (~> 1.0)
+      raix-openai-eight (~> 1.0)
       ruby-graphviz (~> 1.2)
+      sqlite3 (~> 2.6)
       thor (~> 1.3)
       zeitwerk (~> 2.6)
@@ -36,9 +37,11 @@ GEM
     benchmark (0.4.1)
     bigdecimal (3.2.2)
     cgi (0.5.0)
-    claude_swarm (0.1.15)
+    claude_swarm (0.1.19)
       fast-mcp-annotations
       thor (~> 1.3)
+    cli-kit (5.0.1)
+      cli-ui (~> 2.0)
     cli-ui (2.3.0)
     coderay (1.1.3)
     concurrent-ruby (1.3.5)
@@ -83,21 +86,29 @@ GEM
       faraday-net_http (>= 2.0, < 3.5)
       json
       logger
-    faraday-multipart (1.1.0)
+    faraday-multipart (1.1.1)
       multipart-post (~> 2.0)
-    faraday-net_http (3.4.0)
+    faraday-net_http (3.4.1)
       net-http (>= 0.5.0)
-    faraday-retry (2.3.1)
+    faraday-retry (2.3.2)
       faraday (~> 2.0)
-    fast-mcp-annotations (1.5.2)
+    fast-mcp-annotations (1.5.3)
       addressable (~> 2.8)
       base64
       dry-schema (~> 1.14)
       json (~> 2.0)
       mime-types (~> 3.4)
-      rack (~> 3.1)
+      rack (< 3)
+    ffi (1.17.2-aarch64-linux-gnu)
+    ffi (1.17.2-aarch64-linux-musl)
+    ffi (1.17.2-arm-linux-gnu)
+    ffi (1.17.2-arm-linux-musl)
     ffi (1.17.2-arm64-darwin)
+    ffi (1.17.2-x86-linux-gnu)
+    ffi (1.17.2-x86-linux-musl)
+    ffi (1.17.2-x86_64-darwin)
     ffi (1.17.2-x86_64-linux-gnu)
+    ffi (1.17.2-x86_64-linux-musl)
     formatador (1.1.0)
     guard (2.19.1)
       formatador (>= 0.2.4)
@@ -132,8 +143,7 @@ GEM
     mime-types (3.7.0)
       logger
       mime-types-data (~> 3.2025, >= 3.2025.0507)
-    mime-types-data (3.2025.0603)
-    mini_portile2 (2.8.9)
+    mime-types-data (3.2025.0617)
     minitest (5.25.5)
     minitest-rg (5.3.0)
       minitest (~> 5.0)
@@ -151,7 +161,7 @@ GEM
       dotenv (>= 2)
       faraday (>= 1)
       faraday-multipart (>= 1)
-    ostruct (0.6.1)
+    ostruct (0.6.2)
     parallel (1.27.0)
     parser (3.3.8.0)
       ast (~> 2.4.1)
@@ -162,21 +172,21 @@ GEM
       method_source (~> 1.0)
     public_suffix (6.0.2)
     racc (1.8.1)
-    rack (3.1.16)
+    rack (2.2.17)
     rainbow (3.1.1)
-    raix (1.0.1)
+    raix-openai-eight (1.0.1)
       activesupport (>= 6.0)
       faraday-retry (~> 2.0)
       open_router (~> 0.2)
       ostruct
-      ruby-openai (~> 7)
+      ruby-openai (~> 8.1)
     rake (13.3.0)
     rb-fsevent (0.11.2)
     rb-inotify (0.11.1)
       ffi (~> 1.0)
     regexp_parser (2.10.0)
     rexml (3.4.1)
-    rubocop (1.76.0)
+    rubocop (1.77.0)
       json (~> 2.3)
       language_server-protocol (~> 3.17.0.2)
       lint_roller (~> 1.1.0)
@@ -184,17 +194,17 @@ GEM
       parser (>= 3.3.0.2)
       rainbow (>= 2.2.2, < 4.0)
       regexp_parser (>= 2.9.3, < 3.0)
-      rubocop-ast (>= 1.45.0, < 2.0)
+      rubocop-ast (>= 1.45.1, < 2.0)
       ruby-progressbar (~> 1.7)
       unicode-display_width (>= 2.4.0, < 4.0)
-    rubocop-ast (1.45.0)
+    rubocop-ast (1.45.1)
       parser (>= 3.3.7.2)
       prism (~> 1.4)
     rubocop-shopify (2.17.1)
       rubocop (~> 1.62)
     ruby-graphviz (1.2.5)
       rexml
-    ruby-openai (7.4.0)
+    ruby-openai (8.1.0)
       event_stream_parser (>= 0.3.0, < 2.0.0)
       faraday (>= 1)
       faraday-multipart (>= 1)
@@ -202,8 +212,16 @@ GEM
     ruby2_keywords (0.0.5)
     securerandom (0.4.1)
     shellany (0.0.1)
-    sqlite3 (1.7.3)
-      mini_portile2 (~> 2.8.0)
+    sqlite3 (2.7.0-aarch64-linux-gnu)
+    sqlite3 (2.7.0-aarch64-linux-musl)
+    sqlite3 (2.7.0-arm-linux-gnu)
+    sqlite3 (2.7.0-arm-linux-musl)
+    sqlite3 (2.7.0-arm64-darwin)
+    sqlite3 (2.7.0-x86-linux-gnu)
+    sqlite3 (2.7.0-x86-linux-musl)
+    sqlite3 (2.7.0-x86_64-darwin)
+    sqlite3 (2.7.0-x86_64-linux-gnu)
+    sqlite3 (2.7.0-x86_64-linux-musl)
     thor (1.3.2)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
@@ -220,8 +238,16 @@ GEM
     zeitwerk (2.7.3)
 PLATFORMS
+  aarch64-linux-gnu
+  aarch64-linux-musl
+  arm-linux-gnu
+  arm-linux-musl
   arm64-darwin
-  x86_64-linux
+  x86-linux-gnu
+  x86-linux-musl
+  x86_64-darwin
+  x86_64-linux-gnu
+  x86_64-linux-musl
 DEPENDENCIES
   cgi
@@ -235,7 +261,6 @@ DEPENDENCIES
   rake
   roast-ai!
   rubocop-shopify
-  sqlite3 (~> 1.7)
   vcr
   webmock

data/README.md CHANGED Viewed

@@ -279,35 +279,126 @@ Roast supports several types of steps:
    ```
    Agent steps are prefixed with `^` and send the prompt content directly to the CodingAgent tool without LLM translation. This is useful when you want to give precise instructions to a coding agent without the intermediate interpretation layer. Agent steps support both file-based prompts (`fix_linting_errors/prompt.md`) and inline prompts (text with spaces).
-9. **Input step**: Interactive prompts for user input during workflow execution
+   **Session continuity for agent steps:**
+   Agent steps support two options for maintaining Claude context across steps:
+   1. **`continue: true`** - Continues from the immediately previous Claude Code session (note: if multiple Claude Code sessions are running in parallel in the same working directory, the most recent session might not be the one started by this workflow)
+   2. **`resume: step_name`** - Resumes from a specific earlier step's Claude Code session
+   **Continue option:**
+   The `continue` option allows sequential agent steps to maintain a continuous conversation:
    ```yaml
    steps:
-     - analyze_code
-     - get_user_feedback:
-         prompt: "Should we proceed with the refactoring? (yes/no)"
-         type: confirm
-     - review_changes:
-         prompt: "Enter your review comments"
-         type: text
-     - select_strategy:
-         prompt: "Choose optimization strategy"
-         type: select
-         options:
-           - "Performance optimization"
-           - "Memory optimization"
-           - "Code clarity"
-     - api_configuration:
-         prompt: "Enter API key"
-         type: password
+     - ^analyze_codebase
+     - ^implement_feature
+     - ^add_tests
+   # Configuration
+   analyze_codebase:
+     continue: false  # Start fresh (default)
+   implement_feature:
+     continue: true   # Continue from immediately previous analyze_codebase step
+   add_tests:
+     continue: true   # Continue from immediately previous implement_feature step
    ```
-   Input steps pause workflow execution to collect user input. They support several types:
-   - `text`: Free-form text input (default if type not specified)
-   - `confirm`: Yes/No confirmation prompts
-   - `select`: Choice from a list of options
-   - `password`: Masked input for sensitive data
+   **Resume functionality for agent steps:**
+   Agent steps can resume from specific previous Claude Code sessions:
-   The user's input is stored in the workflow output using the step name as the key and can be accessed in subsequent steps via interpolation (e.g., `{{output.get_user_feedback}}`).
+   ```yaml
+   steps:
+     - ^analyze_codebase
+     - ^implement_feature
+     - ^polish_implementation
+   # Configuration
+   analyze_codebase:
+     continue: false  # Start fresh
+   implement_feature:
+     continue: true   # Continue from previous conversation
+   polish_implementation:
+     resume: analyze_codebase  # Resume from a specific step's session, not the immediately previous one
+   ```
+   Note: Session IDs are only available when the CodingAgent is configured to output JSON format (includes `--output-format stream-json` in the command). If you are using a custom CodingAgent command that does not produce JSON output, resume functionality will not be available.
+   If `resume` is specified but the named step has no CodingAgent session to resume from, the CodingAgent will start Claude Code with a fresh session.
+9. **Shell script step**: Execute shell scripts directly as workflow steps
+   ```yaml
+   steps:
+     - setup_environment     # Executes setup_environment.sh
+     - run_tests             # Executes run_tests.sh
+     - cleanup
+   ```
+   Shell script steps allow you to execute `.sh` files directly as workflow steps alongside Ruby steps and AI prompts. Scripts are automatically discovered in the same locations as other step types.
+   **Configuration options:**
+   ```yaml
+   # Step configuration
+   my_script:
+     json: true              # Parse stdout as JSON
+     exit_on_error: false    # Don't fail workflow on non-zero exit
+     env:                    # Custom environment variables
+       CUSTOM_VAR: "value"
+   ```
+   **Environment integration:** Shell scripts automatically receive workflow context:
+   - `ROAST_WORKFLOW_RESOURCE`: Current workflow resource
+   - `ROAST_STEP_NAME`: Current step name
+   - `ROAST_WORKFLOW_OUTPUT`: Previous step outputs as JSON
+   **Example script (`setup_environment.sh`):**
+   ```bash
+   #!/bin/bash
+   echo "Setting up environment for: $ROAST_WORKFLOW_RESOURCE"
+   # Create a config file that subsequent steps can use
+   mkdir -p tmp
+   echo "DATABASE_URL=sqlite://test.db" > tmp/config.env
+   # Output data for the workflow (available via ROAST_WORKFLOW_OUTPUT in later steps)
+   echo '{"status": "configured", "database": "sqlite://test.db", "config_file": "tmp/config.env"}'
+   ```
+10. **Input step**: Interactive prompts for user input during workflow execution
+    ```yaml
+    steps:
+      - analyze_code
+      - get_user_feedback:
+          prompt: "Should we proceed with the refactoring? (yes/no)"
+          type: confirm
+      - review_changes:
+          prompt: "Enter your review comments"
+          type: text
+      - select_strategy:
+          prompt: "Choose optimization strategy"
+          type: select
+          options:
+            - "Performance optimization"
+            - "Memory optimization"
+            - "Code clarity"
+      - api_configuration:
+          prompt: "Enter API key"
+          type: password
+    ```
+    Input steps pause workflow execution to collect user input. They support several types:
+    - `text`: Free-form text input (default if type not specified)
+    - `confirm`: Yes/No confirmation prompts
+    - `select`: Choice from a list of options
+    - `password`: Masked input for sensitive data
+    The user's input is stored in the workflow output using the step name as the key and can be accessed in subsequent steps via interpolation (e.g., `{{output.get_user_feedback}}`).
 #### Step Configuration
@@ -705,6 +796,100 @@ For most workflows, you'll mainly use `response` to access the current step's re
 ## Advanced Features
+### Workflow Metadata
+Roast workflows maintain a metadata store that allows steps to share structured data beyond the standard output hash. This is particularly useful for tracking state that needs to persist across steps but shouldn't be part of the conversation context.
+#### Setting Metadata
+Metadata can be set by custom Ruby steps that extend `BaseStep`:
+```ruby
+# workflow/analyze_codebase.rb
+class AnalyzeCodebase < Roast::Workflow::BaseStep
+  include Roast::Helpers::MetadataAccess
+  def call
+    # Perform analysis
+    analysis_results = perform_deep_analysis
+    # Store metadata for other steps to use
+    workflow.metadata[name.to_s] ||= {}
+    workflow.metadata[name.to_s]["total_files"] = analysis_results[:file_count]
+    workflow.metadata[name.to_s]["complexity_score"] = analysis_results[:complexity]
+    workflow.metadata[name.to_s]["analysis_id"] = SecureRandom.uuid
+    # Return the normal output for the conversation
+    "Analyzed #{analysis_results[:file_count]} files with average complexity of #{analysis_results[:complexity]}"
+  end
+  private
+  def perform_deep_analysis
+    # Your analysis logic here
+    { file_count: 42, complexity: 7.5 }
+  end
+end
+```
+#### Accessing Metadata
+Metadata from previous steps can be accessed in:
+1. **Custom Ruby steps:**
+```ruby
+class GenerateReport < Roast::Workflow::BaseStep
+  def call
+    # Access metadata from a previous step
+    total_files = workflow.metadata.dig("analyze_codebase", "total_files")
+    complexity = workflow.metadata.dig("analyze_codebase", "complexity_score")
+    "Generated report for #{total_files} files with complexity score: #{complexity}"
+  end
+end
+```
+2. **Workflow configuration via interpolation:**
+```yaml
+steps:
+  - analyze_codebase
+  - validate_threshold
+  - generate_report
+# Use metadata in step configuration
+validate_threshold:
+  if: "{{metadata.analyze_codebase.complexity_score > 8.0}}"
+  then:
+    - send_alert
+    - create_ticket
+  else:
+    - mark_as_passed
+# Pass metadata to command steps
+send_alert:
+  $(slack-notify "High complexity detected: {{metadata.analyze_codebase.complexity_score}}")
+```
+3. **Prompt templates (ERB):**
+```erb
+# In analyze_codebase/output.txt
+Analysis Summary:
+Files analyzed: <%= workflow.metadata.dig(name.to_s, "total_files") %>
+Complexity score: <%= workflow.metadata.dig(name.to_s, "complexity_score") %>
+Analysis ID: <%= workflow.metadata.dig(name.to_s, "analysis_id") %>
+```
+#### Metadata Best Practices
+- **Use metadata for data that shouldn't be in the conversation**
+- **Don't duplicate output data:** Metadata complements the output hash, it doesn't replace it
+The metadata system is particularly useful for:
+- Tracking session or transaction IDs across multiple steps
+- Storing configuration or state that tools need to access
+- Passing data between steps without cluttering the AI conversation
+- Implementing complex conditional logic based on computed values
 ### Instrumentation
 Roast provides extensive instrumentation capabilities using ActiveSupport::Notifications. You can monitor workflow execution, track AI model usage, measure performance, and integrate with external monitoring systems. [Read the full instrumentation documentation](docs/INSTRUMENTATION.md).
@@ -727,11 +912,16 @@ tools:
         - yarn
   - Roast::Tools::CodingAgent:     # Optional configuration
       coding_agent_command: claude --model opus -p --allowedTools "Bash, Glob, Grep, LS, Read"
+      model: opus                  # Model to use for all CodingAgent invocations
+      retries: 3                   # Number of automatic retries on failure (default: 0)
 ```
 Currently supported configurations:
 - `Roast::Tools::Cmd` via `allowed_commands`: restricts which commands can be executed (defaults to: `pwd`, `find`, `ls`, `rake`, `ruby`, `dev`, `mkdir`)
-- `Roast::Tools::CodingAgent` via `coding_agent_command`: customizes the Claude Code CLI command used by the agent
+- `Roast::Tools::CodingAgent` via:
+  - `coding_agent_command`: customizes the Claude Code CLI command used by the agent
+  - `model`: sets the model for all CodingAgent invocations (e.g., `opus`, `sonnet`)
+  - `retries`: number of times to automatically retry if the agent encounters an error (default: 0, no retries)
 ##### Cmd Tool Configuration
@@ -959,16 +1149,25 @@ bash(command: "ps aux | grep ruby | awk '{print $2}'")
 Creates a specialized agent for complex coding tasks or long-running operations.
 ```ruby
+# Basic usage
+coding_agent(
+  prompt: "Refactor the authentication module to use JWT tokens",
+  include_context_summary: true,  # Include workflow context in the agent prompt
+  continue: true                  # Continue from previous agent session
+)
+# With automatic retries on failure
 coding_agent(
-  task: "Refactor the authentication module to use JWT tokens",
-  language: "ruby",
-  files: ["app/models/user.rb", "app/controllers/auth_controller.rb"]
+  prompt: "Implement complex feature with error handling",
+  retries: 3  # Retry up to 3 times if the agent encounters errors
 )
 ```
-- Delegates complex tasks to a specialized coding agent
+- Delegates complex tasks to a specialized coding agent (Claude Code)
 - Useful for tasks that require deep code understanding or multi-step changes
 - Can work across multiple files and languages
+- Supports automatic retries on transient failures (network issues, API errors)
+- Retries can be configured globally (see Tool Configuration) or per invocation
 ### MCP (Model Context Protocol) Tools

data/examples/coding_agent_with_model.yml ADDED Viewed

@@ -0,0 +1,20 @@
+name: CodingAgent with Model Configuration
+description: |
+  Example workflow demonstrating how to configure the CodingAgent tool
+  with specific model options like opus
+tools:
+  - Roast::Tools::CodingAgent:
+      model: opus
+      # You can also add other claude options here:
+      # temperature: 0.7
+      # max_tokens: 1000
+steps:
+  - analyze_code: |
+      Analyze the Ruby code in lib/roast/tools/coding_agent.rb
+      and explain how the CodingAgent tool works.
+  - implement_feature: |
+      Create a simple Ruby script that demonstrates using command-line
+      options similar to how CodingAgent builds its commands.

data/examples/coding_agent_with_retries.yml ADDED Viewed

@@ -0,0 +1,30 @@
+name: CodingAgent with Retries Configuration
+description: |
+  Example workflow demonstrating how to configure the CodingAgent tool
+  with automatic retries on failure. The retries option will automatically
+  retry the coding agent if it encounters an error during execution.
+  Note: this is not the same as re-running the step itself.
+tools:
+  - Roast::Tools::CodingAgent:
+      retries: 2  # Automatically retry up to 2 times on failure
+steps:
+  # This step invokes the coding agent directly using the specified number of retries
+  - ^implement_a_feature: |
+      Create a Ruby script that demonstrates robust error handling.
+      The script should:
+      1. Attempt to read a file that might not exist
+      2. Handle any errors gracefully
+      3. Log the results
+  # This step invokes the general workflow LLM which can in turn invoke the coding agent.
+  # When the general LLM invokes the coding agent, it will execute with the specified number of retries.
+  - add_tests: |
+      Add test for the feature you just implemented.
+      Run the tests and iterate until they all pass.
+      Use the CodingAgent tool to write the tests.
+add_tests:
+  retries: 4

data/examples/grading/rb_test_runner CHANGED Viewed

@@ -4,7 +4,7 @@
 require "rubygems"
 require "bundler/setup"
-require_relative "../../lib/roast/helpers/minitest_coverage_runner"
+require "roast/helpers/minitest_coverage_runner"
 # Suppress fancy minitest reporting
 ENV["RM_INFO"] = "true"

data/lib/roast/errors.rb CHANGED Viewed

@@ -7,5 +7,8 @@ module Roast
     # Custom error for when API authentication fails
     class AuthenticationError < StandardError; end
+    # Exit the app, for instance via Ctrl-C during an InputStep
+    class ExitEarly < StandardError; end
   end
 end

data/lib/roast/helpers/metadata_access.rb ADDED Viewed

@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+module Roast
+  module Helpers
+    module MetadataAccess
+      def step_metadata(step_name = nil)
+        step_name ||= current_step_name
+        return {} unless step_name
+        metadata = workflow_metadata || {}
+        metadata[step_name] || {}
+      end
+      def set_current_step_metadata(key, value)
+        step_name = current_step_name
+        metadata = workflow_metadata
+        return unless step_name && metadata
+        metadata[step_name] ||= {}
+        metadata[step_name][key] = value
+      end
+      private
+      def workflow_metadata
+        metadata = Thread.current[:workflow_metadata]
+        Roast::Helpers::Logger.warn("MetadataAccess#workflow_metadata is not present") if metadata.nil?
+        metadata
+      end
+      def current_step_name
+        step_name = Thread.current[:current_step_name]
+        Roast::Helpers::Logger.warn("MetadataAccess#current_step_name is not present") if step_name.nil?
+        step_name
+      end
+    end
+  end
+end

data/lib/roast/helpers/timeout_handler.rb CHANGED Viewed

@@ -17,7 +17,7 @@ module Roast
     #   output, status = TimeoutHandler.call("pwd", timeout: 10, working_directory: "/tmp")
     class TimeoutHandler
       DEFAULT_TIMEOUT = 30
-      MAX_TIMEOUT = 300
+      MAX_TIMEOUT = 3600
       class << self
         # Execute a command with timeout using Open3 with proper process cleanup