npm - @specsage/cli - Versions diffs - 0.1.0 - Mend

@specsage/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/lib/runner.rb ADDED Viewed

@@ -0,0 +1,613 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+require 'json'
+require 'timeout'
+require 'fileutils'
+# Determine SpecSage home directory for locating resources
+SPECSAGE_HOME ||= File.expand_path('..', __dir__)
+require_relative 'step_client'
+class Runner
+  NODE_IO_TIMEOUT_SECONDS = 30
+  NODE_SHUTDOWN_TIMEOUT_SECONDS = 45
+  BROWSER_ACTIONS = %w[navigate click select keypress wait scroll accept_dialog dismiss_dialog].freeze
+  # Pattern for safe path segment: alphanumeric, underscore, hyphen only
+  # Prevents directory traversal, special chars, and filesystem issues
+  SAFE_PATH_SEGMENT = /\A[a-zA-Z0-9_-]+\z/
+  # Initialize runner with scenario data from server
+  def initialize(scenario_data, visible: false, record: false, publisher: nil, server_run_id: nil)
+    @scenario = normalize_scenario_data(scenario_data)
+    @scenario_id = @scenario['id']
+    @scenario_name = @scenario['name'] || @scenario['id'] || 'unnamed'
+    @visible = visible
+    @record = record
+    @video_data = nil # Binary video data
+    @node_stdin = nil
+    @node_stdout = nil
+    @node_wait_thread = nil
+    @next_request_id = 1
+    @node_channel_poisoned = false
+    @publisher = publisher
+    @step_client = nil
+    @server_run_id = server_run_id
+    @credentials = {} # Credentials received from server { "NAME" => "value" }
+    @max_steps = nil # Max browser actions allowed, received from server on first step
+    @temp_dir = nil # Unique temp directory for this runner's video recording
+  end
+  # Run the scenario: start browser.js, open the base URL, then repeatedly send the
+  # current page state to the server and execute the action it returns until a
+  # verdict arrives, the step limit is reached, or the server stops the run.
+  #
+  # StandardError failures are not re-raised: they are reported to the server as an
+  # ERROR verdict (when a step client exists) and logged by exception class.
+  # Shutdown, video upload and temp-dir cleanup always run, on success and on failure.
+  # The return value is not meaningful.
+  def run
+    log "Starting scenario run"
+    raise ArgumentError, 'server_run_id is required' unless @server_run_id
+    @step_client = StepClient.new(
+      base_url: @publisher.base_url,
+      server_run_id: @server_run_id,
+      api_key: @publisher.api_key
+    )
+    start_node_process
+    initial_state = navigate_to_base_url
+    current_screenshot_base64 = initial_state[:screenshot_base64]
+    interactive_elements = initial_state[:elements]
+    interactive_elements_by_id = build_elements_by_id(interactive_elements)
+    previous_action = nil
+    action_result = nil
+    loop do
+      # Get next action from server
+      step_result = @step_client.submit_step(
+        scenario_id: @scenario_id,
+        screenshot_base64: current_screenshot_base64,
+        elements: interactive_elements,
+        previous_action: previous_action,
+        action_result: action_result
+      )
+      # Store credentials and max_steps from server on first step only
+      # Server sends these once at scenario start, runner caches them for the duration
+      if step_result[:credentials] && !step_result[:credentials].empty?
+        @credentials = step_result[:credentials]
+      end
+      @max_steps ||= step_result[:max_steps]
+      action = step_result[:action]
+      # Enrich action with element info for logging and storage
+      if action['element_id'] && interactive_elements_by_id
+        element = interactive_elements_by_id[action['element_id']]
+        if element
+          action['element_name'] = element_display_name(element)
+          action['type'] = element['type']
+          action['role'] = element['role']
+          action['visible_text'] = element['visible_text']
+        end
+      end
+      # Log action with element details
+      safe_puts "  [Step #{step_result[:step_number]}] #{action['action']}: #{action.reject { |k, _| k == 'action' }.to_json}"
+      break if action['action'] == 'verdict'
+      # Skip browser execution for meta actions
+      unless BROWSER_ACTIONS.include?(action['action'])
+        previous_action = action
+        action_result = "Meta action: #{action['action']}"
+        next
+      end
+      result = execute_action(action)
+      # Update state from action result
+      # Note: screenshot may be nil if a dialog is blocking (can't screenshot while dialog is up)
+      # but we still need to update elements (which will contain the DIALOG pseudo-element)
+      if result[:screenshot_base64]
+        current_screenshot_base64 = result[:screenshot_base64]
+      end
+      if result[:elements]
+        interactive_elements = result[:elements]
+        interactive_elements_by_id = build_elements_by_id(interactive_elements)
+      end
+      previous_action = action
+      action_result = result[:result]
+      # Check if step count has reached max_steps limit
+      # This check happens AFTER executing the action, so max_steps=4 means 4 actions execute
+      step_number = step_result[:step_number] || 0
+      if @max_steps && step_number >= @max_steps
+        log "Step limit exceeded (#{@max_steps})."
+        send_client_verdict_if_needed('ERROR', "Step limit exceeded (#{@max_steps}).")
+        break
+      end
+      # Check if server says we should stop
+      break unless step_result[:continue]
+    end
+  rescue StepClient::StepError => e
+    send_client_verdict_if_needed('ERROR', "Server error: #{e.message}")
+    # Log only the class: the message may echo substituted credential values,
+    # which the log guard would reject
+    log "Run aborted by #{e.class}"
+  rescue StandardError => e
+    send_client_verdict_if_needed('ERROR', e.message)
+    log "Run aborted by #{e.class}"
+  ensure
+    # Nested ensures so a failure in one cleanup stage cannot skip the later ones
+    begin
+      stop_node_process
+    ensure
+      begin
+        upload_video
+      ensure
+        cleanup_temp_dir
+      end
+    end
+  end
+  private
+  # Normalize scenario data from API format (string keys) to internal format
+  def normalize_scenario_data(data)
+    # API returns string keys, ensure consistent access
+    {
+      'id' => data['id'] || data[:id],
+      'name' => data['name'] || data[:name],
+      'base_url' => data['base_url'] || data[:base_url]
+    }
+  end
+  def start_node_process
+    node_script = File.join(SPECSAGE_HOME, 'lib', 'browser.js')
+    raise "browser.js not found at #{node_script}" unless File.exist?(node_script)
+    # Create unique temp directory for this runner's video recording
+    if @record
+      @temp_dir = build_safe_temp_dir
+      FileUtils.mkdir_p(@temp_dir)
+    end
+    args = ['node', node_script]
+    args << '--visible' if @visible
+    args << '--record' if @record
+    args.push('--temp-dir', @temp_dir) if @temp_dir
+    @node_stdin, @node_stdout, @node_stderr, @node_wait_thread = Open3.popen3(*args)
+    # Wait for ready signal from browser.js
+    wait_for_ready_signal
+  end
+  # Read the first stdout line from browser.js and require it to be a JSON
+  # message whose 'status' is 'ready', then flush pending stderr output to the log.
+  #
+  # @param timeout [Numeric] seconds to wait for the first line
+  # @raise [RuntimeError] on timeout, closed stdout, invalid JSON, or a non-ready status
+  def wait_for_ready_signal(timeout: NODE_IO_TIMEOUT_SECONDS)
+    handshake = Timeout.timeout(timeout) do
+      first_line = @node_stdout.gets
+      if first_line.nil?
+        raise 'browser.js closed stdout before sending ready signal'
+      end
+      JSON.parse(first_line)
+    end
+    if handshake['status'] != 'ready'
+      raise "Expected ready signal from browser.js, got: #{handshake.inspect}"
+    end
+    # Drain any stderr output from browser startup (e.g., video recording status)
+    drain_node_stderr
+  rescue Timeout::Error
+    raise "browser.js startup timed out after #{timeout}s waiting for ready signal"
+  rescue JSON::ParserError => e
+    raise "Protocol error during startup: Invalid JSON from browser.js: #{e.message}"
+  end
+  def drain_node_stderr
+    return unless @node_stderr
+    loop do
+      line = @node_stderr.read_nonblock(4096)
+      break if line.nil? || line.empty?
+      line.each_line { |l| log "Node: #{l.strip}" unless l.strip.empty? }
+    end
+  rescue IO::WaitReadable, EOFError
+    # No more data available
+  end
+  # Shut down the browser.js child process and release its pipes.
+  #
+  # Unless the channel is poisoned, a 'quit' request is sent first and the reply is
+  # inspected for a video file path; the file is read into @video_data and deleted.
+  # Afterwards stderr is drained, all pipes are closed and forgotten, and the
+  # process is sent TERM (then KILL if it has not exited within 2 seconds).
+  # Clearing the pipe handles makes repeated calls return immediately.
+  def stop_node_process
+    return unless @node_stdin || @node_wait_thread
+    pid = @node_wait_thread&.pid
+    # Attempt graceful shutdown with protocol-compliant quit (skip if channel poisoned)
+    unless @node_channel_poisoned
+      begin
+        Timeout.timeout(NODE_SHUTDOWN_TIMEOUT_SECONDS) do
+          request_id = @next_request_id
+          request = { request_id: request_id, command: 'quit', params: {} }
+          @node_stdin.puts(request.to_json)
+          @node_stdin.flush
+          response_line = @node_stdout.gets
+          if response_line
+            begin
+              response = JSON.parse(response_line)
+              video_path = response&.dig('result', 'video_path')
+              if video_path
+                if File.exist?(video_path)
+                  @video_data = File.binread(video_path)
+                  log "Video captured: #{video_path} (#{@video_data&.bytesize} bytes)"
+                  File.delete(video_path) rescue nil
+                else
+                  log "Warning: Video path returned but file does not exist: #{video_path}"
+                  # List contents of temp directory for debugging
+                  if @temp_dir && Dir.exist?(@temp_dir)
+                    files = Dir.entries(@temp_dir).reject { |f| f.start_with?('.') }
+                    log "Temp dir contents: #{files.inspect}"
+                  end
+                end
+              else
+                log "Warning: No video path in quit response (recording may not have been enabled)"
+              end
+            rescue JSON::ParserError => e
+              log "Warning: Failed to parse quit response: #{e.message}"
+            end
+          else
+            log "Warning: No response from Node process on quit"
+          end
+        end
+      rescue Timeout::Error => e
+        log "Warning: Timeout waiting for Node shutdown: #{e.message}"
+      rescue StandardError => e
+        log "Warning: Error during Node shutdown: #{e.class}: #{e.message}"
+      end
+    end
+    # Read any remaining stderr output from Node process for debugging
+    drain_node_stderr
+    # Close IO streams, ignoring close failures
+    [@node_stdin, @node_stdout, @node_stderr].each do |stream|
+      begin
+        stream&.close
+      rescue StandardError
+        nil
+      end
+    end
+    # Forget the closed handles so the guard at the top short-circuits later calls
+    # instead of operating on closed streams
+    @node_stdin = @node_stdout = @node_stderr = nil
+    # Force kill the process if still running
+    if pid
+      begin
+        Process.kill(0, pid) # Check if process exists
+        Process.kill('TERM', pid)
+        # Wait briefly for termination
+        Timeout.timeout(2) { @node_wait_thread&.join }
+      rescue Errno::ESRCH
+        # Process already dead, OK
+      rescue Timeout::Error
+        # Force kill if TERM didn't work
+        Process.kill('KILL', pid) rescue nil
+      rescue StandardError
+        # Ignore other errors during cleanup
+      end
+    end
+    @node_wait_thread = nil
+  end
+  # Send one JSON request line to browser.js and return its validated reply.
+  # A timeout or an unparseable reply poisons the channel before raising.
+  #
+  # @param command [String] command name understood by browser.js
+  # @param params [Hash] command parameters
+  # @param timeout [Numeric] seconds allowed for the write plus the reply
+  # @return [Hash] parsed reply envelope
+  # @raise [RuntimeError] when the channel is poisoned, on timeout, on EOF,
+  #   on invalid JSON, or when validation rejects the reply
+  def send_to_node(command, params = {}, timeout: NODE_IO_TIMEOUT_SECONDS)
+    if @node_channel_poisoned
+      raise 'Protocol error: Node channel is poisoned, cannot send'
+    end
+    expected_id = @next_request_id
+    envelope = { request_id: expected_id, command: command, params: params }
+    reply =
+      begin
+        Timeout.timeout(timeout) do
+          @node_stdin.puts(envelope.to_json)
+          @node_stdin.flush
+          raw_reply = @node_stdout.gets
+          raise 'No response from Node process' unless raw_reply
+          JSON.parse(raw_reply)
+        end
+      rescue Timeout::Error
+        poison_node_channel!
+        raise "Node process timed out after #{timeout}s waiting for response"
+      rescue JSON::ParserError => e
+        poison_node_channel!
+        raise "Protocol error: Invalid JSON from Node: #{e.message}"
+      end
+    # Validate response envelope per docs/messages.md
+    validate_node_response!(reply, expected_id)
+    # The id advances only once the reply has passed validation
+    @next_request_id += 1
+    reply
+  end
+  def poison_node_channel!
+    @node_channel_poisoned = true
+    # Immediately terminate the Node process
+    stop_node_process
+  end
+  def validate_node_response!(response, expected_request_id)
+    # Check required keys
+    unless response.key?('request_id')
+      poison_node_channel!
+      raise 'Protocol error: missing request_id in Node response'
+    end
+    unless response.key?('ok') && response.key?('result')
+      poison_node_channel!
+      raise 'Protocol error: missing ok or result in Node response'
+    end
+    # Check request_id match (null from Node means parse error, treat as fatal)
+    response_id = response['request_id']
+    if response_id.nil? || response_id != expected_request_id
+      poison_node_channel!
+      raise "Protocol error: request_id mismatch - expected #{expected_request_id}, got #{response_id.inspect}"
+    end
+    # Check for error response
+    unless response['ok']
+      error = response['error'] || {}
+      error_code = error['code'] || 'UNKNOWN'
+      error_message = error['message'] || 'Unknown error from Node'
+      raise "Node error [#{error_code}]: #{error_message}"
+    end
+  end
+  def navigate_to_base_url
+    base_url = @scenario['base_url']
+    raise 'base_url not specified in scenario file' unless base_url
+    response = send_to_node('navigate', { url: base_url })
+    screenshot_base64 = response.dig('result', 'screenshot_base64')
+    elements = response.dig('result', 'elements') || []
+    { screenshot_base64: screenshot_base64, elements: elements }
+  end
+  def execute_action(action)
+    case action['action']
+    when 'navigate'
+      # Substitute credentials in URL (e.g., https://<<API_KEY>>@api.example.com)
+      url = action['url']
+      display_url = url # For logging (shows placeholders, not actual values)
+      url = substitute_credentials(url) if contains_credential_placeholder?(url)
+      response = send_to_node('navigate', { url: url })
+      screenshot_base64 = response.dig('result', 'screenshot_base64')
+      elements = response.dig('result', 'elements') || []
+      { result: "Navigated to #{display_url}", screenshot_base64: screenshot_base64, elements: elements }
+    when 'click'
+      element_id = action['element_id']
+      response = send_to_node('click_element', { element_id: element_id })
+      screenshot_base64 = response.dig('result', 'screenshot_base64')
+      elements = response.dig('result', 'elements') || []
+      { result: "Clicked element #{element_id}", screenshot_base64: screenshot_base64, elements: elements }
+    when 'select'
+      element_id = action['element_id']
+      value = action['value']
+      display_value = value # For logging (shows placeholders, not actual values)
+      value = substitute_credentials(value) if contains_credential_placeholder?(value)
+      response = send_to_node('select_option', { element_id: element_id, value: value })
+      screenshot_base64 = response.dig('result', 'screenshot_base64')
+      elements = response.dig('result', 'elements') || []
+      { result: "Selected '#{display_value}' in element #{element_id}", screenshot_base64: screenshot_base64, elements: elements }
+    when 'keypress'
+      # Substitute credential placeholders at the last moment before browser execution
+      # Supports inline placeholders: <<USERNAME>>@example.com, <<USER>>:<<PASS>>, etc.
+      keys = action['keys']
+      display_keys = keys # For logging (shows placeholders, not actual values)
+      # Skip credential substitution for special key combos like ctrl+a
+      keys = substitute_credentials(keys) if contains_credential_placeholder?(keys) && !special_key_combo?(keys)
+      response = send_to_node('keypress', { keys: keys })
+      screenshot_base64 = response.dig('result', 'screenshot_base64')
+      elements = response.dig('result', 'elements') || []
+      { result: "Pressed keys: #{display_keys}", screenshot_base64: screenshot_base64, elements: elements }
+    when 'wait'
+      response = send_to_node('wait', { ms: action['ms'] })
+      screenshot_base64 = response.dig('result', 'screenshot_base64')
+      elements = response.dig('result', 'elements') || []
+      { result: "Waited #{action['ms']}ms", screenshot_base64: screenshot_base64, elements: elements }
+    when 'scroll'
+      response = send_to_node('scroll', { direction: action['direction'] })
+      screenshot_base64 = response.dig('result', 'screenshot_base64')
+      elements = response.dig('result', 'elements') || []
+      { result: "Scrolled #{action['direction']}", screenshot_base64: screenshot_base64, elements: elements }
+    when 'accept_dialog'
+      value = action['value'] # Optional, for prompt dialogs
+      response = send_to_node('accept_dialog', { value: value })
+      screenshot_base64 = response.dig('result', 'screenshot_base64')
+      elements = response.dig('result', 'elements') || []
+      { result: "Accepted dialog#{value ? " with value '#{value}'" : ''}", screenshot_base64: screenshot_base64, elements: elements }
+    when 'dismiss_dialog'
+      response = send_to_node('dismiss_dialog', {})
+      screenshot_base64 = response.dig('result', 'screenshot_base64')
+      elements = response.dig('result', 'elements') || []
+      { result: "Dismissed dialog", screenshot_base64: screenshot_base64, elements: elements }
+    else
+      { result: "Unknown action: #{action['action']}", screenshot_base64: nil, elements: nil }
+    end
+  end
+  def build_elements_by_id(elements)
+    return {} unless elements
+    elements.each_with_object({}) { |e, h| h[e['id']] = e }
+  end
+  # Extract a human-readable display name from an element.
+  # SECURITY: Returns sanitized text safe for storage/display. The returned value
+  # originates from browser DOM and should never be marked html_safe in templates.
+  def element_display_name(element)
+    name = element['accessible_name'].to_s.strip
+    return sanitize_display_text(name) unless name.empty?
+    name = element['visible_text'].to_s.strip
+    return sanitize_display_text(name) unless name.empty?
+    # Fallback: type (e.g., "button", "input", "link") - trusted internal value
+    element['type']
+  end
+  # Sanitize untrusted display text: strip control characters, enforce max length.
+  # Mirrors Document::Test::Step.sanitize_display_text for use in runner context.
+  MAX_DISPLAY_TEXT_LENGTH = 500
+  def sanitize_display_text(text)
+    return nil if text.nil?
+    text.to_s
+        .gsub(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/, '') # Strip control chars (keep \t, \n, \r)
+        .strip
+        .slice(0, MAX_DISPLAY_TEXT_LENGTH)
+  end
+  def upload_video
+    unless @video_data
+      log "No video data to upload (recording may not have been enabled or failed)"
+      return
+    end
+    unless @step_client
+      log "Warning: No step client available for video upload"
+      return
+    end
+    log "Uploading video (#{@video_data.bytesize} bytes)..."
+    @step_client.upload_video(scenario_id: @scenario_id, video_data: @video_data)
+    log "Video uploaded successfully."
+  rescue StepClient::StepError => e
+    log "Warning: Failed to upload video: #{e.message}"
+  end
+  def cleanup_temp_dir
+    return unless @temp_dir && Dir.exist?(@temp_dir)
+    # Safety check: only delete if temp_dir is within expected base directory
+    base_tmp = File.join(SPECSAGE_HOME, 'tmp')
+    unless @temp_dir.start_with?(base_tmp + File::SEPARATOR)
+      log "Warning: Refusing to delete temp_dir outside expected location: #{@temp_dir}"
+      return
+    end
+    FileUtils.rm_rf(@temp_dir)
+    # Also clean up parent run directory if empty
+    run_dir = File.dirname(@temp_dir)
+    if Dir.exist?(run_dir) && Dir.empty?(run_dir) && run_dir.start_with?(base_tmp + File::SEPARATOR)
+      FileUtils.rmdir(run_dir)
+    end
+  rescue StandardError
+    # Ignore cleanup errors
+  end
+  # --- Safe path handling ---
+  # Build a safe temp directory path, validating that IDs are safe path segments
+  def build_safe_temp_dir
+    validate_path_segment!(@server_run_id, 'server_run_id')
+    validate_path_segment!(@scenario_id, 'scenario_id')
+    File.join(SPECSAGE_HOME, 'tmp', @server_run_id, @scenario_id)
+  end
+  # Validate that a string is safe to use as a filesystem path segment
+  # Raises if the value contains path traversal chars, slashes, or unsafe characters
+  def validate_path_segment!(value, name)
+    return if value.is_a?(String) && value.match?(SAFE_PATH_SEGMENT) && value.length <= 100
+    raise ArgumentError, "#{name} contains unsafe characters for filesystem path: #{value.inspect}. " \
+                         "Expected alphanumeric, underscore, or hyphen only."
+  end
+  # Send verdict to server for client-side errors (browser crash, etc.)
+  # Normal PASS/FAIL verdicts from the LLM are already recorded server-side
+  def send_client_verdict_if_needed(status, reason)
+    return unless @step_client
+    log "Sending client verdict to server: #{status}"
+    @step_client.set_verdict(
+      scenario_id: @scenario_id,
+      status: status,
+      reason: reason
+    )
+  rescue StepClient::StepError => e
+    log "Warning: Failed to send verdict to server: #{e.message}"
+  end
+  # --- Logging with credential protection ---
+  # Log with scenario name prefix for parallel execution clarity
+  def log(message)
+    guard_log_against_credential_leak(message)
+    puts "[#{@scenario_name}] #{message}"
+  end
+  # Step logging with the same credential-leak guard as #log (the guard runs in all environments)
+  def safe_puts(message)
+    guard_log_against_credential_leak(message)
+    puts "[#{@scenario_name}] #{message}"
+  end
+  # Raise if a log message contains actual credential values
+  # Runs in ALL environments - production is where leaks matter most
+  def guard_log_against_credential_leak(message)
+    return if @credentials.nil? || @credentials.empty?
+    @credentials.each do |name, secret_value|
+      next if secret_value.nil? || secret_value.empty?
+      if message.to_s.include?(secret_value)
+        raise "SECURITY: Credential '#{name}' value leaked into log output. " \
+              "Use placeholders like <<#{name}>> in logs, not actual values."
+      end
+    end
+  end
+  # --- Credential substitution ---
+  # Substitutes <<CREDENTIAL_NAME>> placeholders in action fields.
+  # Placeholders can appear anywhere in the string (inline substitution).
+  # Only credentials in the allowlist (from server) can be substituted.
+  CREDENTIAL_PLACEHOLDER_PATTERN = /<<([A-Z][A-Z0-9_]*)>>/
+  # Special key combinations that should not be treated as credential placeholders
+  SPECIAL_KEY_COMBOS = %w[ctrl+a Ctrl+A].freeze
+  # Check if the value is a special key combo (e.g., ctrl+a)
+  def special_key_combo?(value)
+    SPECIAL_KEY_COMBOS.include?(value)
+  end
+  # Check if the value contains any credential placeholders
+  def contains_credential_placeholder?(value)
+    return false unless value.is_a?(String)
+    CREDENTIAL_PLACEHOLDER_PATTERN.match?(value)
+  end
+  # Substitute all credential placeholders in a string with their actual values
+  # Raises if any placeholder references a credential not in the allowlist
+  def substitute_credentials(value)
+    return value unless value.is_a?(String)
+    value.gsub(CREDENTIAL_PLACEHOLDER_PATTERN) do |match|
+      name = Regexp.last_match(1)
+      unless @credentials.key?(name)
+        raise "Credential '#{name}' not allowed for this scenario. " \
+              "Available credentials: #{@credentials.keys.join(', ').presence || 'none'}"
+      end
+      @credentials[name]
+    end
+  end
+end
+require 'open3'