RubyGems - openbolt - Versions diffs - 5.3.0 → 5.5.0 - Mend

openbolt 5.3.0 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +4 -4
data/Puppetfile +6 -7
data/lib/bolt/bolt_option_parser.rb +63 -1
data/lib/bolt/cli.rb +1 -1
data/lib/bolt/config/options.rb +14 -0
data/lib/bolt/config/transport/choria.rb +74 -0
data/lib/bolt/config/transport/options.rb +108 -0
data/lib/bolt/executor.rb +2 -0
data/lib/bolt/pal/yaml_plan/transpiler.rb +1 -1
data/lib/bolt/plugin/puppetdb.rb +1 -1
data/lib/bolt/plugin.rb +1 -4
data/lib/bolt/puppetdb/config.rb +8 -0
data/lib/bolt/puppetdb/instance.rb +1 -0
data/lib/bolt/result_set.rb +1 -1
data/lib/bolt/transport/choria/agent_discovery.rb +137 -0
data/lib/bolt/transport/choria/bolt_tasks.rb +248 -0
data/lib/bolt/transport/choria/client.rb +281 -0
data/lib/bolt/transport/choria/command_builders.rb +199 -0
data/lib/bolt/transport/choria/helpers.rb +197 -0
data/lib/bolt/transport/choria/shell.rb +560 -0
data/lib/bolt/transport/choria.rb +218 -0
data/lib/bolt/transport/winrm/connection.rb +13 -3
data/lib/bolt/version.rb +1 -1
data/lib/mcollective/agent/shell.ddl +154 -0
metadata +35 -14
data/lib/bolt/plugin/puppet_connect_data.rb +0 -85
data/modules/puppet_connect/plans/test_input_data.pp +0 -94

data/lib/bolt/transport/choria/bolt_tasks.rb ADDED Viewed

@@ -0,0 +1,248 @@
+# frozen_string_literal: true
+module Bolt
+  module Transport
+    class Choria
+      # Run a task via the bolt_tasks agent. Groups targets by implementation
+      # to support mixed-platform batches. Starts all groups before polling any
+      # of them so tasks execute concurrently on nodes across implementations.
+      #
+      # @param targets [Array<Bolt::Target>] Targets that have the bolt_tasks agent
+      # @param task [Bolt::Task] Task to execute
+      # @param arguments [Hash] Task parameter names to values
+      # @param result_opts [Hash] Options passed through to emit_results (:action, :name, :position)
+      # @param callback [Proc] Called with :node_start and :node_result events
+      # @return [Array<Bolt::Result>] Results for all targets
+      def run_task_via_bolt_tasks(targets, task, arguments, result_opts, &callback)
+        logger.debug { "Running task #{task.name} via bolt_tasks agent on #{target_count(targets)}" }
+        results = []
+        # Start all implementation groups. Each gets its own download +
+        # run_no_wait sequence. Tasks begin executing on nodes as soon as
+        # run_no_wait returns.
+        started_groups = []
+        targets.group_by { |target| select_implementation(target, task) }.each do |implementation, impl_targets|
+          start_result = download_and_start_task(impl_targets, task, implementation,
+                                                 arguments, result_opts, &callback)
+          results += start_result[:failed_results]
+          started_groups << start_result if start_result[:task_id]
+        end
+        # Poll each group. Tasks are already running concurrently on nodes,
+        # so wall time is dominated by the longest task, not the sum.
+        # Each group has a different task_id, so they must be polled separately.
+        started_groups.each do |group|
+          output_by_target = poll_task_status(group[:targets], group[:task_id], task)
+          results += emit_results(output_by_target, **result_opts, &callback)
+        end
+        results
+      end
+      # Download task files from the server and start execution for one
+      # implementation group via bolt_tasks.download and bolt_tasks.run_no_wait.
+      #
+      # @param targets [Array<Bolt::Target>] Targets sharing the same implementation
+      # @param task [Bolt::Task] Task being executed
+      # @param implementation [Hash] Task implementation with 'path', 'name', 'input_method', 'files' keys
+      # @param arguments [Hash] Task parameter names to values
+      # @param result_opts [Hash] Options passed through to emit_results (:action, :name, :position)
+      # @param callback [Proc] Called with :node_start and :node_result events
+      # @return [Hash] with keys:
+      #   - :failed_results [Array<Bolt::Result>] Error results from setup phase
+      #   - :targets [Array<Bolt::Target>] Targets that successfully started
+      #   - :task_id [String, nil] Shared task ID for polling, nil if nothing started
+      def download_and_start_task(targets, task, implementation, arguments, result_opts, &callback)
+        environment = targets.first.options['puppet-environment']
+        input_method = implementation['input_method']
+        impl_files = [{ 'name' => File.basename(implementation['name']), 'path' => implementation['path'] }] +
+                     (implementation['files'] || [])
+        file_specs_json = impl_files.map { |file| task_file_spec(file, task.module_name, environment) }.to_json
+        # The failed_results reference will get updated and if we ever end up without
+        # any targets left to act on, we can return it immediately.
+        failed_results = []
+        none_started_result = { failed_results: failed_results, targets: [], task_id: nil }
+        # Download task files
+        logger.debug { "Downloading task #{task.name} files via bolt_tasks to #{target_count(targets)}" }
+        response = rpc_request('bolt_tasks', targets, 'bolt_tasks.download') do |client|
+          client.download(task: task.name, files: file_specs_json, environment: environment)
+        end
+        # The bolt_tasks agent uses reply.fail! with statuscode 1 for download
+        # failures, which rpc_request routes to :responded since statuscode 0-1
+        # means the action completed. Check rpc_statuscodes to catch these and
+        # report the download failure clearly instead of letting run_no_wait
+        # fail with a confusing "task not available" error.
+        dl_errors = response[:errors]
+        response[:rpc_statuscodes].each do |target, code|
+          next if code.zero? || dl_errors.key?(target)
+          dl_errors[target] = error_output(
+            "bolt_tasks.download on #{target.safe_name} failed to download task files",
+            'bolt/choria-download-failed'
+          )
+        end
+        # Must use concat rather than += to preserve reference to failed_results for early return
+        failed_results.concat(emit_results(dl_errors, **result_opts, &callback))
+        remaining = response[:responded].keys - dl_errors.keys
+        return none_started_result if remaining.empty?
+        # Start task execution
+        logger.debug { "Starting task #{task.name} on #{target_count(remaining)}" }
+        response = rpc_request('bolt_tasks', remaining, 'bolt_tasks.run_no_wait') do |client|
+          client.run_no_wait(task: task.name, input_method: input_method,
+                             files: file_specs_json, input: arguments.to_json)
+        end
+        failed_results.concat(emit_results(response[:errors], **result_opts, &callback))
+        return none_started_result if response[:responded].empty?
+        # Extract the shared task_id (all targets get the same one from
+        # the single run_no_wait call that fanned out to all of them)
+        task_id = response[:responded].values.first&.dig(:task_id)
+        unless task_id
+          no_id_errors = response[:responded].each_with_object({}) do |(target, _), errors|
+            errors[target] = error_output(
+              "bolt_tasks.run_no_wait on #{target.safe_name} succeeded but returned no task_id",
+              'bolt/choria-missing-task-id'
+            )
+          end
+          failed_results.concat(emit_results(no_id_errors, **result_opts, &callback))
+          return none_started_result
+        end
+        logger.debug { "Started task #{task.name} on #{target_count(response[:responded])}, task_id: #{task_id}" }
+        { failed_results: failed_results, targets: response[:responded].keys, task_id: task_id }
+      end
+      # Poll bolt_tasks.task_status until all targets complete or timeout.
+      #
+      # @param targets [Array<Bolt::Target>] Targets that were started successfully
+      # @param task_id [String] Shared task ID from run_no_wait
+      # @param task [Bolt::Task] Task being polled (used for timeout and error messages)
+      # @return [Hash{Bolt::Target => Hash}] Output hash for every target (success and error)
+      def poll_task_status(targets, task_id, task)
+        timeout = targets.first.options['task-timeout']
+        poll_result = poll_with_retries(targets, timeout, 'bolt_tasks.task_status') do |remaining|
+          response = rpc_request('bolt_tasks', remaining, 'bolt_tasks.task_status') do |client|
+            client.task_status(task_id: task_id)
+          end
+          next { rpc_failed: true, done: {} } if response[:rpc_failed]
+          done = response[:errors].dup
+          response[:responded].each do |target, data|
+            if data.nil?
+              done[target] = error_output(
+                "bolt_tasks.task_status on #{target.safe_name} returned success but no data",
+                'bolt/choria-missing-data'
+              )
+              next
+            end
+            next unless data[:completed]
+            done[target] = extract_task_output(data, target)
+          end
+          { rpc_failed: false, done: done }
+        end
+        remaining_errors = poll_result[:remaining].each_with_object({}) do |target, errors|
+          errors[target] =
+            if poll_result[:rpc_persistent_failure]
+              error_output("RPC requests to poll task status on #{target.safe_name} failed persistently",
+                           'bolt/choria-poll-failed')
+            else
+              error_output("Task #{task.name} timed out after #{timeout} seconds on #{target.safe_name}",
+                           'bolt/choria-task-timeout')
+            end
+        end
+        poll_result[:completed].merge(remaining_errors)
+      end
+      # Extract stdout, stderr, and exitcode from a bolt_tasks task_status response.
+      #
+      # @param data [Hash] Task_status response data with :stdout, :stderr, :exitcode keys
+      # @param target [Bolt::Target] Target for logging and stdout unwrapping context
+      # @return [Hash] Output hash from output() or error_output()
+      def extract_task_output(data, target)
+        exitcode = exitcode_from(data, target, 'task')
+        output(stdout: unwrap_bolt_tasks_stdout(data[:stdout]),
+               stderr: data[:stderr], exitcode: exitcode)
+      end
+      # Build a file spec hash for the bolt_tasks download action. Computes
+      # the Puppet Server file_content URI based on the file's module-relative path.
+      #
+      # @param file [Hash] With 'name' (module-relative path) and 'path' (local absolute path)
+      # @param module_name [String] Task's module name (used for simple task files)
+      # @param environment [String] Puppet environment name for the URI params
+      # @return [Hash] File spec with 'filename', 'sha256', 'size_bytes', and 'uri' keys
+      def task_file_spec(file, module_name, environment)
+        file_name = file['name']
+        validate_file_name!(file_name)
+        file_path = file['path']
+        parts = file_name.split('/', 3)
+        path = if parts.length == 3
+                 mod, subdir, rest = parts
+                 case subdir
+                 when 'files'
+                   "/puppet/v3/file_content/modules/#{mod}/#{rest}"
+                 when 'lib'
+                   "/puppet/v3/file_content/plugins/#{mod}/#{rest}"
+                 else
+                   "/puppet/v3/file_content/tasks/#{mod}/#{rest}"
+                 end
+               else
+                 "/puppet/v3/file_content/tasks/#{module_name}/#{file_name}"
+               end
+        {
+          'filename' => file_name,
+          'sha256' => Digest::SHA256.file(file_path).hexdigest,
+          'size_bytes' => File.size(file_path),
+          'uri' => {
+            'path' => path,
+            'params' => { 'environment' => environment }
+          }
+        }
+      end
+      # Fix double-encoding in the bolt_tasks agent's wrapper error path.
+      #
+      # Normally, create_task_stdout returns a Hash and reply_task_status
+      # calls .to_json on it, producing a single JSON string like:
+      #   '{"_output":"hello world"}'
+      #
+      # But for wrapper errors, create_task_stdout returns an already
+      # JSON-encoded String. reply_task_status still calls .to_json on
+      # it, encoding it a second time. The result is a JSON string whose
+      # value is itself a JSON string:
+      #   '"{\\"_error\\":{\\"kind\\":\\"choria.tasks/wrapper-error\\",...}}"'
+      #
+      # We parse one layer of JSON. In the normal case, that produces a
+      # Hash and we return the original string. In the double-encoded
+      # case, it produces a String (the inner JSON), which we return so
+      # Result.for_task can parse it.
+      #
+      # @param agent_stdout [String, nil] JSON-encoded stdout from the bolt_tasks agent
+      # @return [String, nil] JSON string suitable for Result.for_task
+      def unwrap_bolt_tasks_stdout(agent_stdout)
+        return agent_stdout unless agent_stdout.is_a?(String)
+        parsed = begin
+          JSON.parse(agent_stdout)
+        rescue JSON::ParserError
+          return agent_stdout
+        end
+        # Normal case: parsed is a Hash, return the original JSON string.
+        # Double-encoded case: parsed is a String (the inner JSON), return it.
+        parsed.is_a?(String) ? parsed : agent_stdout
+      end
+    end
+  end
+end

data/lib/bolt/transport/choria/client.rb ADDED Viewed

@@ -0,0 +1,281 @@
+# frozen_string_literal: true
+module Bolt
+  module Transport
+    class Choria
+      # Number of consecutive RPC poll failures before giving up and marking
+      # all remaining targets as failed. Used by both polling loops
+      # (poll_task_status and wait_for_shell_results).
+      RPC_FAILURE_RETRIES = 3
+      # One-time setup of the local MCollective client connection to the
+      # NATS broker. MCollective::Config.loadconfig must only be called
+      # once since it loads plugins via PluginManager.loadclass, and a
+      # second call raises "Plugin already loaded".
+      #
+      # The @client_configured flag is checked twice: once before taking
+      # the mutex (fast path to avoid lock overhead on every call after
+      # setup) and once inside (handles the race where two batch threads
+      # both see false simultaneously and try to configure concurrently).
+      #
+      # This function is idempotent, so it should be called before any
+      # operation that needs the client connection to ensure it is configured
+      # correctly.
+      #
+      # @param target [Bolt::Target] Any target in the batch (used to read transport options)
+      def configure_client(target)
+        return if @client_configured # fast path: skip the lock once setup has completed
+        @config_mutex.synchronize do
+          return if @client_configured # re-check: another thread may have finished setup while we waited
+          # If a previous attempt failed after partially initializing
+          # MCollective (e.g., plugins loaded but NATS connector failed),
+          # retrying loadconfig would hit "Plugin already loaded" errors.
+          # Re-raise the original error so the caller gets a clear message.
+          raise @config_error if @config_error
+          # We do the require here because this is a pretty meaty library, and
+          # no need to load it when OpenBolt starts up if the user isn't using
+          # the Choria transport.
+          require 'mcollective'
+          opts = target.options
+          config = MCollective::Config.instance
+          unless config.configured # load the config file only if MCollective is not configured yet
+            config_file = opts['config-file'] || MCollective::Util.config_file_for_user
+            unless File.readable?(config_file) # fail before loadconfig is ever attempted
+              msg = if opts['config-file']
+                      "Choria config file not found or not readable: #{config_file}"
+                    else
+                      "Could not find a readable Choria client config file. " \
+                      "Searched: #{MCollective::Util.config_paths_for_user.join(', ')}. " \
+                      "Set the 'config-file' option in the Choria transport configuration."
+                    end
+              raise Bolt::Error.new(msg, 'bolt/choria-config-not-found') # not cached in @config_error, so a later call can retry
+            end
+            begin
+              config.loadconfig(config_file)
+            rescue StandardError => e
+              @config_error = Bolt::Error.new( # cached so later calls re-raise instead of calling loadconfig again
+                "Choria client configuration failed: #{e.class}: #{e.message}",
+                'bolt/choria-config-failed'
+              )
+              raise @config_error
+            end
+            logger.debug { "Loaded Choria client config from #{config_file}" }
+          end
+          if opts['mcollective-certname']
+            ENV['MCOLLECTIVE_CERTNAME'] = opts['mcollective-certname'] # process-wide environment variable
+            logger.debug { "MCOLLECTIVE_CERTNAME set to #{opts['mcollective-certname']}" }
+          end
+          if opts['brokers']
+            brokers = Array(opts['brokers']).map { |broker| broker.include?(':') ? broker : "#{broker}:4222" } # add :4222 when no port is given
+            config.pluginconf['choria.middleware_hosts'] = brokers.join(',')
+            logger.debug { "Choria brokers overridden: #{brokers.join(', ')}" }
+          end
+          if opts['ssl-ca'] && opts['ssl-cert'] && opts['ssl-key'] # overrides apply only when all three SSL options are set
+            unreadable = %w[ssl-ca ssl-cert ssl-key].find { |key| !File.readable?(opts[key]) } # first option whose file cannot be read
+            if unreadable
+              raise Bolt::Error.new(
+                "File for #{unreadable} is not readable: #{opts[unreadable]}",
+                'bolt/choria-config-failed'
+              )
+            end
+            config.pluginconf['security.provider'] = 'file'
+            config.pluginconf['security.file.ca'] = opts['ssl-ca']
+            config.pluginconf['security.file.certificate'] = opts['ssl-cert']
+            config.pluginconf['security.file.key'] = opts['ssl-key']
+            logger.debug { "Using file-based TLS security provider with given SSL override(s)" }
+          end
+          @default_collective = config.main_collective
+          @client_configured = true # set last, so any error raised above leaves the client marked unconfigured
+        end
+      end
+      # Create an MCollective::RPC::Client for one or more targets.
+      # Accepts a single target or an array. Uses MCollective's direct
+      # addressing mode (client.discover(nodes:)) to skip broadcast
+      # discovery and send requests directly to the specified nodes.
+      #
+      # Note that when the client is created, if the shell agent isn't already
+      # installed on the OpenBolt controller node, then the shell DDL that we
+      # bundle with OpenBolt at lib/mcollective/agent/shell.ddl
+      # automatically gets loaded since it's on the $LOAD_PATH and in the
+      # right place for MCollective's plugin loading. The bolt_tasks
+      # DDL is already included in the choria-mcorpc-support gem.
+      #
+      # @param agent_name [String] MCollective agent name (e.g. 'shell', 'bolt_tasks')
+      # @param targets [Bolt::Target, Array<Bolt::Target>] One or more targets to address
+      # @param timeout [Numeric] RPC call timeout in seconds
+      # @return [MCollective::RPC::Client] Configured client with direct addressing enabled
+      def create_rpc_client(agent_name, targets, timeout)
+        targets = Array(targets)
+        options = MCollective::Util.default_options
+        options[:timeout] = timeout
+        options[:verbose] = false
+        options[:connection_timeout] = targets.first.options['broker-timeout']
+        collective = collective_for(targets.first)
+        options[:collective] = collective if collective
+        client = MCollective::RPC::Client.new(agent_name, options: options)
+        client.progress = false
+        identities = targets.map { |target| choria_identity(target) }.uniq
+        client.discover(nodes: identities)
+        client
+      end
+      # Make a batched RPC call and split results into responded and errors.
+      # Yields the RPC client so the caller specifies which action to invoke.
+      #
+      # Results are split based on MCollective RPC statuscodes:
+      # - statuscode 0: action completed successfully (:responded)
+      # - statuscode 1 (RPCAborted): action completed but reported a
+      #   problem (:responded). The data is preserved rather than
+      #   discarded because some agents (notably bolt_tasks) use
+      #   statuscode 1 for application-level failures where the
+      #   response data is still valid and meaningful (e.g., a task
+      #   that ran but exited non-zero). Callers must handle this
+      #   case and not assume :responded means success.
+      # - statuscode 2-5: RPC infrastructure error (:errors)
+      # - no response: target didn't reply (:errors)
+      # - exception: total RPC failure (rpc_failed: true)
+      #
+      # Serialized by @rpc_mutex because MCollective's NATS connector is a
+      # singleton with a shared receive queue. Concurrent RPC calls cause
+      # reply channel collisions, cross-thread message confusion, and subscription
+      # conflicts. See choria-transport-dev.md for the full explanation.
+      #
+      # @param agent [String] MCollective agent name (e.g. 'shell', 'bolt_tasks', 'rpcutil')
+      # @param targets [Bolt::Target, Array<Bolt::Target>] One or more targets to address
+      # @param context [String] Human-readable label for logging (e.g. 'shell.start')
+      # @yield [MCollective::RPC::Client] The configured RPC client to invoke an action on
+      # @return [Hash] with keys:
+      #   - :responded [Hash] Targets where the action completed (statuscode 0-1),
+      #     mapped to their response data
+      #   - :errors [Hash] Targets with RPC errors or no response, mapped to error output hashes
+      #   - :rpc_failed [Boolean] True when the entire RPC call failed
+      #   - :rpc_statuscodes [Hash] Per-target MCollective RPC statuscodes.
+      #     Includes all targets that responded (both :responded and :errors).
+      #     Not populated when rpc_failed is true (no individual responses).
+      def rpc_request(agent, targets, context)
+        targets = Array(targets)
+        rpc_results = @rpc_mutex.synchronize do
+          rpc_timeout = targets.first.options['rpc-timeout']
+          client = create_rpc_client(agent, targets, rpc_timeout)
+          yield(client)
+        end
+        by_sender = index_results_by_sender(rpc_results, targets, context)
+        responded = {}
+        errors = {}
+        rpc_statuscodes = {}
+        targets.each do |target|
+          rpc_result = by_sender[choria_identity(target)]
+          if rpc_result.nil?
+            errors[target] = error_output(
+              "No response from #{target.safe_name} for #{context}",
+              'bolt/choria-no-response'
+            )
+          elsif rpc_result[:statuscode] > 1
+            rpc_statuscodes[target] = rpc_result[:statuscode]
+            errors[target] = error_output(
+              "#{context} on #{target.safe_name} returned RPC error: " \
+              "#{rpc_result[:statusmsg]} (code #{rpc_result[:statuscode]})",
+              'bolt/choria-rpc-error'
+            )
+          else
+            rpc_statuscodes[target] = rpc_result[:statuscode]
+            if rpc_result[:statuscode] == 1
+              logger.warn { "#{context} on #{target.safe_name} had RPC status code #{rpc_result[:statuscode]}: #{rpc_result[:statusmsg]}" }
+            end
+            responded[target] = rpc_result[:data]
+          end
+        end
+        { responded: responded, errors: errors, rpc_failed: false, rpc_statuscodes: rpc_statuscodes }
+      rescue StandardError => e
+        raise if e.is_a?(Bolt::Error)
+        logger.warn { "#{context} RPC call failed: #{e.class}: #{e.message}" }
+        errors = targets.each_with_object({}) do |target, errs|
+          errs[target] = error_output("#{context} failed on #{target.safe_name}: #{e.message}",
+                                      'bolt/choria-rpc-failed')
+        end
+        { responded: {}, errors: errors, rpc_failed: true, rpc_statuscodes: {} }
+      end
+      # Configure the client, discover agents, partition targets by agent
+      # availability, and emit errors for incapable targets.
+      #
+      # @param targets [Array<Bolt::Target>] Targets to prepare
+      # @param agent_name [String] Required agent name (e.g. 'shell', 'bolt_tasks')
+      # @param result_opts [Hash] Options passed through to emit_results (:action, :name, :position)
+      # @param callback [Proc] Called with :node_start and :node_result events
+      # @return [Array] Two-element array:
+      #   - [Array<Bolt::Target>] Targets that have the required agent
+      #   - [Array<Bolt::Result>] Error results for targets that lack the agent
+      def prepare_targets(targets, agent_name, result_opts, &callback)
+        configure_client(targets.first)
+        discover_agents(targets)
+        capable, incapable = targets.partition { |target| has_agent?(target, agent_name) }
+        agent_errors = incapable.each_with_object({}) do |target, errors|
+          msg = if @agent_cache[choria_identity(target)].nil?
+                  "No agent information available for #{target.safe_name} (node did not respond to discovery)"
+                else
+                  "The '#{agent_name}' agent is not available on #{target.safe_name}."
+                end
+          errors[target] = error_output(msg, 'bolt/choria-agent-not-available')
+        end
+        incapable_results = emit_results(agent_errors, fire_node_start: true, **result_opts, &callback)
+        [capable, incapable_results]
+      end
+      # Index RPC results by sender, keeping only the first response per
+      # sender and only from the set of expected identities. Logs and discards
+      # responses from unexpected senders and duplicates.
+      #
+      # @param results [Array<Hash>] Raw MCollective RPC result hashes with :sender keys
+      # @param targets [Array<Bolt::Target>] Expected targets (used to build the allowed sender set)
+      # @param context [String] Human-readable label for log messages
+      # @return [Hash{String => Hash}] Sender identity to first valid RPC result hash
+      def index_results_by_sender(results, targets, context)
+        expected = targets.to_set { |target| choria_identity(target) }
+        by_sender = {}
+        results.each do |result|
+          sender = result[:sender]
+          unless sender
+            logger.warn { "Discarding #{context} response with nil sender" }
+            next
+          end
+          unless expected.include?(sender)
+            logger.warn { "Discarding #{context} response from unexpected sender '#{sender}'" }
+            next
+          end
+          if by_sender.key?(sender)
+            if result[:data] == by_sender[sender][:data]
+              logger.debug { "Ignoring duplicate #{context} response from #{sender}" }
+            else
+              logger.warn { "Ignoring duplicate #{context} response from #{sender} with different data" }
+            end
+            next
+          end
+          by_sender[sender] = result
+        end
+        by_sender
+      end
+    end
+  end
+end