ruby_reactor 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/README.md +132 -0
  3. data/Rakefile +2 -2
  4. data/documentation/data_pipelines.md +90 -84
  5. data/documentation/testing.md +812 -0
  6. data/lib/ruby_reactor/configuration.rb +1 -1
  7. data/lib/ruby_reactor/context.rb +13 -5
  8. data/lib/ruby_reactor/context_serializer.rb +70 -4
  9. data/lib/ruby_reactor/dsl/map_builder.rb +6 -2
  10. data/lib/ruby_reactor/dsl/reactor.rb +3 -2
  11. data/lib/ruby_reactor/error/step_failure_error.rb +5 -2
  12. data/lib/ruby_reactor/executor/result_handler.rb +9 -2
  13. data/lib/ruby_reactor/executor/retry_manager.rb +26 -8
  14. data/lib/ruby_reactor/executor/step_executor.rb +24 -99
  15. data/lib/ruby_reactor/executor.rb +3 -13
  16. data/lib/ruby_reactor/map/collector.rb +72 -33
  17. data/lib/ruby_reactor/map/dispatcher.rb +162 -0
  18. data/lib/ruby_reactor/map/element_executor.rb +103 -114
  19. data/lib/ruby_reactor/map/execution.rb +18 -4
  20. data/lib/ruby_reactor/map/helpers.rb +4 -3
  21. data/lib/ruby_reactor/map/result_enumerator.rb +105 -0
  22. data/lib/ruby_reactor/reactor.rb +174 -16
  23. data/lib/ruby_reactor/rspec/helpers.rb +17 -0
  24. data/lib/ruby_reactor/rspec/matchers.rb +256 -0
  25. data/lib/ruby_reactor/rspec/step_executor_patch.rb +85 -0
  26. data/lib/ruby_reactor/rspec/test_subject.rb +625 -0
  27. data/lib/ruby_reactor/rspec.rb +18 -0
  28. data/lib/ruby_reactor/{async_router.rb → sidekiq_adapter.rb} +15 -10
  29. data/lib/ruby_reactor/sidekiq_workers/worker.rb +1 -3
  30. data/lib/ruby_reactor/step/compose_step.rb +0 -1
  31. data/lib/ruby_reactor/step/map_step.rb +52 -27
  32. data/lib/ruby_reactor/storage/redis_adapter.rb +59 -0
  33. data/lib/ruby_reactor/template/dynamic_source.rb +32 -0
  34. data/lib/ruby_reactor/version.rb +1 -1
  35. data/lib/ruby_reactor/web/api.rb +32 -24
  36. data/lib/ruby_reactor.rb +70 -10
  37. metadata +12 -3
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyReactor
4
+ module Map
5
+ class Dispatcher
6
+ extend Helpers
7
+
8
+ def self.perform(arguments)
9
+ arguments = arguments.transform_keys(&:to_sym)
10
+ parent_reactor_class_name = arguments[:parent_reactor_class_name]
11
+
12
+ storage = RubyReactor.configuration.storage_adapter
13
+
14
+ # Load parent context to resolve source
15
+ parent_context = load_parent_context_from_storage(
16
+ arguments[:parent_context_id],
17
+ parent_reactor_class_name,
18
+ storage
19
+ )
20
+
21
+ # Initialize metadata if first run
22
+ initialize_map_metadata(arguments, storage) unless arguments[:continuation]
23
+
24
+ # Resolve Source
25
+ # We need to resolve the source to know what we are iterating.
26
+ # Strict "Array Only" rule means we expect an Array-like object or we handle the
27
+ # "Query Builder" result if user used it.
28
+ source = resolve_source(arguments, parent_context)
29
+
30
+ # Dispatch next batch
31
+ dispatch_batch(source, arguments, parent_context, storage)
32
+ end
33
+
34
+ def self.initialize_map_metadata(arguments, storage)
35
+ map_id = arguments[:map_id]
36
+ reactor_class_name = arguments[:parent_reactor_class_name]
37
+
38
+ # Reset or set initial offset. Use NX to act as a mutex/guard against duplicate initialization.
39
+ storage.set_map_offset_if_not_exists(map_id, 0, reactor_class_name)
40
+ end
41
+
42
+ def self.resolve_source(arguments, context)
43
+ # Arguments has :source which is a Template::Input or similar.
44
+ # We need to resolve it against the context.
45
+ source_template = arguments[:source]
46
+
47
+ # Fallback: look up from step config if missing (e.g. called from ElementExecutor)
48
+ if source_template.nil? && context
49
+ step_name = arguments[:step_name]
50
+ step_config = context.reactor_class.steps[step_name.to_sym]
51
+ source_template = step_config.arguments[:source][:source]
52
+ end
53
+
54
+ # If source is packaged in arguments as a value (deserialized)
55
+ return source_template if source_template.is_a?(Array)
56
+
57
+ # Resolve template
58
+ return source_template.resolve(context) if source_template.respond_to?(:resolve)
59
+
60
+ source_template
61
+ end
62
+
63
+ def self.dispatch_batch(source, arguments, parent_context, storage)
64
+ map_id = arguments[:map_id]
65
+ reactor_class_name = arguments[:parent_reactor_class_name]
66
+
67
+ # Fail Fast Check
68
+ if arguments[:fail_fast]
69
+ failed_context_id = storage.retrieve_map_failed_context_id(map_id, reactor_class_name)
70
+ return if failed_context_id
71
+ end
72
+
73
+ batch_size = arguments[:batch_size] || source.size # Default to all if no batch_size (async=true only)
74
+
75
+ # Atomically reserve a batch
76
+ new_offset = storage.increment_map_offset(map_id, batch_size, reactor_class_name)
77
+ current_offset = new_offset - batch_size
78
+
79
+ batch_elements = if source.is_a?(Array)
80
+ source.slice(current_offset, batch_size) || []
81
+ elsif source.respond_to?(:offset) && source.respond_to?(:limit)
82
+ # Optimized for ActiveRecord and similar query builders
83
+ source.offset(current_offset).limit(batch_size).to_a
84
+ else
85
+ # Fallback for generic Enumerable
86
+ # This is inefficient for huge sets if not Array, but compliant
87
+ source.drop(current_offset).take(batch_size)
88
+ end
89
+
90
+ return if batch_elements.empty?
91
+
92
+ # Queue Jobs
93
+ queue_options = {
94
+ map_id: map_id,
95
+ arguments: arguments,
96
+ context: parent_context,
97
+ reactor_class_info: resolve_reactor_class_info(arguments, parent_context),
98
+ step_name: arguments[:step_name]
99
+ }
100
+
101
+ batch_elements.each_with_index do |element, i|
102
+ absolute_index = current_offset + i
103
+ queue_element_job(element, absolute_index, queue_options)
104
+ end
105
+ end
106
+
107
+ def self.queue_element_job(element, index, options)
108
+ arguments = options[:arguments]
109
+ context = options[:context]
110
+
111
+ # Resolve mappings
112
+ mappings_template = arguments[:argument_mappings]
113
+
114
+ # Fallback: look up from step config if missing (e.g. called from ElementExecutor)
115
+ if mappings_template.nil? && context
116
+ step_name = options[:step_name] || arguments[:step_name]
117
+ step_config = context.reactor_class.steps[step_name.to_sym]
118
+ mappings_template = step_config.arguments[:argument_mappings]
119
+ end
120
+
121
+ mappings = if mappings_template.respond_to?(:resolve)
122
+ mappings_template.resolve(context)
123
+ else
124
+ mappings_template || {}
125
+ end
126
+
127
+ # Normalize a fallback-path quirk: mappings may arrive wrapped as :source -> Template::Value(Hash); unwrap to the inner hash.
128
+ if mappings.key?(:source) && mappings[:source].respond_to?(:value) && mappings[:source].value.is_a?(Hash)
129
+ mappings = mappings[:source].value
130
+ end
131
+
132
+ mapped_inputs = build_element_inputs(mappings, context, element)
133
+ serialized_inputs = ContextSerializer.serialize_value(mapped_inputs)
134
+
135
+ RubyReactor.configuration.async_router.perform_map_element_async(
136
+ map_id: options[:map_id],
137
+ element_id: "#{options[:map_id]}:#{index}",
138
+ index: index,
139
+ serialized_inputs: serialized_inputs,
140
+ reactor_class_info: options[:reactor_class_info],
141
+ strict_ordering: arguments[:strict_ordering],
142
+ parent_context_id: context.context_id,
143
+ parent_reactor_class_name: context.reactor_class.name,
144
+ step_name: options[:step_name].to_s,
145
+ batch_size: arguments[:batch_size], # Passed to the worker so it can trigger the next batch via the Dispatcher
146
+ fail_fast: arguments[:fail_fast]
147
+ )
148
+ end
149
+
150
+ def self.resolve_reactor_class_info(arguments, context)
151
+ mapped_reactor_class = arguments[:mapped_reactor_class]
152
+ step_name = arguments[:step_name]
153
+
154
+ if mapped_reactor_class.respond_to?(:name)
155
+ { "type" => "class", "name" => mapped_reactor_class.name }
156
+ else
157
+ { "type" => "inline", "parent" => context.reactor_class.name, "step" => step_name.to_s }
158
+ end
159
+ end
160
+ end
161
+ end
162
+ end
@@ -5,153 +5,142 @@ module RubyReactor
5
5
  class ElementExecutor
6
6
  extend Helpers
7
7
 
8
- # rubocop:disable Metrics/MethodLength
9
8
  def self.perform(arguments)
10
9
  arguments = arguments.transform_keys(&:to_sym)
11
- map_id = arguments[:map_id]
12
- _element_id = arguments[:element_id]
13
- index = arguments[:index]
14
- serialized_inputs = arguments[:serialized_inputs]
15
- reactor_class_info = arguments[:reactor_class_info]
16
- strict_ordering = arguments[:strict_ordering]
17
- parent_context_id = arguments[:parent_context_id]
18
- parent_reactor_class_name = arguments[:parent_reactor_class_name]
19
- step_name = arguments[:step_name]
20
- batch_size = arguments[:batch_size]
21
- # rubocop:enable Metrics/MethodLength
22
- serialized_context = arguments[:serialized_context]
23
10
 
24
- if serialized_context
25
- context = ContextSerializer.deserialize(serialized_context)
11
+ context = hydrate_or_create_context(arguments)
12
+ storage = RubyReactor.configuration.storage_adapter
13
+ storage.store_map_element_context_id(arguments[:map_id], context.context_id,
14
+ arguments[:parent_reactor_class_name])
15
+
16
+ return if check_fail_fast?(arguments, storage)
17
+
18
+ executor = Executor.new(context.reactor_class, {}, context)
19
+ arguments[:serialized_context] ? executor.resume_execution : executor.execute
20
+
21
+ handle_result(executor.result, arguments, context, storage, executor)
22
+ finalize_execution(arguments, storage)
23
+ end
24
+
25
+ def self.load_parent_context(arguments, reactor_class_name, storage)
26
+ parent_context_data = storage.retrieve_context(arguments[:parent_context_id], reactor_class_name)
27
+ parent_reactor_class = Object.const_get(reactor_class_name)
28
+ parent_context = Context.new(
29
+ ContextSerializer.deserialize_value(parent_context_data["inputs"]),
30
+ parent_reactor_class
31
+ )
32
+ parent_context.context_id = arguments[:parent_context_id]
33
+ parent_context
34
+ end
35
+
36
+ # Legacy helpers resolved_next_element, build_serialized_inputs, queue_element_job
37
+ # are REMOVED as they are no longer used for self-queuing.
38
+
39
+ # Per-element input-building helpers are intentionally absent from this class:
40
+ # `perform` receives pre-serialized `serialized_inputs` from the Dispatcher,
41
+ # so source and argument-mapping resolution no longer happens here.
42
+ # `resolve_reactor_class` is still required — it is used by
43
+ # `hydrate_or_create_context` to reconstruct the mapped reactor class.
44
+ # `build_element_inputs` appears to be provided by Helpers (the Dispatcher
45
+ # uses it); NOTE(review): confirm it has no remaining callers in this class.
46
+ # Self-queuing helpers were removed in favor of Dispatcher batch dispatch.
47
+
48
+ # rubocop:disable Style/IdenticalConditionalBranches
49
+ def self.hydrate_or_create_context(arguments)
50
+ if arguments[:serialized_context]
51
+ context = ContextSerializer.deserialize(arguments[:serialized_context])
26
52
  context.map_metadata = arguments
27
- reactor_class = context.reactor_class
28
- else
29
- # Deserialize inputs
30
- inputs = ContextSerializer.deserialize_value(serialized_inputs)
31
53
 
32
- # Resolve reactor class
33
- reactor_class = resolve_reactor_class(reactor_class_info)
54
+ if context.inputs.empty? && arguments[:serialized_inputs]
55
+ context.inputs = ContextSerializer.deserialize_value(arguments[:serialized_inputs])
56
+ end
57
+ context
58
+ else
59
+ inputs = ContextSerializer.deserialize_value(arguments[:serialized_inputs])
60
+ reactor_class = resolve_reactor_class(arguments[:reactor_class_info])
34
61
 
35
- # Create context
36
62
  context = Context.new(inputs, reactor_class)
37
- context.parent_context_id = parent_context_id
63
+ context.parent_context_id = arguments[:parent_context_id]
38
64
  context.map_metadata = arguments
65
+ context
39
66
  end
67
+ end
68
+ # rubocop:enable Style/IdenticalConditionalBranches
40
69
 
41
- storage = RubyReactor.configuration.storage_adapter
42
- storage.store_map_element_context_id(map_id, context.context_id, parent_reactor_class_name)
43
-
44
- # Execute
45
- executor = Executor.new(reactor_class, {}, context)
70
+ def self.check_fail_fast?(arguments, storage)
71
+ return false unless arguments[:fail_fast]
46
72
 
47
- if serialized_context
48
- executor.resume_execution
49
- else
50
- executor.execute
51
- end
73
+ map_id = arguments[:map_id]
74
+ parent_reactor_class_name = arguments[:parent_reactor_class_name]
52
75
 
53
- result = executor.result
76
+ failed_context_id = storage.retrieve_map_failed_context_id(map_id, parent_reactor_class_name)
77
+ return false unless failed_context_id
54
78
 
55
- if result.is_a?(RetryQueuedResult)
56
- queue_next_batch(arguments) if batch_size
57
- return
58
- end
79
+ # Skip execution
80
+ finalize_execution(arguments, storage)
81
+ true
82
+ end
59
83
 
60
- # Store result
84
+ def self.handle_result(result, arguments, context, storage, executor)
85
+ return if result.is_a?(RetryQueuedResult)
61
86
 
62
- # Store result
87
+ map_id = arguments[:map_id]
88
+ index = arguments[:index]
89
+ parent_class = arguments[:parent_reactor_class_name] # Using short name for variable
63
90
 
64
91
  if result.success?
65
- storage.store_map_result(map_id, index, result.value, parent_reactor_class_name,
66
- strict_ordering: strict_ordering)
92
+ storage.store_map_result(map_id, index, ContextSerializer.serialize_value(result.value),
93
+ parent_class, strict_ordering: arguments[:strict_ordering])
67
94
  else
68
- # Store error
69
- storage.store_map_result(map_id, index, { _error: result.error }, parent_reactor_class_name,
70
- strict_ordering: strict_ordering)
95
+ executor.undo_all
96
+ storage.store_map_result(map_id, index, { _error: result.error }, parent_class,
97
+ strict_ordering: arguments[:strict_ordering])
98
+
99
+ if arguments[:fail_fast]
100
+ storage.store_map_failed_context_id(map_id, context.context_id, parent_class)
101
+ # FAST FAIL: Trigger Collector immediately to cancel/fail the map execution
102
+ RubyReactor.configuration.async_router.perform_map_collection_async(
103
+ parent_context_id: arguments[:parent_context_id],
104
+ map_id: map_id,
105
+ parent_reactor_class_name: parent_class,
106
+ step_name: arguments[:step_name],
107
+ strict_ordering: arguments[:strict_ordering],
108
+ timeout: 3600
109
+ )
110
+ end
71
111
  end
112
+ end
72
113
 
73
- # Decrement counter
74
- new_count = storage.decrement_map_counter(map_id, parent_reactor_class_name)
114
+ def self.finalize_execution(arguments, storage)
115
+ map_id = arguments[:map_id]
116
+ parent_class = arguments[:parent_reactor_class_name]
75
117
 
76
- queue_next_batch(arguments) if batch_size
118
+ new_count = storage.decrement_map_counter(map_id, parent_class)
119
+ trigger_next_batch_if_needed(arguments, arguments[:index], arguments[:batch_size])
77
120
 
78
121
  return unless new_count.zero?
79
122
 
80
- # Trigger collection
81
123
  RubyReactor.configuration.async_router.perform_map_collection_async(
82
- parent_context_id: parent_context_id,
124
+ parent_context_id: arguments[:parent_context_id],
83
125
  map_id: map_id,
84
- parent_reactor_class_name: parent_reactor_class_name,
85
- step_name: step_name,
86
- strict_ordering: strict_ordering,
126
+ parent_reactor_class_name: parent_class,
127
+ step_name: arguments[:step_name],
128
+ strict_ordering: arguments[:strict_ordering],
87
129
  timeout: 3600
88
130
  )
89
131
  end
90
132
 
91
- def self.queue_next_batch(arguments)
92
- storage = RubyReactor.configuration.storage_adapter
93
- map_id = arguments[:map_id]
94
- reactor_class_name = arguments[:parent_reactor_class_name]
95
-
96
- next_index = storage.increment_last_queued_index(map_id, reactor_class_name)
97
- total_count = storage.retrieve_map_metadata(map_id, reactor_class_name)["count"]
98
-
99
- return unless next_index < total_count
100
-
101
- parent_context = load_parent_context(arguments, reactor_class_name, storage)
102
- element = resolve_next_element(arguments, parent_context, next_index)
103
- serialized_inputs = build_serialized_inputs(arguments, parent_context, element)
133
+ def self.trigger_next_batch_if_needed(arguments, index, batch_size)
134
+ return unless batch_size && ((index + 1) % batch_size).zero?
104
135
 
105
- queue_element_job(arguments, map_id, next_index, serialized_inputs, reactor_class_name)
136
+ # Trigger Dispatcher for next batch
137
+ next_batch_args = arguments.dup
138
+ # Ensure we don't carry over temporary execution flags if any
139
+ next_batch_args[:continuation] = true
140
+ RubyReactor::Map::Dispatcher.perform(next_batch_args)
106
141
  end
107
142
 
108
- def self.load_parent_context(arguments, reactor_class_name, storage)
109
- parent_context_data = storage.retrieve_context(arguments[:parent_context_id], reactor_class_name)
110
- parent_reactor_class = Object.const_get(reactor_class_name)
111
- parent_context = Context.new(
112
- ContextSerializer.deserialize_value(parent_context_data["inputs"]),
113
- parent_reactor_class
114
- )
115
- parent_context.context_id = arguments[:parent_context_id]
116
- parent_context
117
- end
118
-
119
- def self.resolve_next_element(arguments, parent_context, next_index)
120
- parent_reactor_class = parent_context.reactor_class
121
- step_config = parent_reactor_class.steps[arguments[:step_name].to_sym]
122
-
123
- source_template = step_config.arguments[:source][:source]
124
- source = source_template.resolve(parent_context)
125
- source[next_index]
126
- end
127
-
128
- def self.build_serialized_inputs(arguments, parent_context, element)
129
- parent_reactor_class = parent_context.reactor_class
130
- step_config = parent_reactor_class.steps[arguments[:step_name].to_sym]
131
-
132
- mappings_template = step_config.arguments[:argument_mappings][:source]
133
- mappings = mappings_template.resolve(parent_context) || {}
134
-
135
- mapped_inputs = build_element_inputs(mappings, parent_context, element)
136
- ContextSerializer.serialize_value(mapped_inputs)
137
- end
138
-
139
- def self.queue_element_job(arguments, map_id, next_index, serialized_inputs, reactor_class_name)
140
- RubyReactor.configuration.async_router.perform_map_element_async(
141
- map_id: map_id,
142
- element_id: "#{map_id}:#{next_index}",
143
- index: next_index,
144
- serialized_inputs: serialized_inputs,
145
- reactor_class_info: arguments[:reactor_class_info],
146
- strict_ordering: arguments[:strict_ordering],
147
- parent_context_id: arguments[:parent_context_id],
148
- parent_reactor_class_name: reactor_class_name,
149
- step_name: arguments[:step_name],
150
- batch_size: arguments[:batch_size]
151
- )
152
- end
153
- private_class_method :queue_next_batch, :load_parent_context,
154
- :resolve_next_element, :build_serialized_inputs, :queue_element_job
143
+ private_class_method :load_parent_context, :trigger_next_batch_if_needed
155
144
  end
156
145
  end
157
146
  end
@@ -21,7 +21,8 @@ module RubyReactor
21
21
  storage_options: {
22
22
  map_id: arguments[:map_id], storage: storage,
23
23
  parent_reactor_class_name: arguments[:parent_reactor_class_name],
24
- strict_ordering: arguments[:strict_ordering]
24
+ strict_ordering: arguments[:strict_ordering],
25
+ fail_fast: arguments[:fail_fast]
25
26
  }
26
27
  )
27
28
 
@@ -30,7 +31,14 @@ module RubyReactor
30
31
  end
31
32
 
32
33
  def self.execute_all_elements(source:, mappings:, reactor_class:, parent_context:, storage_options:)
34
+ # rubocop:disable Metrics/BlockLength
33
35
  source.map.with_index do |element, index|
36
+ if storage_options[:fail_fast]
37
+ failed_context_id = storage_options[:storage].retrieve_map_failed_context_id(
38
+ storage_options[:map_id], storage_options[:parent_reactor_class_name]
39
+ )
40
+ next if failed_context_id
41
+ end
34
42
  element_inputs = build_element_inputs(mappings, parent_context, element)
35
43
 
36
44
  # Manually create and link context to ensure parent_context_id is set
@@ -56,21 +64,27 @@ module RubyReactor
56
64
 
57
65
  store_result(result, index, storage_options)
58
66
 
67
+ if result.failure? && storage_options[:fail_fast]
68
+ storage_options[:storage].store_map_failed_context_id(
69
+ storage_options[:map_id], child_context.context_id, storage_options[:parent_reactor_class_name]
70
+ )
71
+ end
72
+
59
73
  result
60
- end
74
+ end.compact
75
+ # rubocop:enable Metrics/BlockLength
61
76
  end
62
77
 
63
78
  def self.link_contexts(child_context, parent_context)
64
79
  child_context.parent_context = parent_context
65
80
  child_context.root_context = parent_context.root_context || parent_context
66
- child_context.test_mode = parent_context.test_mode
67
81
  child_context.inline_async_execution = parent_context.inline_async_execution
68
82
  end
69
83
 
70
84
  def self.store_result(result, index, options)
71
85
  value = result.success? ? result.value : { _error: result.error }
72
86
  options[:storage].store_map_result(
73
- options[:map_id], index, value, options[:parent_reactor_class_name],
87
+ options[:map_id], index, ContextSerializer.serialize_value(value), options[:parent_reactor_class_name],
74
88
  strict_ordering: options[:strict_ordering]
75
89
  )
76
90
  end
@@ -54,16 +54,17 @@ module RubyReactor
54
54
  # Resumes parent reactor execution after map completion
55
55
  def resume_parent_execution(parent_context, step_name, final_result, storage)
56
56
  executor = RubyReactor::Executor.new(parent_context.reactor_class, {}, parent_context)
57
+ step_name_sym = step_name.to_sym
57
58
 
58
59
  if final_result.failure?
59
- step_name_sym = step_name.to_sym
60
60
  parent_context.current_step = step_name_sym
61
61
 
62
62
  error = RubyReactor::Error::StepFailureError.new(
63
63
  final_result.error,
64
64
  step: step_name_sym,
65
65
  context: parent_context,
66
- original_error: final_result.error.is_a?(Exception) ? final_result.error : nil
66
+ original_error: final_result.error.is_a?(Exception) ? final_result.error : nil,
67
+ exception_class: final_result.respond_to?(:exception_class) ? final_result.exception_class : nil
67
68
  )
68
69
 
69
70
  # Pass backtrace if available
@@ -77,7 +78,7 @@ module RubyReactor
77
78
  # Manually update context status since we're not running executor loop
78
79
  executor.send(:update_context_status, failure_response)
79
80
  else
80
- parent_context.set_result(step_name.to_sym, final_result.value)
81
+ parent_context.set_result(step_name_sym, final_result.value)
81
82
 
82
83
  # Manually update execution trace to reflect completion
83
84
  # This is necessary because resume_execution continues from the NEXT step
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyReactor
4
+ module Map
5
+ class ResultEnumerator
6
+ include Enumerable
7
+
8
+ DEFAULT_BATCH_SIZE = 1000
9
+
10
+ attr_reader :map_id, :reactor_class_name, :strict_ordering, :batch_size
11
+
12
+ def initialize(map_id, reactor_class_name, strict_ordering: true, batch_size: DEFAULT_BATCH_SIZE)
13
+ @map_id = map_id
14
+ @reactor_class_name = reactor_class_name
15
+ @strict_ordering = strict_ordering
16
+ @batch_size = batch_size
17
+ @storage = RubyReactor.configuration.storage_adapter
18
+ end
19
+
20
+ def each
21
+ return enum_for(:each) unless block_given?
22
+
23
+ if @strict_ordering
24
+ count.times do |i|
25
+ yield self[i]
26
+ end
27
+ else
28
+ offset = 0
29
+ loop do
30
+ results = @storage.retrieve_map_results_batch(
31
+ @map_id,
32
+ @reactor_class_name,
33
+ offset: offset,
34
+ limit: @batch_size,
35
+ strict_ordering: @strict_ordering
36
+ )
37
+
38
+ break if results.empty?
39
+
40
+ results.each { |result| yield wrap_result(result) }
41
+
42
+ offset += results.size
43
+ break if results.size < @batch_size
44
+ end
45
+ end
46
+ end
47
+
48
+ def count
49
+ @count ||= @storage.count_map_results(@map_id, @reactor_class_name)
50
+ end
51
+ alias size count
52
+ alias length count
53
+
54
+ def empty?
55
+ count.zero?
56
+ end
57
+
58
+ def any?
59
+ !empty?
60
+ end
61
+
62
+ def [](index)
63
+ return nil if index.negative? || index >= count
64
+
65
+ results = @storage.retrieve_map_results_batch(
66
+ @map_id,
67
+ @reactor_class_name,
68
+ offset: index,
69
+ limit: 1,
70
+ strict_ordering: @strict_ordering
71
+ )
72
+
73
+ return nil if results.empty?
74
+
75
+ wrap_result(results.first)
76
+ end
77
+
78
+ def first
79
+ self[0]
80
+ end
81
+
82
+ def last
83
+ self[count - 1]
84
+ end
85
+
86
+ def successes
87
+ lazy.select { |result| result.is_a?(RubyReactor::Success) }.map(&:value)
88
+ end
89
+
90
+ def failures
91
+ lazy.select { |result| result.is_a?(RubyReactor::Failure) }.map(&:error)
92
+ end
93
+
94
+ private
95
+
96
+ def wrap_result(result)
97
+ if result.is_a?(Hash) && result.key?("_error")
98
+ RubyReactor::Failure.new(result["_error"])
99
+ else
100
+ RubyReactor::Success.new(ContextSerializer.deserialize_value(result))
101
+ end
102
+ end
103
+ end
104
+ end
105
+ end