fractor 0.1.0 → 0.1.1

@@ -0,0 +1,216 @@
# frozen_string_literal: true

require_relative "../../lib/fractor"

module PipelineProcessing
  # Work that carries both the data and stage information
  class MediaWork < Fractor::Work
    def initialize(data, stage = :resize, metadata = {})
      super({
        data: data,
        stage: stage,
        metadata: metadata
      })
    end

    def data
      input[:data]
    end

    def stage
      input[:stage]
    end

    def metadata
      input[:metadata]
    end

    def to_s
      "MediaWork: stage=#{stage}, metadata=#{metadata}, data_size=#{begin
        data.bytesize
      rescue StandardError
        "unknown"
      end}"
    end
  end

  # Worker for all pipeline stages
  class PipelineWorker < Fractor::Worker
    def process(work)
      # Process based on the stage
      result = case work.stage
               when :resize then process_resize(work)
               when :filter then process_filter(work)
               when :compress then process_compress(work)
               when :tag then process_tag(work)
               else
                 return Fractor::WorkResult.new(
                   error: "Unknown stage: #{work.stage}",
                   work: work
                 )
               end

      # Determine the next stage
      stages = %i[resize filter compress tag]
      current_index = stages.index(work.stage)
      next_stage = current_index < stages.size - 1 ? stages[current_index + 1] : nil

      # Update metadata with processing information
      updated_metadata = work.metadata.merge(
        "#{work.stage}_completed" => true,
        "#{work.stage}_time" => Time.now.to_s
      )

      # Return the result with next stage information
      Fractor::WorkResult.new(
        result: {
          processed_data: result,
          current_stage: work.stage,
          next_stage: next_stage,
          metadata: updated_metadata
        },
        work: work
      )
    end

    private

    def process_resize(work)
      sleep(rand(0.01..0.05)) # Simulate processing time
      "Resized image: #{work.data} (#{rand(800..1200)}x#{rand(600..900)})"
    end

    def process_filter(work)
      sleep(rand(0.01..0.05)) # Simulate processing time
      filters = %w[sepia grayscale vibrance contrast]
      "Applied #{filters.sample} filter to: #{work.data}"
    end

    def process_compress(work)
      sleep(rand(0.01..0.05)) # Simulate processing time
      "Compressed image: #{work.data} (reduced by #{rand(30..70)}%)"
    end

    def process_tag(work)
      sleep(rand(0.01..0.05)) # Simulate processing time
      tags = %w[landscape portrait nature urban abstract]
      selected_tags = tags.sample(rand(1..3))
      "Tagged image: #{work.data} (tags: #{selected_tags.join(", ")})"
    end
  end

  # Controller class that manages the pipeline flow
  class MediaPipeline
    attr_reader :results

    def initialize(worker_count = 4)
      @supervisor = Fractor::Supervisor.new(
        worker_pools: [
          { worker_class: PipelineWorker, num_workers: worker_count }
        ]
      )

      # Register callback to handle pipeline stage transitions
      @supervisor.results.on_new_result do |result|
        next_stage = result.result[:next_stage]

        if next_stage
          # Create new work for the next stage
          new_work = MediaWork.new(
            result.result[:processed_data],
            next_stage,
            result.result[:metadata]
          )
          @supervisor.add_work_item(new_work)
        end
      end

      @results = {
        completed: [],
        in_progress: []
      }
    end

    def process_images(images)
      # Create initial work items for the first stage (resize)
      initial_work_items = images.map do |image|
        MediaWork.new(
          image,
          :resize,
          { original_filename: image, started_at: Time.now.to_s }
        )
      end

      # Add the work items and run the pipeline
      @supervisor.add_work_items(initial_work_items)
      @supervisor.run

      # Analyze results - collect completed ones (those that reached the final stage)
      @supervisor.results.results.each do |result|
        if result.result[:next_stage].nil?
          @results[:completed] << result.result
        else
          @results[:in_progress] << result.result
        end
      end

      # Return summary
      {
        total_images: images.size,
        completed: @results[:completed].size,
        in_progress: @results[:in_progress].size,
        results: @results[:completed]
      }
    end
  end
end

# Example usage
if __FILE__ == $PROGRAM_NAME
  puts "Starting Pipeline Processing Example"
  puts "====================================="
  puts "This example demonstrates a media processing pipeline with multiple stages:"
  puts "1. Resize - Adjusts image dimensions"
  puts "2. Filter - Applies visual filters"
  puts "3. Compress - Optimizes file size"
  puts "4. Tag - Analyzes and adds metadata tags"
  puts

  # Simulate some images to process
  images = [
    "sunset.jpg",
    "mountains.png",
    "beach.jpg",
    "city_skyline.jpg",
    "forest.png"
  ]

  worker_count = 4
  puts "Processing #{images.size} images with #{worker_count} workers..."
  puts

  start_time = Time.now
  pipeline = PipelineProcessing::MediaPipeline.new(worker_count)
  result = pipeline.process_images(images)
  end_time = Time.now

  puts "Pipeline Results:"
  puts "----------------"
  puts "Total images: #{result[:total_images]}"
  puts "Completed: #{result[:completed]}"
  puts "In progress: #{result[:in_progress]}"
  puts
  puts "Processed Images:"
  result[:results].each_with_index do |image_result, index|
    puts "Image #{index + 1}: #{image_result[:processed_data]}"
    puts "  Processing path:"
    image_result[:metadata].each do |key, value|
      next unless key.to_s.end_with?("_completed") || key.to_s.end_with?("_time")

      puts "    #{key}: #{value}"
    end
    puts
  end

  puts "Processing completed in #{end_time - start_time} seconds"
end
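
Because each stage's output string becomes the next stage's `data`, a completed image's `processed_data` nests one wrapper per stage. An illustrative final result (the sizes, filter, and tags are random and vary per run):

----
Tagged image: Compressed image: Applied sepia filter to: Resized image: sunset.jpg (1034x717) (reduced by 42%) (tags: nature, landscape)
----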
@@ -0,0 +1,92 @@
= Producer-Subscriber Example
:toc: macro
:toc-title: Table of Contents
:toclevels: 3

toc::[]

== Overview

The Producer-Subscriber example demonstrates how to use the Fractor framework to implement a multi-stage document processing system. This example showcases how initial work can generate additional sub-work items, creating a hierarchical processing pattern.

This example is particularly useful for:

* Implementing producer-consumer patterns in parallel systems
* Managing dependencies between work items
* Building hierarchical result structures from parallel processing

== Implementation Details

The example consists of the following key components:

=== InitialWork

A subclass of `Fractor::Work` that represents a document to be processed. Each `InitialWork` instance contains:

* The document data
* A depth level (always 0 for initial work)

=== SubWork

A subclass of `Fractor::Work` that represents a section of a document. Each `SubWork` instance contains:

* The section data
* A reference to its parent work via `parent_id`
* A depth level (typically its parent's depth + 1)

=== MultiWorker

A versatile worker that can process both `InitialWork` and `SubWork` instances:

* For `InitialWork`: Processes the document and identifies sections
* For `SubWork`: Processes individual sections

=== DocumentProcessor

The main orchestration class that:

1. Creates a supervisor for initial document processing
2. Analyzes results to identify additional work (sections)
3. Creates a second supervisor for processing sections
4. Builds a hierarchical result tree from both stages

== Usage

[source,ruby]
----
# Example document list
documents = [
  "Annual Report 2025",
  "Technical Documentation",
  "Research Paper"
]

# Process documents with 4 workers
processor = ProducerSubscriber::DocumentProcessor.new(documents, 4)
result = processor.process

# Print the hierarchical results
puts result
----

== How It Works

1. Initial documents are added to the processing queue
2. Each document is processed in parallel by workers
3. For each document, multiple sub-sections are identified (3 per document)
4. These sub-sections are then processed in a second phase
5. Results from both phases are combined into a hierarchical tree structure
6. The final output presents documents with their processed sections, roughly as sketched below
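
With the three sample documents from the Usage section, the formatted tree for one document takes roughly this shape (the other documents follow the same pattern; exact text varies per run):

----
Root: Processed: InitialWork: data=Annual Report 2025, depth=0
  ├─ Child 1: Sub-processed: Annual Report 2025-0 (depth: 1)
  ├─ Child 2: Sub-processed: Annual Report 2025-1 (depth: 1)
  ├─ Child 3: Sub-processed: Annual Report 2025-2 (depth: 1)
----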

== Multi-stage Processing Pattern

This example demonstrates a powerful pattern for parallel processing (a condensed sketch follows the list):

1. *First Stage Processing*: Process high-level items and identify additional work
2. *Work Generation*: Create new work items based on first-stage results
3. *Second Stage Processing*: Process the generated work items
4. *Result Aggregation*: Combine results from both stages into a cohesive structure
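
The sketch below condenses `DocumentProcessor#process` to its two-stage skeleton; `derive_new_work` is a hypothetical stand-in for whatever logic (here, `create_sub_works`) turns first-stage results into second-stage work items:

[source,ruby]
----
# Stage 1: process the initial items.
first = Fractor::Supervisor.new(
  worker_pools: [{ worker_class: MultiWorker, num_workers: 4 }]
)
first.add_work_items(documents.map { |doc| InitialWork.new(doc, 0) })
first.run

# Work generation: derive second-stage items from first-stage results.
new_items = derive_new_work(first.results)

# Stage 2: process the generated items, then aggregate both result sets.
second = Fractor::Supervisor.new(
  worker_pools: [{ worker_class: MultiWorker, num_workers: 4 }]
)
second.add_work_items(new_items)
second.run
----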

== Object Identity and References

Note that this example uses `object_id` to maintain references between parent and child work items. This approach allows building a hierarchical result structure when processing is complete.
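
Concretely, `build_result_tree` in the implementation that follows keys the tree by each parent work's `object_id`, and each sub-result carries that id back as `parent_id`. A minimal sketch of the linkage:

[source,ruby]
----
tree = {}

# First stage: one node per parent work, keyed by its object_id.
initial_results.results.each do |r|
  tree[r.work.object_id] = { data: r.result[:processed_data], children: [] }
end

# Second stage: each sub-result's parent_id points back to that key.
sub_results.results.each do |r|
  parent = tree[r.result[:parent_id]]
  parent[:children] << r.result[:processed_data] if parent
end
----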
@@ -0,0 +1,256 @@
# frozen_string_literal: true

require_relative "../../lib/fractor"

module ProducerSubscriber
  # Initial work that will generate sub-works
  class InitialWork < Fractor::Work
    def initialize(data, depth = 0)
      super({
        data: data,
        depth: depth
      })
    end

    def data
      input[:data]
    end

    def depth
      input[:depth]
    end

    def to_s
      "InitialWork: data=#{data}, depth=#{depth}"
    end
  end

  # Work that is generated from initial work
  class SubWork < Fractor::Work
    def initialize(data, parent_id = nil, depth = 0)
      super({
        data: data,
        parent_id: parent_id,
        depth: depth
      })
    end

    def data
      input[:data]
    end

    def parent_id
      input[:parent_id]
    end

    def depth
      input[:depth]
    end

    def to_s
      "SubWork: data=#{data}, parent_id=#{parent_id}, depth=#{depth}"
    end
  end

  # Worker that processes both types of work
  class MultiWorker < Fractor::Worker
    def process(work)
      # Handle different work types based on class
      if work.is_a?(InitialWork)
        process_initial_work(work)
      elsif work.is_a?(SubWork)
        process_sub_work(work)
      else
        Fractor::WorkResult.new(
          error: "Unknown work type: #{work.class}",
          work: work
        )
      end
    end

    private

    def process_initial_work(work)
      # Simulate processing time
      sleep(rand(0.01..0.05))

      # Process the data
      processed_data = "Processed: #{work}"

      # Return the result with metadata about sub-works
      result = {
        processed_data: processed_data,
        sub_works: [] # Will be populated by the supervisor
      }

      # Return a successful result
      Fractor::WorkResult.new(
        result: result,
        work: work
      )
    end

    def process_sub_work(work)
      # Simulate processing time
      sleep(rand(0.01..0.03))

      # Process the data
      processed_data = "Sub-processed: #{work.data} (depth: #{work.depth})"

      # Return a successful result
      Fractor::WorkResult.new(
        result: {
          processed_data: processed_data,
          parent_id: work.parent_id
        },
        work: work
      )
    end
  end

  # Manager for the document processing system
  class DocumentProcessor
    attr_reader :documents, :worker_count, :result_tree

    def initialize(documents, worker_count = 4)
      @documents = documents
      @worker_count = worker_count
      @result_tree = {}
    end

    def process
      # Create the supervisor
      supervisor = Fractor::Supervisor.new(
        worker_pools: [
          { worker_class: MultiWorker, num_workers: @worker_count }
        ]
      )

      # Create and add initial work items
      initial_work_items = documents.map { |doc| InitialWork.new(doc, 0) }
      supervisor.add_work_items(initial_work_items)

      # Run the initial processing
      supervisor.run

      # Analyze results and create sub-works
      sub_works = create_sub_works(supervisor.results)

      # If we have sub-works, process them too
      if sub_works.empty?
        # Just build the result tree with the initial results
        build_result_tree(supervisor.results, nil)
      else
        # Create a new supervisor for sub-works
        sub_supervisor = Fractor::Supervisor.new(
          worker_pools: [
            { worker_class: MultiWorker, num_workers: @worker_count }
          ]
        )

        # Create and add the sub-work items
        sub_work_items = sub_works.map { |sw| SubWork.new(sw[:data], sw[:parent_id], sw[:depth]) }
        sub_supervisor.add_work_items(sub_work_items)
        sub_supervisor.run

        # Build the final result tree
        build_result_tree(supervisor.results, sub_supervisor.results)
      end

      # Return a formatted representation of the tree
      format_tree
    end

    private

    def create_sub_works(results_aggregator)
      sub_works = []

      # Go through the successful results
      results_aggregator.results.each do |result|
        work = result.work

        # Only create sub-works if depth is less than 2
        next unless work.depth < 2

        # Create 3 sub-works for each initial work
        3.times do |i|
          sub_data = "#{work.data}-#{i}"
          sub_works << {
            data: sub_data,
            parent_id: work.object_id,
            depth: work.depth + 1
          }
        end

        # Store references to the generated sub-works on the result
        result.result[:sub_works] = sub_works.last(3).map { |sw| sw[:data] }
      end

      sub_works
    end

    def build_result_tree(initial_results, sub_results)
      # Process initial results to build the base tree
      initial_results.results.each do |result|
        @result_tree[result.work.object_id] = {
          data: result.result[:processed_data],
          children: []
        }
      end

      # Process sub-results if any
      return unless sub_results

      sub_results.results.each do |result|
        parent_id = result.result[:parent_id]
        @result_tree[parent_id][:children] << result.result[:processed_data] if @result_tree[parent_id]
      end
    end

    def format_tree
      result = []
      @result_tree.each_value do |node|
        result << "Root: #{node[:data]}"
        node[:children].each_with_index do |child, index|
          result << "  ├─ Child #{index + 1}: #{child}"
        end
        result << ""
      end
      result.join("\n")
    end
  end
end

# Example usage: Document processing system
if __FILE__ == $PROGRAM_NAME
  puts "Starting producer-subscriber example: Document Processing System"
  puts "This example simulates a document processing system where:"
  puts "1. Initial documents are broken down into sections"
  puts "2. Sections are further broken down into paragraphs"
  puts "3. Paragraphs are processed individually"
  puts "4. Results are assembled into a hierarchical structure"
  puts

  # Sample documents to process
  documents = [
    "Annual Report 2025",
    "Technical Documentation",
    "Research Paper"
  ]

  worker_count = 4
  puts "Using #{worker_count} workers to process #{documents.size} documents"
  puts

  start_time = Time.now
  processor = ProducerSubscriber::DocumentProcessor.new(documents, worker_count)
  result = processor.process
  end_time = Time.now

  puts "Processing Results:"
  puts "==================="
  puts result
  puts
  puts "Processing completed in #{end_time - start_time} seconds"
end
@@ -0,0 +1,43 @@
= Scatter-Gather Example

== Overview

This example demonstrates the Scatter-Gather pattern with Fractor. In this pattern, work is scattered (distributed) across multiple workers for parallel processing, and then the results are gathered and combined.

== Key Concepts

* *Scatter*: A primary task is broken down into multiple subtasks
* *Parallel Processing*: Each subtask is processed concurrently by different workers
* *Gather*: Results from all subtasks are collected and aggregated
* *Final Processing*: The aggregated results are combined to form the final output

== Example Explanation

This example processes a large dataset in four steps (a minimal code sketch follows the list):

1. Breaking it down into smaller chunks (scatter)
2. Processing each chunk in parallel using Fractor workers
3. Collecting the processed chunks (gather)
4. Combining the results for the final output
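
The example script itself is not shown in this diff excerpt, so the following is only a hypothetical sketch of the flow, built from the `Fractor::Supervisor`/`Worker`/`Work` API used in the other examples; `ChunkWork` and `ChunkWorker` are illustrative names:

[source,ruby]
----
class ChunkWork < Fractor::Work; end

class ChunkWorker < Fractor::Worker
  def process(work)
    # Process a single chunk; here the "work" is summing a slice of numbers.
    Fractor::WorkResult.new(result: work.input.sum, work: work)
  end
end

data = (1..1_000).to_a
chunks = data.each_slice(100).to_a # scatter: split into subtasks

supervisor = Fractor::Supervisor.new(
  worker_pools: [{ worker_class: ChunkWorker, num_workers: 4 }]
)
supervisor.add_work_items(chunks.map { |chunk| ChunkWork.new(chunk) })
supervisor.run # parallel processing across workers

# Gather: aggregate the per-chunk results into the final output.
total = supervisor.results.results.sum(&:result)
puts "Sum of 1..1000 = #{total}"
----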

== Features Demonstrated

* Effective workload distribution
* Parallel processing for improved performance
* Result aggregation from multiple workers
* Error handling in a distributed computation context

== Running the Example

[source,sh]
----
ruby examples/scatter_gather/scatter_gather.rb
----

== Expected Output

The example will show:

* The input data being broken into chunks
* Workers processing the chunks in parallel
* Results being gathered from workers
* The final aggregated results