fractor 0.1.0 → 0.1.1

@@ -0,0 +1,216 @@
# frozen_string_literal: true

require_relative "../../lib/fractor"

module PipelineProcessing
  # Work that carries both the data and stage information
  class MediaWork < Fractor::Work
    def initialize(data, stage = :resize, metadata = {})
      super({
        data: data,
        stage: stage,
        metadata: metadata
      })
    end

    def data
      input[:data]
    end

    def stage
      input[:stage]
    end

    def metadata
      input[:metadata]
    end

    def to_s
      "MediaWork: stage=#{stage}, metadata=#{metadata}, data_size=#{begin
        data.bytesize
      rescue StandardError
        "unknown"
      end}"
    end
  end

  # Worker for all pipeline stages
  class PipelineWorker < Fractor::Worker
    def process(work)
      # Process based on the stage
      result = case work.stage
               when :resize then process_resize(work)
               when :filter then process_filter(work)
               when :compress then process_compress(work)
               when :tag then process_tag(work)
               else
                 return Fractor::WorkResult.new(
                   error: "Unknown stage: #{work.stage}",
                   work: work
                 )
               end

      # Determine the next stage
      stages = %i[resize filter compress tag]
      current_index = stages.index(work.stage)
      next_stage = current_index < stages.size - 1 ? stages[current_index + 1] : nil

      # Update metadata with processing information
      updated_metadata = work.metadata.merge(
        "#{work.stage}_completed" => true,
        "#{work.stage}_time" => Time.now.to_s
      )

      # Return the result with next stage information
      Fractor::WorkResult.new(
        result: {
          processed_data: result,
          current_stage: work.stage,
          next_stage: next_stage,
          metadata: updated_metadata
        },
        work: work
      )
    end

    private

    def process_resize(work)
      sleep(rand(0.01..0.05)) # Simulate processing time
      "Resized image: #{work.data} (#{rand(800..1200)}x#{rand(600..900)})"
    end

    def process_filter(work)
      sleep(rand(0.01..0.05)) # Simulate processing time
      filters = %w[sepia grayscale vibrance contrast]
      "Applied #{filters.sample} filter to: #{work.data}"
    end

    def process_compress(work)
      sleep(rand(0.01..0.05)) # Simulate processing time
      "Compressed image: #{work.data} (reduced by #{rand(30..70)}%)"
    end

    def process_tag(work)
      sleep(rand(0.01..0.05)) # Simulate processing time
      tags = %w[landscape portrait nature urban abstract]
      selected_tags = tags.sample(rand(1..3))
      "Tagged image: #{work.data} (tags: #{selected_tags.join(", ")})"
    end
  end

  # Controller class that manages the pipeline flow
  class MediaPipeline
    attr_reader :results

    def initialize(worker_count = 4)
      @supervisor = Fractor::Supervisor.new(
        worker_pools: [
          { worker_class: PipelineWorker, num_workers: worker_count }
        ]
      )

      # Register callback to handle pipeline stage transitions
      @supervisor.results.on_new_result do |result|
        next_stage = result.result[:next_stage]

        if next_stage
          # Create new work for the next stage
          new_work = MediaWork.new(
            result.result[:processed_data],
            next_stage,
            result.result[:metadata]
          )
          @supervisor.add_work_item(new_work)
        end
      end

      @results = {
        completed: [],
        in_progress: []
      }
    end

    def process_images(images)
      # Create initial work items for the first stage (resize)
      initial_work_items = images.map do |image|
        MediaWork.new(
          image,
          :resize,
          { original_filename: image, started_at: Time.now.to_s }
        )
      end

      # Add the work items and run the pipeline
      @supervisor.add_work_items(initial_work_items)
      @supervisor.run

      # Analyze results - collect completed ones (those that reached the final stage)
      @supervisor.results.results.each do |result|
        if result.result[:next_stage].nil?
          @results[:completed] << result.result
        else
          @results[:in_progress] << result.result
        end
      end

      # Return summary
      {
        total_images: images.size,
        completed: @results[:completed].size,
        in_progress: @results[:in_progress].size,
        results: @results[:completed]
      }
    end
  end
end

# Example usage
if __FILE__ == $PROGRAM_NAME
  puts "Starting Pipeline Processing Example"
  puts "====================================="
  puts "This example demonstrates a media processing pipeline with multiple stages:"
  puts "1. Resize - Adjusts image dimensions"
  puts "2. Filter - Applies visual filters"
  puts "3. Compress - Optimizes file size"
  puts "4. Tag - Analyzes and adds metadata tags"
  puts

  # Simulate some images to process
  images = [
    "sunset.jpg",
    "mountains.png",
    "beach.jpg",
    "city_skyline.jpg",
    "forest.png"
  ]

  worker_count = 4
  puts "Processing #{images.size} images with #{worker_count} workers..."
  puts

  start_time = Time.now
  pipeline = PipelineProcessing::MediaPipeline.new(worker_count)
  result = pipeline.process_images(images)
  end_time = Time.now

  puts "Pipeline Results:"
  puts "----------------"
  puts "Total images: #{result[:total_images]}"
  puts "Completed: #{result[:completed]}"
  puts "In progress: #{result[:in_progress]}"
  puts
  puts "Processed Images:"
  result[:results].each_with_index do |image_result, index|
    puts "Image #{index + 1}: #{image_result[:processed_data]}"
    puts "  Processing path:"
    image_result[:metadata].each do |key, value|
      next unless key.to_s.end_with?("_completed") || key.to_s.end_with?("_time")

      puts "    #{key}: #{value}"
    end
    puts
  end

  puts "Processing completed in #{end_time - start_time} seconds"
end
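
Because each stage's output string becomes the next stage's `data`, a completed image's `processed_data` nests one wrapper per stage. An illustrative final result (the sizes, filter, and tags are random and vary per run):

----
Tagged image: Compressed image: Applied sepia filter to: Resized image: sunset.jpg (1034x717) (reduced by 42%) (tags: nature, landscape)
----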
@@ -0,0 +1,92 @@
= Producer-Subscriber Example
:toc: macro
:toc-title: Table of Contents
:toclevels: 3

toc::[]

== Overview

The Producer-Subscriber example demonstrates how to use the Fractor framework to implement a multi-stage document processing system. This example showcases how initial work can generate additional sub-work items, creating a hierarchical processing pattern.

This example is particularly useful for:

* Implementing producer-consumer patterns in parallel systems
* Managing dependencies between work items
* Building hierarchical result structures from parallel processing

== Implementation Details

The example consists of the following key components:

=== InitialWork

A subclass of `Fractor::Work` that represents a document to be processed. Each `InitialWork` instance contains:

* The document data
* A depth level (always 0 for initial work)

=== SubWork

A subclass of `Fractor::Work` that represents a section of a document. Each `SubWork` instance contains:

* The section data
* A reference to its parent work via `parent_id`
* A depth level (typically its parent's depth + 1)

=== MultiWorker

A versatile worker that can process both `InitialWork` and `SubWork` instances:

* For `InitialWork`: Processes the document and identifies sections
* For `SubWork`: Processes individual sections

=== DocumentProcessor

The main orchestration class that:

1. Creates a supervisor for initial document processing
2. Analyzes results to identify additional work (sections)
3. Creates a second supervisor for processing sections
4. Builds a hierarchical result tree from both stages

== Usage

[source,ruby]
----
# Example document list
documents = [
  "Annual Report 2025",
  "Technical Documentation",
  "Research Paper"
]

# Process documents with 4 workers
processor = ProducerSubscriber::DocumentProcessor.new(documents, 4)
result = processor.process

# Print the hierarchical results
puts result
----

== How It Works

1. Initial documents are added to the processing queue
2. Each document is processed in parallel by workers
3. For each document, multiple sub-sections are identified (3 per document)
4. These sub-sections are then processed in a second phase
5. Results from both phases are combined into a hierarchical tree structure
6. The final output presents documents with their processed sections, roughly as sketched below
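
With the three sample documents from the Usage section, the formatted tree for one document takes roughly this shape (the other documents follow the same pattern; exact text varies per run):

----
Root: Processed: InitialWork: data=Annual Report 2025, depth=0
  ├─ Child 1: Sub-processed: Annual Report 2025-0 (depth: 1)
  ├─ Child 2: Sub-processed: Annual Report 2025-1 (depth: 1)
  ├─ Child 3: Sub-processed: Annual Report 2025-2 (depth: 1)
----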

== Multi-stage Processing Pattern

This example demonstrates a powerful pattern for parallel processing (a condensed sketch follows the list):

1. *First Stage Processing*: Process high-level items and identify additional work
2. *Work Generation*: Create new work items based on first-stage results
3. *Second Stage Processing*: Process the generated work items
4. *Result Aggregation*: Combine results from both stages into a cohesive structure
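
The sketch below condenses `DocumentProcessor#process` to its two-stage skeleton; `derive_new_work` is a hypothetical stand-in for whatever logic (here, `create_sub_works`) turns first-stage results into second-stage work items:

[source,ruby]
----
# Stage 1: process the initial items.
first = Fractor::Supervisor.new(
  worker_pools: [{ worker_class: MultiWorker, num_workers: 4 }]
)
first.add_work_items(documents.map { |doc| InitialWork.new(doc, 0) })
first.run

# Work generation: derive second-stage items from first-stage results.
new_items = derive_new_work(first.results)

# Stage 2: process the generated items, then aggregate both result sets.
second = Fractor::Supervisor.new(
  worker_pools: [{ worker_class: MultiWorker, num_workers: 4 }]
)
second.add_work_items(new_items)
second.run
----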

== Object Identity and References

Note that this example uses `object_id` to maintain references between parent and child work items. This approach allows building a hierarchical result structure when processing is complete.
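
Concretely, `build_result_tree` in the implementation that follows keys the tree by each parent work's `object_id`, and each sub-result carries that id back as `parent_id`. A minimal sketch of the linkage:

[source,ruby]
----
tree = {}

# First stage: one node per parent work, keyed by its object_id.
initial_results.results.each do |r|
  tree[r.work.object_id] = { data: r.result[:processed_data], children: [] }
end

# Second stage: each sub-result's parent_id points back to that key.
sub_results.results.each do |r|
  parent = tree[r.result[:parent_id]]
  parent[:children] << r.result[:processed_data] if parent
end
----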
@@ -0,0 +1,256 @@
# frozen_string_literal: true

require_relative "../../lib/fractor"

module ProducerSubscriber
  # Initial work that will generate sub-works
  class InitialWork < Fractor::Work
    def initialize(data, depth = 0)
      super({
        data: data,
        depth: depth
      })
    end

    def data
      input[:data]
    end

    def depth
      input[:depth]
    end

    def to_s
      "InitialWork: data=#{data}, depth=#{depth}"
    end
  end

  # Work that is generated from initial work
  class SubWork < Fractor::Work
    def initialize(data, parent_id = nil, depth = 0)
      super({
        data: data,
        parent_id: parent_id,
        depth: depth
      })
    end

    def data
      input[:data]
    end

    def parent_id
      input[:parent_id]
    end

    def depth
      input[:depth]
    end

    def to_s
      "SubWork: data=#{data}, parent_id=#{parent_id}, depth=#{depth}"
    end
  end

  # Worker that processes both types of work
  class MultiWorker < Fractor::Worker
    def process(work)
      # Handle different work types based on class
      if work.is_a?(InitialWork)
        process_initial_work(work)
      elsif work.is_a?(SubWork)
        process_sub_work(work)
      else
        Fractor::WorkResult.new(
          error: "Unknown work type: #{work.class}",
          work: work
        )
      end
    end

    private

    def process_initial_work(work)
      # Simulate processing time
      sleep(rand(0.01..0.05))

      # Process the data
      processed_data = "Processed: #{work}"

      # Return the result with metadata about sub-works
      result = {
        processed_data: processed_data,
        sub_works: [] # Will be populated by the supervisor
      }

      # Return a successful result
      Fractor::WorkResult.new(
        result: result,
        work: work
      )
    end

    def process_sub_work(work)
      # Simulate processing time
      sleep(rand(0.01..0.03))

      # Process the data
      processed_data = "Sub-processed: #{work.data} (depth: #{work.depth})"

      # Return a successful result
      Fractor::WorkResult.new(
        result: {
          processed_data: processed_data,
          parent_id: work.parent_id
        },
        work: work
      )
    end
  end

  # Manager for the document processing system
  class DocumentProcessor
    attr_reader :documents, :worker_count, :result_tree

    def initialize(documents, worker_count = 4)
      @documents = documents
      @worker_count = worker_count
      @result_tree = {}
    end

    def process
      # Create the supervisor
      supervisor = Fractor::Supervisor.new(
        worker_pools: [
          { worker_class: MultiWorker, num_workers: @worker_count }
        ]
      )

      # Create and add initial work items
      initial_work_items = documents.map { |doc| InitialWork.new(doc, 0) }
      supervisor.add_work_items(initial_work_items)

      # Run the initial processing
      supervisor.run

      # Analyze results and create sub-works
      sub_works = create_sub_works(supervisor.results)

      # If we have sub-works, process them too
      if sub_works.empty?
        # Just build the result tree with the initial results
        build_result_tree(supervisor.results, nil)
      else
        # Create a new supervisor for sub-works
        sub_supervisor = Fractor::Supervisor.new(
          worker_pools: [
            { worker_class: MultiWorker, num_workers: @worker_count }
          ]
        )

        # Create and add the sub-work items
        sub_work_items = sub_works.map { |sw| SubWork.new(sw[:data], sw[:parent_id], sw[:depth]) }
        sub_supervisor.add_work_items(sub_work_items)
        sub_supervisor.run

        # Build the final result tree
        build_result_tree(supervisor.results, sub_supervisor.results)
      end

      # Return a formatted representation of the tree
      format_tree
    end

    private

    def create_sub_works(results_aggregator)
      sub_works = []

      # Go through the successful results
      results_aggregator.results.each do |result|
        work = result.work

        # Only create sub-works if depth is less than 2
        next unless work.depth < 2

        # Create 3 sub-works for each initial work
        3.times do |i|
          sub_data = "#{work.data}-#{i}"
          sub_works << {
            data: sub_data,
            parent_id: work.object_id,
            depth: work.depth + 1
          }
        end

        # Store references to the generated sub-works on the result
        result.result[:sub_works] = sub_works.last(3).map { |sw| sw[:data] }
      end

      sub_works
    end

    def build_result_tree(initial_results, sub_results)
      # Process initial results to build the base tree
      initial_results.results.each do |result|
        @result_tree[result.work.object_id] = {
          data: result.result[:processed_data],
          children: []
        }
      end

      # Process sub-results if any
      return unless sub_results

      sub_results.results.each do |result|
        parent_id = result.result[:parent_id]
        @result_tree[parent_id][:children] << result.result[:processed_data] if @result_tree[parent_id]
      end
    end

    def format_tree
      result = []
      @result_tree.each_value do |node|
        result << "Root: #{node[:data]}"
        node[:children].each_with_index do |child, index|
          result << "  ├─ Child #{index + 1}: #{child}"
        end
        result << ""
      end
      result.join("\n")
    end
  end
end

# Example usage: Document processing system
if __FILE__ == $PROGRAM_NAME
  puts "Starting producer-subscriber example: Document Processing System"
  puts "This example simulates a document processing system where:"
  puts "1. Initial documents are broken down into sections"
  puts "2. Sections are further broken down into paragraphs"
  puts "3. Paragraphs are processed individually"
  puts "4. Results are assembled into a hierarchical structure"
  puts

  # Sample documents to process
  documents = [
    "Annual Report 2025",
    "Technical Documentation",
    "Research Paper"
  ]

  worker_count = 4
  puts "Using #{worker_count} workers to process #{documents.size} documents"
  puts

  start_time = Time.now
  processor = ProducerSubscriber::DocumentProcessor.new(documents, worker_count)
  result = processor.process
  end_time = Time.now

  puts "Processing Results:"
  puts "==================="
  puts result
  puts
  puts "Processing completed in #{end_time - start_time} seconds"
end
@@ -0,0 +1,43 @@
= Scatter-Gather Example

== Overview

This example demonstrates the Scatter-Gather pattern with Fractor. In this pattern, work is scattered (distributed) across multiple workers for parallel processing, and then the results are gathered and combined.

== Key Concepts

* *Scatter*: A primary task is broken down into multiple subtasks
* *Parallel Processing*: Each subtask is processed concurrently by different workers
* *Gather*: Results from all subtasks are collected and aggregated
* *Final Processing*: The aggregated results are combined to form the final output

== Example Explanation

This example processes a large dataset in four steps (a minimal code sketch follows the list):

1. Breaking it down into smaller chunks (scatter)
2. Processing each chunk in parallel using Fractor workers
3. Collecting the processed chunks (gather)
4. Combining the results for the final output
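
The example script itself is not shown in this diff excerpt, so the following is only a hypothetical sketch of the flow, built from the `Fractor::Supervisor`/`Worker`/`Work` API used in the other examples; `ChunkWork` and `ChunkWorker` are illustrative names:

[source,ruby]
----
class ChunkWork < Fractor::Work; end

class ChunkWorker < Fractor::Worker
  def process(work)
    # Process a single chunk; here the "work" is summing a slice of numbers.
    Fractor::WorkResult.new(result: work.input.sum, work: work)
  end
end

data = (1..1_000).to_a
chunks = data.each_slice(100).to_a # scatter: split into subtasks

supervisor = Fractor::Supervisor.new(
  worker_pools: [{ worker_class: ChunkWorker, num_workers: 4 }]
)
supervisor.add_work_items(chunks.map { |chunk| ChunkWork.new(chunk) })
supervisor.run # parallel processing across workers

# Gather: aggregate the per-chunk results into the final output.
total = supervisor.results.results.sum(&:result)
puts "Sum of 1..1000 = #{total}"
----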

== Features Demonstrated

* Effective workload distribution
* Parallel processing for improved performance
* Result aggregation from multiple workers
* Error handling in a distributed computation context

== Running the Example

[source,sh]
----
ruby examples/scatter_gather/scatter_gather.rb
----

== Expected Output

The example will show:

* The input data being broken into chunks
* Workers processing the chunks in parallel
* Results being gathered from workers
* The final aggregated results