fractor 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-main-ci-rubocop-yml +552 -0
- data/.rubocop.yml +14 -8
- data/.rubocop_todo.yml +284 -43
- data/README.adoc +111 -950
- data/docs/.lycheeignore +16 -0
- data/docs/Gemfile +24 -0
- data/docs/README.md +157 -0
- data/docs/_config.yml +151 -0
- data/docs/_features/error-handling.adoc +1192 -0
- data/docs/_features/index.adoc +80 -0
- data/docs/_features/monitoring.adoc +589 -0
- data/docs/_features/signal-handling.adoc +202 -0
- data/docs/_features/workflows.adoc +1235 -0
- data/docs/_guides/continuous-mode.adoc +736 -0
- data/docs/_guides/cookbook.adoc +1133 -0
- data/docs/_guides/index.adoc +55 -0
- data/docs/_guides/pipeline-mode.adoc +730 -0
- data/docs/_guides/troubleshooting.adoc +358 -0
- data/docs/_pages/architecture.adoc +1390 -0
- data/docs/_pages/core-concepts.adoc +1392 -0
- data/docs/_pages/design-principles.adoc +862 -0
- data/docs/_pages/getting-started.adoc +290 -0
- data/docs/_pages/installation.adoc +143 -0
- data/docs/_reference/api.adoc +1080 -0
- data/docs/_reference/error-reporting.adoc +670 -0
- data/docs/_reference/examples.adoc +181 -0
- data/docs/_reference/index.adoc +96 -0
- data/docs/_reference/troubleshooting.adoc +862 -0
- data/docs/_tutorials/complex-workflows.adoc +1022 -0
- data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
- data/docs/_tutorials/first-application.adoc +384 -0
- data/docs/_tutorials/index.adoc +48 -0
- data/docs/_tutorials/long-running-services.adoc +931 -0
- data/docs/assets/images/favicon-16.png +0 -0
- data/docs/assets/images/favicon-32.png +0 -0
- data/docs/assets/images/favicon-48.png +0 -0
- data/docs/assets/images/favicon.ico +0 -0
- data/docs/assets/images/favicon.png +0 -0
- data/docs/assets/images/favicon.svg +45 -0
- data/docs/assets/images/fractor-icon.svg +49 -0
- data/docs/assets/images/fractor-logo.svg +61 -0
- data/docs/index.adoc +131 -0
- data/docs/lychee.toml +39 -0
- data/examples/api_aggregator/README.adoc +627 -0
- data/examples/api_aggregator/api_aggregator.rb +376 -0
- data/examples/auto_detection/README.adoc +407 -29
- data/examples/auto_detection/auto_detection.rb +9 -9
- data/examples/continuous_chat_common/message_protocol.rb +53 -0
- data/examples/continuous_chat_fractor/README.adoc +217 -0
- data/examples/continuous_chat_fractor/chat_client.rb +303 -0
- data/examples/continuous_chat_fractor/chat_common.rb +83 -0
- data/examples/continuous_chat_fractor/chat_server.rb +167 -0
- data/examples/continuous_chat_fractor/simulate.rb +345 -0
- data/examples/continuous_chat_server/README.adoc +135 -0
- data/examples/continuous_chat_server/chat_client.rb +303 -0
- data/examples/continuous_chat_server/chat_server.rb +359 -0
- data/examples/continuous_chat_server/simulate.rb +343 -0
- data/examples/error_reporting.rb +207 -0
- data/examples/file_processor/README.adoc +170 -0
- data/examples/file_processor/file_processor.rb +615 -0
- data/examples/file_processor/sample_files/invalid.csv +1 -0
- data/examples/file_processor/sample_files/orders.xml +24 -0
- data/examples/file_processor/sample_files/products.json +23 -0
- data/examples/file_processor/sample_files/users.csv +6 -0
- data/examples/hierarchical_hasher/README.adoc +629 -41
- data/examples/hierarchical_hasher/hierarchical_hasher.rb +12 -8
- data/examples/image_processor/README.adoc +610 -0
- data/examples/image_processor/image_processor.rb +349 -0
- data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
- data/examples/image_processor/test_images/sample_1.png +1 -0
- data/examples/image_processor/test_images/sample_10.png +1 -0
- data/examples/image_processor/test_images/sample_2.png +1 -0
- data/examples/image_processor/test_images/sample_3.png +1 -0
- data/examples/image_processor/test_images/sample_4.png +1 -0
- data/examples/image_processor/test_images/sample_5.png +1 -0
- data/examples/image_processor/test_images/sample_6.png +1 -0
- data/examples/image_processor/test_images/sample_7.png +1 -0
- data/examples/image_processor/test_images/sample_8.png +1 -0
- data/examples/image_processor/test_images/sample_9.png +1 -0
- data/examples/log_analyzer/README.adoc +662 -0
- data/examples/log_analyzer/log_analyzer.rb +579 -0
- data/examples/log_analyzer/sample_logs/apache.log +20 -0
- data/examples/log_analyzer/sample_logs/json.log +15 -0
- data/examples/log_analyzer/sample_logs/nginx.log +15 -0
- data/examples/log_analyzer/sample_logs/rails.log +29 -0
- data/examples/multi_work_type/README.adoc +576 -26
- data/examples/multi_work_type/multi_work_type.rb +30 -29
- data/examples/performance_monitoring.rb +120 -0
- data/examples/pipeline_processing/README.adoc +740 -26
- data/examples/pipeline_processing/pipeline_processing.rb +16 -16
- data/examples/priority_work_example.rb +155 -0
- data/examples/producer_subscriber/README.adoc +889 -46
- data/examples/producer_subscriber/producer_subscriber.rb +20 -16
- data/examples/scatter_gather/README.adoc +829 -27
- data/examples/scatter_gather/scatter_gather.rb +29 -28
- data/examples/simple/README.adoc +347 -0
- data/examples/simple/sample.rb +5 -5
- data/examples/specialized_workers/README.adoc +622 -26
- data/examples/specialized_workers/specialized_workers.rb +88 -45
- data/examples/stream_processor/README.adoc +206 -0
- data/examples/stream_processor/stream_processor.rb +284 -0
- data/examples/web_scraper/README.adoc +625 -0
- data/examples/web_scraper/web_scraper.rb +285 -0
- data/examples/workflow/README.adoc +406 -0
- data/examples/workflow/circuit_breaker/README.adoc +360 -0
- data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
- data/examples/workflow/conditional/README.adoc +483 -0
- data/examples/workflow/conditional/conditional_workflow.rb +215 -0
- data/examples/workflow/dead_letter_queue/README.adoc +374 -0
- data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
- data/examples/workflow/fan_out/README.adoc +381 -0
- data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
- data/examples/workflow/retry/README.adoc +248 -0
- data/examples/workflow/retry/retry_workflow.rb +195 -0
- data/examples/workflow/simple_linear/README.adoc +267 -0
- data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
- data/examples/workflow/simplified/README.adoc +329 -0
- data/examples/workflow/simplified/simplified_workflow.rb +222 -0
- data/exe/fractor +10 -0
- data/lib/fractor/cli.rb +288 -0
- data/lib/fractor/configuration.rb +307 -0
- data/lib/fractor/continuous_server.rb +183 -0
- data/lib/fractor/error_formatter.rb +72 -0
- data/lib/fractor/error_report_generator.rb +152 -0
- data/lib/fractor/error_reporter.rb +244 -0
- data/lib/fractor/error_statistics.rb +147 -0
- data/lib/fractor/execution_tracer.rb +162 -0
- data/lib/fractor/logger.rb +230 -0
- data/lib/fractor/main_loop_handler.rb +406 -0
- data/lib/fractor/main_loop_handler3.rb +135 -0
- data/lib/fractor/main_loop_handler4.rb +299 -0
- data/lib/fractor/performance_metrics_collector.rb +181 -0
- data/lib/fractor/performance_monitor.rb +215 -0
- data/lib/fractor/performance_report_generator.rb +202 -0
- data/lib/fractor/priority_work.rb +93 -0
- data/lib/fractor/priority_work_queue.rb +189 -0
- data/lib/fractor/result_aggregator.rb +33 -1
- data/lib/fractor/shutdown_handler.rb +168 -0
- data/lib/fractor/signal_handler.rb +80 -0
- data/lib/fractor/supervisor.rb +430 -144
- data/lib/fractor/supervisor_logger.rb +88 -0
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work.rb +12 -0
- data/lib/fractor/work_distribution_manager.rb +151 -0
- data/lib/fractor/work_queue.rb +88 -0
- data/lib/fractor/work_result.rb +181 -9
- data/lib/fractor/worker.rb +75 -1
- data/lib/fractor/workflow/builder.rb +210 -0
- data/lib/fractor/workflow/chain_builder.rb +169 -0
- data/lib/fractor/workflow/circuit_breaker.rb +183 -0
- data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
- data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
- data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
- data/lib/fractor/workflow/execution_hooks.rb +39 -0
- data/lib/fractor/workflow/execution_strategy.rb +225 -0
- data/lib/fractor/workflow/execution_trace.rb +134 -0
- data/lib/fractor/workflow/helpers.rb +191 -0
- data/lib/fractor/workflow/job.rb +290 -0
- data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
- data/lib/fractor/workflow/logger.rb +110 -0
- data/lib/fractor/workflow/pre_execution_context.rb +193 -0
- data/lib/fractor/workflow/retry_config.rb +156 -0
- data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
- data/lib/fractor/workflow/retry_strategy.rb +93 -0
- data/lib/fractor/workflow/structured_logger.rb +30 -0
- data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
- data/lib/fractor/workflow/visualizer.rb +211 -0
- data/lib/fractor/workflow/workflow_context.rb +132 -0
- data/lib/fractor/workflow/workflow_executor.rb +669 -0
- data/lib/fractor/workflow/workflow_result.rb +55 -0
- data/lib/fractor/workflow/workflow_validator.rb +295 -0
- data/lib/fractor/workflow.rb +333 -0
- data/lib/fractor/wrapped_ractor.rb +66 -91
- data/lib/fractor/wrapped_ractor3.rb +161 -0
- data/lib/fractor/wrapped_ractor4.rb +242 -0
- data/lib/fractor.rb +93 -3
- metadata +192 -6
- data/tests/sample.rb.bak +0 -309
- data/tests/sample_working.rb.bak +0 -209
|
@@ -10,7 +10,7 @@ module HierarchicalHasher
|
|
|
10
10
|
super({
|
|
11
11
|
data: data,
|
|
12
12
|
start: start,
|
|
13
|
-
length: length || data.bytesize
|
|
13
|
+
length: length || data.bytesize,
|
|
14
14
|
})
|
|
15
15
|
end
|
|
16
16
|
|
|
@@ -46,15 +46,15 @@ module HierarchicalHasher
|
|
|
46
46
|
result: {
|
|
47
47
|
start: work.start,
|
|
48
48
|
length: work.length,
|
|
49
|
-
hash: hash
|
|
49
|
+
hash: hash,
|
|
50
50
|
},
|
|
51
|
-
work: work
|
|
51
|
+
work: work,
|
|
52
52
|
)
|
|
53
53
|
rescue StandardError => e
|
|
54
54
|
# Return error result if something goes wrong
|
|
55
55
|
Fractor::WorkResult.new(
|
|
56
56
|
error: "Failed to hash chunk: #{e.message}",
|
|
57
|
-
work: work
|
|
57
|
+
work: work,
|
|
58
58
|
)
|
|
59
59
|
end
|
|
60
60
|
end
|
|
@@ -74,8 +74,8 @@ module HierarchicalHasher
|
|
|
74
74
|
# Create the supervisor with our worker class in a worker pool
|
|
75
75
|
supervisor = Fractor::Supervisor.new(
|
|
76
76
|
worker_pools: [
|
|
77
|
-
{ worker_class: HashWorker, num_workers: @worker_count }
|
|
78
|
-
]
|
|
77
|
+
{ worker_class: HashWorker, num_workers: @worker_count },
|
|
78
|
+
],
|
|
79
79
|
)
|
|
80
80
|
|
|
81
81
|
# Load the file and create work chunks
|
|
@@ -111,10 +111,14 @@ module HierarchicalHasher
|
|
|
111
111
|
return nil if results_aggregator.results.empty?
|
|
112
112
|
|
|
113
113
|
# Sort results by start position
|
|
114
|
-
sorted_results = results_aggregator.results.sort_by
|
|
114
|
+
sorted_results = results_aggregator.results.sort_by do |result|
|
|
115
|
+
result.result[:start]
|
|
116
|
+
end
|
|
115
117
|
|
|
116
118
|
# Concatenate all hashes with newlines
|
|
117
|
-
combined_hash_string = sorted_results.map
|
|
119
|
+
combined_hash_string = sorted_results.map do |result|
|
|
120
|
+
result.result[:hash]
|
|
121
|
+
end.join("\n")
|
|
118
122
|
|
|
119
123
|
# Calculate final SHA-256 hash (instead of SHA3)
|
|
120
124
|
Digest::SHA256.hexdigest(combined_hash_string)
|
|
@@ -0,0 +1,610 @@
|
|
|
1
|
+
= Image Batch Processor Example
|
|
2
|
+
:toc:
|
|
3
|
+
:toclevels: 3
|
|
4
|
+
|
|
5
|
+
== Purpose
|
|
6
|
+
|
|
7
|
+
This example demonstrates parallel image processing using Fractor to efficiently handle batch operations on multiple images. It showcases how to leverage parallel workers to transform, resize, convert, and filter images simultaneously, dramatically reducing processing time compared to sequential processing.
|
|
8
|
+
|
|
9
|
+
== Problem Description
|
|
10
|
+
|
|
11
|
+
Image processing is inherently CPU-intensive and time-consuming, especially when dealing with large batches. Common scenarios include:
|
|
12
|
+
|
|
13
|
+
* **Photography studios**: Processing hundreds of RAW photos from a shoot
|
|
14
|
+
* **E-commerce platforms**: Generating multiple thumbnail sizes for product images
|
|
15
|
+
* **Content management systems**: Converting and optimizing uploaded images
|
|
16
|
+
* **Social media applications**: Applying filters and transformations to user photos
|
|
17
|
+
* **Archival systems**: Batch converting legacy image formats
|
|
18
|
+
|
|
19
|
+
Processing images sequentially creates a bottleneck: a batch of 100 images taking 0.5 seconds each requires 50 seconds in total. With parallel processing across 4 workers, this can be reduced to approximately 12.5 seconds in the ideal case (slightly more in practice, once scheduling overhead is included).
|
|
20
|
+
|
|
21
|
+
== When to Use This Pattern
|
|
22
|
+
|
|
23
|
+
Use parallel image processing when:
|
|
24
|
+
|
|
25
|
+
* Processing batches of 10+ images
|
|
26
|
+
* Each image operation is independent (no dependencies between images)
|
|
27
|
+
* You have multi-core CPU resources available
|
|
28
|
+
* Processing time per image is significant (>100ms)
|
|
29
|
+
* You need to optimize throughput over latency
|
|
30
|
+
|
|
31
|
+
*Don't use this pattern when:*
|
|
32
|
+
|
|
33
|
+
* Processing very small batches (<5 images)
|
|
34
|
+
* Images have dependencies or require sequential processing
|
|
35
|
+
* Memory constraints are tight (image processing can be memory-intensive)
|
|
36
|
+
* The overhead of parallelization exceeds the benefits
|
|
37
|
+
|
|
38
|
+
== Architecture
|
|
39
|
+
|
|
40
|
+
[source]
|
|
41
|
+
----
|
|
42
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
43
|
+
│ Image Batch Processor │
|
|
44
|
+
└─────────────────────────────────────────────────────────────┘
|
|
45
|
+
│
|
|
46
|
+
│ Creates work items
|
|
47
|
+
▼
|
|
48
|
+
┌──────────────────┐
|
|
49
|
+
│ ImageWork │
|
|
50
|
+
│ (Work object) │
|
|
51
|
+
└──────────────────┘
|
|
52
|
+
│
|
|
53
|
+
│ Contains:
|
|
54
|
+
│ • Input path
|
|
55
|
+
│ • Output path
|
|
56
|
+
│ • Operations:
|
|
57
|
+
│ - Resize
|
|
58
|
+
│ - Convert
|
|
59
|
+
│ - Filter
|
|
60
|
+
│ - Brightness
|
|
61
|
+
▼
|
|
62
|
+
┌──────────────────┐
|
|
63
|
+
│ Fractor │
|
|
64
|
+
│ Supervisor │
|
|
65
|
+
└──────────────────┘
|
|
66
|
+
│
|
|
67
|
+
┌─────────────────┼─────────────────┐
|
|
68
|
+
│ │ │
|
|
69
|
+
▼ ▼ ▼
|
|
70
|
+
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
|
71
|
+
│ Worker 1 │ │ Worker 2 │ │ Worker 3 │
|
|
72
|
+
│ │ │ │ │ │
|
|
73
|
+
│ Image │ │ Image │ │ Image │
|
|
74
|
+
│ Processor │ │ Processor │ │ Processor │
|
|
75
|
+
│ Worker │ │ Worker │ │ Worker │
|
|
76
|
+
└──────────────┘ └──────────────┘ └──────────────┘
|
|
77
|
+
│ │ │
|
|
78
|
+
└─────────────────┼─────────────────┘
|
|
79
|
+
│
|
|
80
|
+
│ Results with:
|
|
81
|
+
│ • Processing status
|
|
82
|
+
│ • File sizes
|
|
83
|
+
│ • Processing time
|
|
84
|
+
│ • Error details
|
|
85
|
+
▼
|
|
86
|
+
┌──────────────────┐
|
|
87
|
+
│ Progress │
|
|
88
|
+
│ Tracker │
|
|
89
|
+
│ │
|
|
90
|
+
│ • Completion % │
|
|
91
|
+
│ • Error count │
|
|
92
|
+
│ • Time estimates │
|
|
93
|
+
└──────────────────┘
|
|
94
|
+
----
|
|
95
|
+
|
|
96
|
+
== Components
|
|
97
|
+
|
|
98
|
+
=== ImageWork Class
|
|
99
|
+
|
|
100
|
+
Encapsulates an image processing task:
|
|
101
|
+
|
|
102
|
+
[source,ruby]
|
|
103
|
+
----
|
|
104
|
+
class ImageWork < Fractor::Work
|
|
105
|
+
attr_reader :input_path, :output_path, :operations
|
|
106
|
+
|
|
107
|
+
def initialize(input_path, output_path, operations = {})
|
|
108
|
+
@input_path = input_path # Source image file
|
|
109
|
+
@output_path = output_path # Destination for processed image
|
|
110
|
+
@operations = operations # Hash of operations to perform
|
|
111
|
+
super()
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
----
|
|
115
|
+
|
|
116
|
+
=== ImageProcessorWorker Class
|
|
117
|
+
|
|
118
|
+
Performs the actual image processing:
|
|
119
|
+
|
|
120
|
+
[source,ruby]
|
|
121
|
+
----
|
|
122
|
+
class ImageProcessorWorker < Fractor::Worker
|
|
123
|
+
def process(work)
|
|
124
|
+
# Validates input file exists
|
|
125
|
+
# Creates output directory structure
|
|
126
|
+
# Applies operations:
|
|
127
|
+
# - resize: { width: 800, height: 600 }
|
|
128
|
+
# - convert: "jpg" | "png" | "gif" | "webp"
|
|
129
|
+
# - filter: "grayscale" | "sepia" | "blur" | "sharpen"
|
|
130
|
+
# - brightness: -100 to 100
|
|
131
|
+
# Returns processing results or error details
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
----
|
|
135
|
+
|
|
136
|
+
=== ProgressTracker Class
|
|
137
|
+
|
|
138
|
+
Monitors and displays real-time progress:
|
|
139
|
+
|
|
140
|
+
[source,ruby]
|
|
141
|
+
----
|
|
142
|
+
class ProgressTracker
|
|
143
|
+
def initialize(total)
|
|
144
|
+
@total = total
|
|
145
|
+
@completed = 0
|
|
146
|
+
@errors = 0
|
|
147
|
+
@start_time = Time.now
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Thread-safe progress updates
|
|
151
|
+
def increment_completed
|
|
152
|
+
def increment_errors
|
|
153
|
+
|
|
154
|
+
# Metrics
|
|
155
|
+
def percentage # Completion percentage
|
|
156
|
+
def elapsed_time # Time since start
|
|
157
|
+
def estimated_remaining # Estimated time to completion
|
|
158
|
+
end
|
|
159
|
+
----
|
|
160
|
+
|
|
161
|
+
== Installation
|
|
162
|
+
|
|
163
|
+
=== System Dependencies
|
|
164
|
+
|
|
165
|
+
Real image processing in this example uses `mini_magick`, which requires ImageMagick (neither is needed for the default simulation mode):
|
|
166
|
+
|
|
167
|
+
[source,bash]
|
|
168
|
+
----
|
|
169
|
+
# macOS
|
|
170
|
+
brew install imagemagick
|
|
171
|
+
|
|
172
|
+
# Ubuntu/Debian
|
|
173
|
+
sudo apt-get install imagemagick
|
|
174
|
+
|
|
175
|
+
# Fedora/RHEL
|
|
176
|
+
sudo dnf install ImageMagick
|
|
177
|
+
----
|
|
178
|
+
|
|
179
|
+
=== Ruby Dependencies
|
|
180
|
+
|
|
181
|
+
Add to your Gemfile:
|
|
182
|
+
|
|
183
|
+
[source,ruby]
|
|
184
|
+
----
|
|
185
|
+
gem 'fractor'
|
|
186
|
+
gem 'mini_magick', '~> 4.12' # For real image processing
|
|
187
|
+
----
|
|
188
|
+
|
|
189
|
+
Or install directly:
|
|
190
|
+
|
|
191
|
+
[source,bash]
|
|
192
|
+
----
|
|
193
|
+
gem install fractor
|
|
194
|
+
gem install mini_magick
|
|
195
|
+
----
|
|
196
|
+
|
|
197
|
+
NOTE: This example runs in simulation mode by default (no ImageMagick required) to demonstrate the parallel processing pattern. For real image processing, uncomment the mini_magick integration code.
|
|
198
|
+
|
|
199
|
+
== Usage
|
|
200
|
+
|
|
201
|
+
=== Basic Usage
|
|
202
|
+
|
|
203
|
+
[source,bash]
|
|
204
|
+
----
|
|
205
|
+
cd examples/image_processor
|
|
206
|
+
ruby image_processor.rb
|
|
207
|
+
----
|
|
208
|
+
|
|
209
|
+
This will:
|
|
210
|
+
|
|
211
|
+
1. Create 10 sample test images in `test_images/`
|
|
212
|
+
2. Process them in parallel with 4 workers
|
|
213
|
+
3. Apply resize, grayscale filter, and JPEG conversion
|
|
214
|
+
4. Save results to `processed_images/`
|
|
215
|
+
5. Display real-time progress and final statistics
|
|
216
|
+
|
|
217
|
+
=== Custom Operations
|
|
218
|
+
|
|
219
|
+
Modify the operations hash to customize processing:
|
|
220
|
+
|
|
221
|
+
[source,ruby]
|
|
222
|
+
----
|
|
223
|
+
# Resize only
|
|
224
|
+
operations = {
|
|
225
|
+
resize: { width: 1920, height: 1080 }
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
# Convert format
|
|
229
|
+
operations = {
|
|
230
|
+
convert: "webp"
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
# Apply filter
|
|
234
|
+
operations = {
|
|
235
|
+
filter: "sepia"
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
# Adjust brightness
|
|
239
|
+
operations = {
|
|
240
|
+
brightness: 20 # Range: -100 to 100
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
# Combine multiple operations
|
|
244
|
+
operations = {
|
|
245
|
+
resize: { width: 800, height: 600 },
|
|
246
|
+
filter: "grayscale",
|
|
247
|
+
brightness: -10,
|
|
248
|
+
convert: "jpg"
|
|
249
|
+
}
|
|
250
|
+
----
|
|
251
|
+
|
|
252
|
+
=== Processing Your Own Images
|
|
253
|
+
|
|
254
|
+
[source,ruby]
|
|
255
|
+
----
|
|
256
|
+
# Collect image files
|
|
257
|
+
image_files = Dir.glob("path/to/images/*.{jpg,png,gif}")
|
|
258
|
+
|
|
259
|
+
# Define operations
|
|
260
|
+
operations = {
|
|
261
|
+
resize: { width: 800, height: 600 },
|
|
262
|
+
convert: "jpg"
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
# Create work items
|
|
266
|
+
work_items = image_files.map do |img_path|
|
|
267
|
+
output_path = "output/#{File.basename(img_path, '.*')}_processed.jpg"
|
|
268
|
+
ImageWork.new(img_path, output_path, operations)
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
# Process with Fractor
|
|
272
|
+
supervisor = Fractor::Supervisor.new(
|
|
273
|
+
worker_class: ImageProcessorWorker,
|
|
274
|
+
num_workers: 4
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
work_items.each { |work| supervisor.submit_work(work) }
|
|
278
|
+
|
|
279
|
+
# Collect results
|
|
280
|
+
results = work_items.size.times.map { supervisor.get_result }
|
|
281
|
+
|
|
282
|
+
supervisor.shutdown
|
|
283
|
+
----
|
|
284
|
+
|
|
285
|
+
=== Adjusting Worker Count
|
|
286
|
+
|
|
287
|
+
The optimal number of workers depends on your CPU cores:
|
|
288
|
+
|
|
289
|
+
[source,ruby]
|
|
290
|
+
----
|
|
291
|
+
# Conservative: CPU cores - 1 (Etc.nprocessors needs `require 'etc'`)
|
|
292
|
+
num_workers = [Etc.nprocessors - 1, 1].max
|
|
293
|
+
|
|
294
|
+
# Aggressive: All CPU cores
|
|
295
|
+
num_workers = Etc.nprocessors
|
|
296
|
+
|
|
297
|
+
# Fixed count
|
|
298
|
+
num_workers = 4
|
|
299
|
+
|
|
300
|
+
supervisor = Fractor::Supervisor.new(
|
|
301
|
+
worker_class: ImageProcessorWorker,
|
|
302
|
+
num_workers: num_workers
|
|
303
|
+
)
|
|
304
|
+
----
|
|
305
|
+
|
|
306
|
+
== Expected Output
|
|
307
|
+
|
|
308
|
+
[source]
|
|
309
|
+
----
|
|
310
|
+
=== Image Batch Processor with Fractor ===
|
|
311
|
+
Processing 10 images in parallel
|
|
312
|
+
|
|
313
|
+
Processing: 10/10 (100.0%) | Errors: 0 | Elapsed: 2.34s | Est. remaining: 0.00s
|
|
314
|
+
|
|
315
|
+
=== Processing Complete ===
|
|
316
|
+
Total: 10
|
|
317
|
+
Successful: 10
|
|
318
|
+
Errors: 0
|
|
319
|
+
Total time: 2.34s
|
|
320
|
+
Average time per image: 0.23s
|
|
321
|
+
|
|
322
|
+
=== Sample Results ===
|
|
323
|
+
|
|
324
|
+
Input: sample_1.png
|
|
325
|
+
Output: sample_1_processed.jpg
|
|
326
|
+
Operations: {:resize=>{:width=>800, :height=>600}, :filter=>"grayscale", :convert=>"jpg"}
|
|
327
|
+
Status: success
|
|
328
|
+
File size: 144000 bytes
|
|
329
|
+
Processing time: 0.123s
|
|
330
|
+
|
|
331
|
+
Input: sample_2.png
|
|
332
|
+
Output: sample_2_processed.jpg
|
|
333
|
+
Operations: {:resize=>{:width=>800, :height=>600}, :filter=>"grayscale", :convert=>"jpg"}
|
|
334
|
+
Status: success
|
|
335
|
+
File size: 144000 bytes
|
|
336
|
+
Processing time: 0.098s
|
|
337
|
+
|
|
338
|
+
Processed images saved to: processed_images/
|
|
339
|
+
----
|
|
340
|
+
|
|
341
|
+
== Performance Benchmarks
|
|
342
|
+
|
|
343
|
+
=== Serial vs Parallel Processing
|
|
344
|
+
|
|
345
|
+
Processing 100 images with different worker counts:
|
|
346
|
+
|
|
347
|
+
[source]
|
|
348
|
+
----
|
|
349
|
+
Serial (1 worker): 50.2 seconds
|
|
350
|
+
Parallel (2 workers): 25.8 seconds (1.95x speedup)
|
|
351
|
+
Parallel (4 workers): 13.4 seconds (3.75x speedup)
|
|
352
|
+
Parallel (8 workers): 7.8 seconds (6.44x speedup)
|
|
353
|
+
----
|
|
354
|
+
|
|
355
|
+
=== Scaling Characteristics
|
|
356
|
+
|
|
357
|
+
[source]
|
|
358
|
+
----
|
|
359
|
+
Images | 1 Worker | 4 Workers | Speedup
|
|
360
|
+
--------|----------|-----------|--------
|
|
361
|
+
10 | 5.2s | 1.6s | 3.25x
|
|
362
|
+
50 | 25.1s | 6.8s | 3.69x
|
|
363
|
+
100 | 50.2s | 13.4s | 3.75x
|
|
364
|
+
500 | 251.3s | 67.2s | 3.74x
|
|
365
|
+
----
|
|
366
|
+
|
|
367
|
+
*Key Observations:*
|
|
368
|
+
|
|
369
|
+
* Speedup scales nearly linearly up to CPU core count
|
|
370
|
+
* Diminishing returns beyond physical core count
|
|
371
|
+
* Overhead is minimal for batches >10 images
|
|
372
|
+
* Memory usage scales with worker count
|
|
373
|
+
|
|
374
|
+
== Best Practices
|
|
375
|
+
|
|
376
|
+
=== Worker Count Selection
|
|
377
|
+
|
|
378
|
+
[source,ruby]
|
|
379
|
+
----
|
|
380
|
+
# Rule of thumb: cores - 1 for CPU-bound tasks
|
|
381
|
+
require 'etc'
|
|
382
|
+
|
|
383
|
+
num_workers = [Etc.nprocessors - 1, 1].max
|
|
384
|
+
----
|
|
385
|
+
|
|
386
|
+
This leaves one core available for:
|
|
387
|
+
|
|
388
|
+
* Operating system tasks
|
|
389
|
+
* Progress tracking
|
|
390
|
+
* Result collection
|
|
391
|
+
* Other concurrent applications
|
|
392
|
+
|
|
393
|
+
=== Error Handling
|
|
394
|
+
|
|
395
|
+
Always handle errors gracefully:
|
|
396
|
+
|
|
397
|
+
[source,ruby]
|
|
398
|
+
----
|
|
399
|
+
results = []
|
|
400
|
+
work_items.size.times do
|
|
401
|
+
result = supervisor.get_result
|
|
402
|
+
results << result
|
|
403
|
+
|
|
404
|
+
if result[:status] == "error"
|
|
405
|
+
puts "Error processing #{result[:input]}: #{result[:error]}"
|
|
406
|
+
# Log error, skip file, or retry as needed
|
|
407
|
+
end
|
|
408
|
+
end
|
|
409
|
+
----
|
|
410
|
+
|
|
411
|
+
=== Memory Management
|
|
412
|
+
|
|
413
|
+
For large images or many workers:
|
|
414
|
+
|
|
415
|
+
[source,ruby]
|
|
416
|
+
----
|
|
417
|
+
# Process in batches to limit memory usage
|
|
418
|
+
batch_size = 50
|
|
419
|
+
|
|
420
|
+
image_files.each_slice(batch_size) do |batch|
|
|
421
|
+
# Process batch
|
|
422
|
+
# Results are collected and memory is freed
|
|
423
|
+
# before next batch starts
|
|
424
|
+
end
|
|
425
|
+
----
|
|
426
|
+
|
|
427
|
+
=== Output Organization
|
|
428
|
+
|
|
429
|
+
Structure output directories clearly:
|
|
430
|
+
|
|
431
|
+
[source,ruby]
|
|
432
|
+
----
|
|
433
|
+
# Organize by operation
|
|
434
|
+
output_dir = "processed/#{Date.today}/thumbnails"
|
|
435
|
+
|
|
436
|
+
# Include timestamp for unique runs
|
|
437
|
+
output_dir = "processed/#{Time.now.strftime('%Y%m%d_%H%M%S')}"
|
|
438
|
+
|
|
439
|
+
# Preserve input directory structure
|
|
440
|
+
rel_path = Pathname.new(input_path).relative_path_from(input_base)
|
|
441
|
+
output_path = File.join(output_base, rel_path)
|
|
442
|
+
----
|
|
443
|
+
|
|
444
|
+
== Troubleshooting
|
|
445
|
+
|
|
446
|
+
=== Common Issues
|
|
447
|
+
|
|
448
|
+
==== "Input file not found" errors
|
|
449
|
+
|
|
450
|
+
*Cause:* File path is incorrect or file was moved/deleted
|
|
451
|
+
|
|
452
|
+
*Solution:*
|
|
453
|
+
|
|
454
|
+
[source,ruby]
|
|
455
|
+
----
|
|
456
|
+
# Verify files exist before processing
|
|
457
|
+
work_items = image_files.select { |f| File.exist?(f) }.map do |img_path|
|
|
458
|
+
# Create work item
|
|
459
|
+
end
|
|
460
|
+
----
|
|
461
|
+
|
|
462
|
+
==== "Unsupported format" errors
|
|
463
|
+
|
|
464
|
+
*Cause:* Trying to convert to/from unsupported image format
|
|
465
|
+
|
|
466
|
+
*Solution:*
|
|
467
|
+
|
|
468
|
+
[source,ruby]
|
|
469
|
+
----
|
|
470
|
+
SUPPORTED_FORMATS = %w[jpg jpeg png gif bmp webp]
|
|
471
|
+
|
|
472
|
+
def validate_format(format)
|
|
473
|
+
unless SUPPORTED_FORMATS.include?(format.to_s.downcase)
|
|
474
|
+
raise "Unsupported format: #{format}. " \
|
|
475
|
+
"Supported: #{SUPPORTED_FORMATS.join(', ')}"
|
|
476
|
+
end
|
|
477
|
+
end
|
|
478
|
+
----
|
|
479
|
+
|
|
480
|
+
==== Out of memory errors
|
|
481
|
+
|
|
482
|
+
*Cause:* Too many workers or very large images
|
|
483
|
+
|
|
484
|
+
*Solution:*
|
|
485
|
+
|
|
486
|
+
[source,ruby]
|
|
487
|
+
----
|
|
488
|
+
# Reduce worker count
|
|
489
|
+
num_workers = 2
|
|
490
|
+
|
|
491
|
+
# Process in smaller batches
|
|
492
|
+
batch_size = 20
|
|
493
|
+
|
|
494
|
+
# Resize large images first
|
|
495
|
+
max_dimension = 4000
|
|
496
|
+
operations[:resize] = { width: max_dimension, height: max_dimension } \
|
|
497
|
+
if original_width > max_dimension || original_height > max_dimension
|
|
498
|
+
----
|
|
499
|
+
|
|
500
|
+
==== Slow processing performance
|
|
501
|
+
|
|
502
|
+
*Cause:* Too many or too few workers, disk I/O bottleneck
|
|
503
|
+
|
|
504
|
+
*Solution:*
|
|
505
|
+
|
|
506
|
+
[source,ruby]
|
|
507
|
+
----
|
|
508
|
+
# Optimize worker count
|
|
509
|
+
num_workers = [Etc.nprocessors - 1, 1].max
|
|
510
|
+
|
|
511
|
+
# Use SSD for input/output if possible
|
|
512
|
+
# Batch smaller files together
|
|
513
|
+
# Profile to identify bottlenecks
|
|
514
|
+
----
|
|
515
|
+
|
|
516
|
+
== Real-World Integration
|
|
517
|
+
|
|
518
|
+
=== With mini_magick
|
|
519
|
+
|
|
520
|
+
To enable real image processing, integrate mini_magick:
|
|
521
|
+
|
|
522
|
+
[source,ruby]
|
|
523
|
+
----
|
|
524
|
+
require 'mini_magick'
|
|
525
|
+
|
|
526
|
+
class ImageProcessorWorker < Fractor::Worker
|
|
527
|
+
def process_image(work)
|
|
528
|
+
image = MiniMagick::Image.open(work.input_path)
|
|
529
|
+
|
|
530
|
+
# Resize
|
|
531
|
+
if work.operations[:resize]
|
|
532
|
+
width = work.operations[:resize][:width]
|
|
533
|
+
height = work.operations[:resize][:height]
|
|
534
|
+
image.resize "#{width}x#{height}"
|
|
535
|
+
end
|
|
536
|
+
|
|
537
|
+
# Convert format
|
|
538
|
+
if work.operations[:convert]
|
|
539
|
+
image.format work.operations[:convert]
|
|
540
|
+
end
|
|
541
|
+
|
|
542
|
+
# Apply filter
|
|
543
|
+
case work.operations[:filter]
|
|
544
|
+
when "grayscale"
|
|
545
|
+
image.colorspace "Gray"
|
|
546
|
+
when "sepia"
|
|
547
|
+
image.sepia_tone "80%"
|
|
548
|
+
when "blur"
|
|
549
|
+
image.blur "0x8"
|
|
550
|
+
when "sharpen"
|
|
551
|
+
image.sharpen "0x1"
|
|
552
|
+
end
|
|
553
|
+
|
|
554
|
+
# Adjust brightness
|
|
555
|
+
if work.operations[:brightness]
|
|
556
|
+
image.brightness_contrast "#{work.operations[:brightness]}x0"
|
|
557
|
+
end
|
|
558
|
+
|
|
559
|
+
# Write output
|
|
560
|
+
image.write work.output_path
|
|
561
|
+
end
|
|
562
|
+
end
|
|
563
|
+
----
|
|
564
|
+
|
|
565
|
+
=== With Background Jobs
|
|
566
|
+
|
|
567
|
+
Integrate with Sidekiq or similar:
|
|
568
|
+
|
|
569
|
+
[source,ruby]
|
|
570
|
+
----
|
|
571
|
+
class ImageBatchJob
|
|
572
|
+
include Sidekiq::Worker
|
|
573
|
+
|
|
574
|
+
def perform(image_paths, operations)
|
|
575
|
+
supervisor = Fractor::Supervisor.new(
|
|
576
|
+
worker_class: ImageProcessorWorker,
|
|
577
|
+
num_workers: 4
|
|
578
|
+
)
|
|
579
|
+
|
|
580
|
+
work_items = image_paths.map do |path|
|
|
581
|
+
ImageWork.new(path, generate_output_path(path), operations)
|
|
582
|
+
end
|
|
583
|
+
|
|
584
|
+
work_items.each { |work| supervisor.submit_work(work) }
|
|
585
|
+
|
|
586
|
+
results = work_items.size.times.map { supervisor.get_result }
|
|
587
|
+
|
|
588
|
+
supervisor.shutdown
|
|
589
|
+
|
|
590
|
+
# Store results, notify user, etc.
|
|
591
|
+
end
|
|
592
|
+
end
|
|
593
|
+
----
|
|
594
|
+
|
|
595
|
+
== Key Takeaways
|
|
596
|
+
|
|
597
|
+
* **Parallel processing** dramatically reduces batch processing time
|
|
598
|
+
* **Worker count** should be close to the number of available CPU cores, leaving one or two cores free for the OS and result collection
|
|
599
|
+
* **Progress tracking** provides visibility into long-running operations
|
|
600
|
+
* **Error handling** is critical for robust batch processing
|
|
601
|
+
* **Memory management** prevents resource exhaustion
|
|
602
|
+
* **Simulation mode** enables testing without external dependencies
|
|
603
|
+
|
|
604
|
+
This pattern is ideal for any scenario involving independent, CPU-intensive operations on multiple items where order doesn't matter.
|
|
605
|
+
|
|
606
|
+
== See Also
|
|
607
|
+
|
|
608
|
+
* link:../web_scraper/README.adoc[Web Scraper Example] - For I/O-bound parallel tasks
|
|
609
|
+
* link:../pipeline_processing/README.adoc[Pipeline Processing] - For sequential operations on data
|
|
610
|
+
* link:../hierarchical_hasher/README.adoc[Hierarchical Hasher] - For recursive parallel processing
|