fractor 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +227 -102
- data/README.adoc +113 -1940
- data/docs/.lycheeignore +16 -0
- data/docs/Gemfile +24 -0
- data/docs/README.md +157 -0
- data/docs/_config.yml +151 -0
- data/docs/_features/error-handling.adoc +1192 -0
- data/docs/_features/index.adoc +80 -0
- data/docs/_features/monitoring.adoc +589 -0
- data/docs/_features/signal-handling.adoc +202 -0
- data/docs/_features/workflows.adoc +1235 -0
- data/docs/_guides/continuous-mode.adoc +736 -0
- data/docs/_guides/cookbook.adoc +1133 -0
- data/docs/_guides/index.adoc +55 -0
- data/docs/_guides/pipeline-mode.adoc +730 -0
- data/docs/_guides/troubleshooting.adoc +358 -0
- data/docs/_pages/architecture.adoc +1390 -0
- data/docs/_pages/core-concepts.adoc +1392 -0
- data/docs/_pages/design-principles.adoc +862 -0
- data/docs/_pages/getting-started.adoc +290 -0
- data/docs/_pages/installation.adoc +143 -0
- data/docs/_reference/api.adoc +1080 -0
- data/docs/_reference/error-reporting.adoc +670 -0
- data/docs/_reference/examples.adoc +181 -0
- data/docs/_reference/index.adoc +96 -0
- data/docs/_reference/troubleshooting.adoc +862 -0
- data/docs/_tutorials/complex-workflows.adoc +1022 -0
- data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
- data/docs/_tutorials/first-application.adoc +384 -0
- data/docs/_tutorials/index.adoc +48 -0
- data/docs/_tutorials/long-running-services.adoc +931 -0
- data/docs/assets/images/favicon-16.png +0 -0
- data/docs/assets/images/favicon-32.png +0 -0
- data/docs/assets/images/favicon-48.png +0 -0
- data/docs/assets/images/favicon.ico +0 -0
- data/docs/assets/images/favicon.png +0 -0
- data/docs/assets/images/favicon.svg +45 -0
- data/docs/assets/images/fractor-icon.svg +49 -0
- data/docs/assets/images/fractor-logo.svg +61 -0
- data/docs/index.adoc +131 -0
- data/docs/lychee.toml +39 -0
- data/examples/api_aggregator/README.adoc +627 -0
- data/examples/api_aggregator/api_aggregator.rb +376 -0
- data/examples/auto_detection/README.adoc +407 -29
- data/examples/continuous_chat_common/message_protocol.rb +1 -1
- data/examples/error_reporting.rb +207 -0
- data/examples/file_processor/README.adoc +170 -0
- data/examples/file_processor/file_processor.rb +615 -0
- data/examples/file_processor/sample_files/invalid.csv +1 -0
- data/examples/file_processor/sample_files/orders.xml +24 -0
- data/examples/file_processor/sample_files/products.json +23 -0
- data/examples/file_processor/sample_files/users.csv +6 -0
- data/examples/hierarchical_hasher/README.adoc +629 -41
- data/examples/image_processor/README.adoc +610 -0
- data/examples/image_processor/image_processor.rb +349 -0
- data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
- data/examples/image_processor/test_images/sample_1.png +1 -0
- data/examples/image_processor/test_images/sample_10.png +1 -0
- data/examples/image_processor/test_images/sample_2.png +1 -0
- data/examples/image_processor/test_images/sample_3.png +1 -0
- data/examples/image_processor/test_images/sample_4.png +1 -0
- data/examples/image_processor/test_images/sample_5.png +1 -0
- data/examples/image_processor/test_images/sample_6.png +1 -0
- data/examples/image_processor/test_images/sample_7.png +1 -0
- data/examples/image_processor/test_images/sample_8.png +1 -0
- data/examples/image_processor/test_images/sample_9.png +1 -0
- data/examples/log_analyzer/README.adoc +662 -0
- data/examples/log_analyzer/log_analyzer.rb +579 -0
- data/examples/log_analyzer/sample_logs/apache.log +20 -0
- data/examples/log_analyzer/sample_logs/json.log +15 -0
- data/examples/log_analyzer/sample_logs/nginx.log +15 -0
- data/examples/log_analyzer/sample_logs/rails.log +29 -0
- data/examples/multi_work_type/README.adoc +576 -26
- data/examples/performance_monitoring.rb +120 -0
- data/examples/pipeline_processing/README.adoc +740 -26
- data/examples/pipeline_processing/pipeline_processing.rb +2 -2
- data/examples/priority_work_example.rb +155 -0
- data/examples/producer_subscriber/README.adoc +889 -46
- data/examples/scatter_gather/README.adoc +829 -27
- data/examples/simple/README.adoc +347 -0
- data/examples/specialized_workers/README.adoc +622 -26
- data/examples/specialized_workers/specialized_workers.rb +44 -8
- data/examples/stream_processor/README.adoc +206 -0
- data/examples/stream_processor/stream_processor.rb +284 -0
- data/examples/web_scraper/README.adoc +625 -0
- data/examples/web_scraper/web_scraper.rb +285 -0
- data/examples/workflow/README.adoc +406 -0
- data/examples/workflow/circuit_breaker/README.adoc +360 -0
- data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
- data/examples/workflow/conditional/README.adoc +483 -0
- data/examples/workflow/conditional/conditional_workflow.rb +215 -0
- data/examples/workflow/dead_letter_queue/README.adoc +374 -0
- data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
- data/examples/workflow/fan_out/README.adoc +381 -0
- data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
- data/examples/workflow/retry/README.adoc +248 -0
- data/examples/workflow/retry/retry_workflow.rb +195 -0
- data/examples/workflow/simple_linear/README.adoc +267 -0
- data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
- data/examples/workflow/simplified/README.adoc +329 -0
- data/examples/workflow/simplified/simplified_workflow.rb +222 -0
- data/exe/fractor +10 -0
- data/lib/fractor/cli.rb +288 -0
- data/lib/fractor/configuration.rb +307 -0
- data/lib/fractor/continuous_server.rb +60 -65
- data/lib/fractor/error_formatter.rb +72 -0
- data/lib/fractor/error_report_generator.rb +152 -0
- data/lib/fractor/error_reporter.rb +244 -0
- data/lib/fractor/error_statistics.rb +147 -0
- data/lib/fractor/execution_tracer.rb +162 -0
- data/lib/fractor/logger.rb +230 -0
- data/lib/fractor/main_loop_handler.rb +406 -0
- data/lib/fractor/main_loop_handler3.rb +135 -0
- data/lib/fractor/main_loop_handler4.rb +299 -0
- data/lib/fractor/performance_metrics_collector.rb +181 -0
- data/lib/fractor/performance_monitor.rb +215 -0
- data/lib/fractor/performance_report_generator.rb +202 -0
- data/lib/fractor/priority_work.rb +93 -0
- data/lib/fractor/priority_work_queue.rb +189 -0
- data/lib/fractor/result_aggregator.rb +32 -0
- data/lib/fractor/shutdown_handler.rb +168 -0
- data/lib/fractor/signal_handler.rb +80 -0
- data/lib/fractor/supervisor.rb +382 -269
- data/lib/fractor/supervisor_logger.rb +88 -0
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work.rb +12 -0
- data/lib/fractor/work_distribution_manager.rb +151 -0
- data/lib/fractor/work_queue.rb +20 -0
- data/lib/fractor/work_result.rb +181 -9
- data/lib/fractor/worker.rb +73 -0
- data/lib/fractor/workflow/builder.rb +210 -0
- data/lib/fractor/workflow/chain_builder.rb +169 -0
- data/lib/fractor/workflow/circuit_breaker.rb +183 -0
- data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
- data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
- data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
- data/lib/fractor/workflow/execution_hooks.rb +39 -0
- data/lib/fractor/workflow/execution_strategy.rb +225 -0
- data/lib/fractor/workflow/execution_trace.rb +134 -0
- data/lib/fractor/workflow/helpers.rb +191 -0
- data/lib/fractor/workflow/job.rb +290 -0
- data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
- data/lib/fractor/workflow/logger.rb +110 -0
- data/lib/fractor/workflow/pre_execution_context.rb +193 -0
- data/lib/fractor/workflow/retry_config.rb +156 -0
- data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
- data/lib/fractor/workflow/retry_strategy.rb +93 -0
- data/lib/fractor/workflow/structured_logger.rb +30 -0
- data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
- data/lib/fractor/workflow/visualizer.rb +211 -0
- data/lib/fractor/workflow/workflow_context.rb +132 -0
- data/lib/fractor/workflow/workflow_executor.rb +669 -0
- data/lib/fractor/workflow/workflow_result.rb +55 -0
- data/lib/fractor/workflow/workflow_validator.rb +295 -0
- data/lib/fractor/workflow.rb +333 -0
- data/lib/fractor/wrapped_ractor.rb +66 -101
- data/lib/fractor/wrapped_ractor3.rb +161 -0
- data/lib/fractor/wrapped_ractor4.rb +242 -0
- data/lib/fractor.rb +92 -4
- metadata +179 -6
- data/tests/sample.rb.bak +0 -309
- data/tests/sample_working.rb.bak +0 -209
|
@@ -0,0 +1,862 @@
|
|
|
1
|
+
---
|
|
2
|
+
layout: default
|
|
3
|
+
title: Troubleshooting
|
|
4
|
+
nav_order: 5
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
== Troubleshooting Guide
|
|
8
|
+
|
|
9
|
+
=== Overview
|
|
10
|
+
|
|
11
|
+
This guide provides solutions to common issues and errors you may encounter when using Fractor. Each entry includes the problem description, common causes, and step-by-step solutions.
|
|
12
|
+
|
|
13
|
+
=== Quick Diagnosis
|
|
14
|
+
|
|
15
|
+
[cols="1,3"]
|
|
16
|
+
|===
|
|
17
|
+
|Symptom |Likely Cause
|
|
18
|
+
|
|
19
|
+
|Workers not processing work
|
|
20
|
+
|<<workers-idle,Workers idle>> or <<supervisor-not-running,Supervisor not running>>
|
|
21
|
+
|
|
22
|
+
|High memory usage
|
|
23
|
+
|<<memory-issues,Memory leak>> or <<large-work-items,Work items too large>>
|
|
24
|
+
|
|
25
|
+
|Slow processing
|
|
26
|
+
|<<bottlenecks,Worker bottleneck>> or <<io-bound,I/O bound operations>>
|
|
27
|
+
|
|
28
|
+
|Errors not being caught
|
|
29
|
+
|<<error-handling,Improper error handling>> in workers
|
|
30
|
+
|
|
31
|
+
|Ractor errors
|
|
32
|
+
|<<ractor-shareability,Shareable object issues>>
|
|
33
|
+
|
|
34
|
+
|Process hangs
|
|
35
|
+
|<<deadlock,Deadlock>> or <<infinite-loop,Infinite work generation>>
|
|
36
|
+
|
|
37
|
+
|Signal handling not working
|
|
38
|
+
|<<signals-not-working,Signal handler conflicts>>
|
|
39
|
+
|===
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
=== Common Issues
|
|
44
|
+
|
|
45
|
+
[[workers-idle]]
|
|
46
|
+
==== Workers Stay Idle / No Work Processing
|
|
47
|
+
|
|
48
|
+
**Symptoms:**
|
|
49
|
+
|
|
50
|
+
* Supervisor starts but workers don't process work
|
|
51
|
+
* Queue has items but nothing happens
|
|
52
|
+
* No results returned
|
|
53
|
+
|
|
54
|
+
**Common Causes:**
|
|
55
|
+
|
|
56
|
+
1. Work not added before `run()` in pipeline mode
|
|
57
|
+
2. Work source not registered in continuous mode
|
|
58
|
+
3. Work items not shareable between Ractors
|
|
59
|
+
|
|
60
|
+
**Solutions:**
|
|
61
|
+
|
|
62
|
+
===== Pipeline Mode
|
|
63
|
+
|
|
64
|
+
[source,ruby]
|
|
65
|
+
----
|
|
66
|
+
# ✗ Wrong: run() called before adding work
|
|
67
|
+
supervisor.run
|
|
68
|
+
supervisor.add_work_items(work_items) # Too late!
|
|
69
|
+
|
|
70
|
+
# ✓ Correct: add work before run()
|
|
71
|
+
supervisor.add_work_items(work_items)
|
|
72
|
+
supervisor.run
|
|
73
|
+
----
|
|
74
|
+
|
|
75
|
+
===== Continuous Mode
|
|
76
|
+
|
|
77
|
+
[source,ruby]
|
|
78
|
+
----
|
|
79
|
+
# ✗ Wrong: No work source registered
|
|
80
|
+
supervisor = Fractor::Supervisor.new(
|
|
81
|
+
worker_pools: [{ worker_class: MyWorker }],
|
|
82
|
+
continuous_mode: true
|
|
83
|
+
)
|
|
84
|
+
supervisor.run # Nothing will happen
|
|
85
|
+
|
|
86
|
+
# ✓ Correct: Register work source
|
|
87
|
+
supervisor.register_work_source do
|
|
88
|
+
# Return work items
|
|
89
|
+
get_next_work
|
|
90
|
+
end
|
|
91
|
+
supervisor.run
|
|
92
|
+
|
|
93
|
+
# Or use WorkQueue + ContinuousServer
|
|
94
|
+
work_queue = Fractor::WorkQueue.new
|
|
95
|
+
server = Fractor::ContinuousServer.new(
|
|
96
|
+
worker_pools: [{ worker_class: MyWorker }],
|
|
97
|
+
work_queue: work_queue
|
|
98
|
+
)
|
|
99
|
+
----
|
|
100
|
+
|
|
101
|
+
===== Shareable Objects
|
|
102
|
+
|
|
103
|
+
[source,ruby]
|
|
104
|
+
----
|
|
105
|
+
# ✗ Wrong: Mutable objects aren't automatically shareable
|
|
106
|
+
work = Fractor::Work.new({ mutable: [1, 2, 3] })
|
|
107
|
+
|
|
108
|
+
# ✓ Correct: Freeze objects
|
|
109
|
+
work = Fractor::Work.new({ mutable: [1, 2, 3].freeze }.freeze)
|
|
110
|
+
|
|
111
|
+
# ✓ Better: Use immutable data
|
|
112
|
+
work = Fractor::Work.new({ value: 42, name: "item" })
|
|
113
|
+
----
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
[[supervisor-not-running]]
|
|
118
|
+
==== Supervisor Doesn't Start
|
|
119
|
+
|
|
120
|
+
**Symptoms:**
|
|
121
|
+
|
|
122
|
+
* `run()` returns immediately
|
|
123
|
+
* No workers created
|
|
124
|
+
* Silent failure
|
|
125
|
+
|
|
126
|
+
**Common Causes:**
|
|
127
|
+
|
|
128
|
+
1. No workers configured
|
|
129
|
+
2. Invalid worker class
|
|
130
|
+
3. Exception during initialization
|
|
131
|
+
|
|
132
|
+
**Solutions:**
|
|
133
|
+
|
|
134
|
+
[source,ruby]
|
|
135
|
+
----
|
|
136
|
+
# ✗ Wrong: Empty worker pools
|
|
137
|
+
supervisor = Fractor::Supervisor.new(worker_pools: [])
|
|
138
|
+
|
|
139
|
+
# ✓ Correct: At least one worker pool
|
|
140
|
+
supervisor = Fractor::Supervisor.new(
|
|
141
|
+
worker_pools: [{ worker_class: MyWorker, num_workers: 4 }]
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
# Check for initialization errors
|
|
145
|
+
begin
|
|
146
|
+
supervisor = Fractor::Supervisor.new(
|
|
147
|
+
worker_pools: [{ worker_class: MyWorker }]
|
|
148
|
+
)
|
|
149
|
+
rescue => e
|
|
150
|
+
puts "Failed to create supervisor: #{e.message}"
|
|
151
|
+
puts e.backtrace
|
|
152
|
+
end
|
|
153
|
+
----
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
[[memory-issues]]
|
|
158
|
+
==== High Memory Usage / Memory Leaks
|
|
159
|
+
|
|
160
|
+
**Symptoms:**
|
|
161
|
+
|
|
162
|
+
* Memory grows continuously
|
|
163
|
+
* Out of memory errors
|
|
164
|
+
* Slow performance over time
|
|
165
|
+
|
|
166
|
+
**Common Causes:**
|
|
167
|
+
|
|
168
|
+
1. Large result accumulation
|
|
169
|
+
2. Not clearing processed results
|
|
170
|
+
3. Infinite work generation
|
|
171
|
+
4. Large work item payloads
|
|
172
|
+
|
|
173
|
+
**Solutions:**
|
|
174
|
+
|
|
175
|
+
===== Clear Results Periodically
|
|
176
|
+
|
|
177
|
+
[source,ruby]
|
|
178
|
+
----
|
|
179
|
+
# ✗ Wrong: Results accumulate indefinitely
|
|
180
|
+
loop do
|
|
181
|
+
supervisor.add_work(work)
|
|
182
|
+
supervisor.run
|
|
183
|
+
# Results never cleared
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
# ✓ Correct: Process and clear results
|
|
187
|
+
loop do
|
|
188
|
+
supervisor.add_work(work)
|
|
189
|
+
supervisor.run
|
|
190
|
+
|
|
191
|
+
# Process results
|
|
192
|
+
supervisor.results.results.each { |r| process_result(r) }
|
|
193
|
+
|
|
194
|
+
# Clear for next batch
|
|
195
|
+
supervisor.results.results.clear
|
|
196
|
+
supervisor.results.errors.clear
|
|
197
|
+
end
|
|
198
|
+
----
|
|
199
|
+
|
|
200
|
+
===== Use Streaming for Large Datasets
|
|
201
|
+
|
|
202
|
+
[source,ruby]
|
|
203
|
+
----
|
|
204
|
+
# ✗ Wrong: Load entire dataset into memory
|
|
205
|
+
all_data = File.readlines('huge_file.txt')
|
|
206
|
+
work_items = all_data.map { |line| MyWork.new(line) }
|
|
207
|
+
|
|
208
|
+
# ✓ Correct: Process in batches
|
|
209
|
+
File.foreach('huge_file.txt').each_slice(1000) do |batch|
|
|
210
|
+
work_items = batch.map { |line| MyWork.new(line) }
|
|
211
|
+
supervisor.add_work_items(work_items)
|
|
212
|
+
supervisor.run
|
|
213
|
+
|
|
214
|
+
# Process and clear results
|
|
215
|
+
handle_results(supervisor.results)
|
|
216
|
+
supervisor.results.results.clear
|
|
217
|
+
supervisor.results.errors.clear
|
|
218
|
+
end
|
|
219
|
+
----
|
|
220
|
+
|
|
221
|
+
===== Monitor Memory
|
|
222
|
+
|
|
223
|
+
[source,ruby]
|
|
224
|
+
----
|
|
225
|
+
require 'fractor/performance_monitor'
|
|
226
|
+
|
|
227
|
+
monitor = Fractor::PerformanceMonitor.new(supervisor)
|
|
228
|
+
monitor.start
|
|
229
|
+
|
|
230
|
+
# Check memory periodically
|
|
231
|
+
snapshot = monitor.snapshot
|
|
232
|
+
if snapshot[:memory_mb] > 500
|
|
233
|
+
puts "Warning: High memory usage (#{snapshot[:memory_mb]} MB)"
|
|
234
|
+
# Take action: clear caches, reduce batch size, etc.
|
|
235
|
+
end
|
|
236
|
+
----
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
[[large-work-items]]
|
|
241
|
+
==== Work Items Too Large
|
|
242
|
+
|
|
243
|
+
**Symptoms:**
|
|
244
|
+
|
|
245
|
+
* Slow work distribution
|
|
246
|
+
* High memory usage
|
|
247
|
+
* Ractor creation failures
|
|
248
|
+
|
|
249
|
+
**Common Causes:**
|
|
250
|
+
|
|
251
|
+
1. Including large data in work items
|
|
252
|
+
2. Embedding entire files/datasets
|
|
253
|
+
3. Not using references
|
|
254
|
+
|
|
255
|
+
**Solutions:**
|
|
256
|
+
|
|
257
|
+
[source,ruby]
|
|
258
|
+
----
|
|
259
|
+
# ✗ Wrong: Embed large data
|
|
260
|
+
file_content = File.read('10GB_file.dat')
|
|
261
|
+
work = Fractor::Work.new(content: file_content)
|
|
262
|
+
|
|
263
|
+
# ✓ Correct: Use file paths or references
|
|
264
|
+
work = Fractor::Work.new(filepath: '10GB_file.dat')
|
|
265
|
+
|
|
266
|
+
# Worker reads file when needed
|
|
267
|
+
class FileWorker < Fractor::Worker
|
|
268
|
+
def process(work)
|
|
269
|
+
# Read file in worker context
|
|
270
|
+
content = File.read(work.input[:filepath])
|
|
271
|
+
result = process_content(content)
|
|
272
|
+
Fractor::WorkResult.new(result: result, work: work)
|
|
273
|
+
end
|
|
274
|
+
end
|
|
275
|
+
----
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
[[bottlenecks]]
|
|
280
|
+
==== Performance Bottlenecks
|
|
281
|
+
|
|
282
|
+
**Symptoms:**
|
|
283
|
+
|
|
284
|
+
* Slow processing despite multiple workers
|
|
285
|
+
* Low CPU utilization
|
|
286
|
+
* Queue always full or always empty
|
|
287
|
+
|
|
288
|
+
**Diagnosis:**
|
|
289
|
+
|
|
290
|
+
[source,ruby]
|
|
291
|
+
----
|
|
292
|
+
monitor = Fractor::PerformanceMonitor.new(supervisor)
|
|
293
|
+
monitor.start
|
|
294
|
+
|
|
295
|
+
# Run for a while
|
|
296
|
+
sleep 60
|
|
297
|
+
|
|
298
|
+
# Check metrics
|
|
299
|
+
snapshot = monitor.snapshot
|
|
300
|
+
puts monitor.report
|
|
301
|
+
|
|
302
|
+
# Key indicators:
|
|
303
|
+
# - Low worker_utilization: Not enough work
|
|
304
|
+
# - High queue_depth: Workers too slow
|
|
305
|
+
# - Low throughput: Bottleneck somewhere
|
|
306
|
+
----
|
|
307
|
+
|
|
308
|
+
**Solutions:**
|
|
309
|
+
|
|
310
|
+
===== Balance Worker Count
|
|
311
|
+
|
|
312
|
+
[source,ruby]
|
|
313
|
+
----
|
|
314
|
+
# CPU-bound work
|
|
315
|
+
supervisor = Fractor::Supervisor.new(
|
|
316
|
+
worker_pools: [
|
|
317
|
+
# Use number of CPU cores
|
|
318
|
+
{ worker_class: CPUWorker, num_workers: 4 }
|
|
319
|
+
]
|
|
320
|
+
)
|
|
321
|
+
|
|
322
|
+
# I/O-bound work
|
|
323
|
+
supervisor = Fractor::Supervisor.new(
|
|
324
|
+
worker_pools: [
|
|
325
|
+
# Use more workers (2-4x CPU cores)
|
|
326
|
+
{ worker_class: IOWorker, num_workers: 16 }
|
|
327
|
+
]
|
|
328
|
+
)
|
|
329
|
+
----
|
|
330
|
+
|
|
331
|
+
===== Optimize Work Distribution
|
|
332
|
+
|
|
333
|
+
[source,ruby]
|
|
334
|
+
----
|
|
335
|
+
# ✗ Wrong: Very small work items (high overhead)
|
|
336
|
+
(1..10000).each do |i|
|
|
337
|
+
supervisor.add_work(TinyWork.new(i))
|
|
338
|
+
end
|
|
339
|
+
|
|
340
|
+
# ✓ Correct: Batch small items
|
|
341
|
+
(1..10000).each_slice(100) do |batch|
|
|
342
|
+
supervisor.add_work(BatchWork.new(batch))
|
|
343
|
+
end
|
|
344
|
+
----
|
|
345
|
+
|
|
346
|
+
---
|
|
347
|
+
|
|
348
|
+
[[io-bound]]
|
|
349
|
+
==== I/O Bound Operations
|
|
350
|
+
|
|
351
|
+
**Symptoms:**
|
|
352
|
+
|
|
353
|
+
* Workers idle waiting for I/O
|
|
354
|
+
* Low CPU usage
|
|
355
|
+
* High latency
|
|
356
|
+
|
|
357
|
+
**Solutions:**
|
|
358
|
+
|
|
359
|
+
[source,ruby]
|
|
360
|
+
----
|
|
361
|
+
# Increase worker count for I/O-bound work
|
|
362
|
+
supervisor = Fractor::Supervisor.new(
|
|
363
|
+
worker_pools: [
|
|
364
|
+
{ worker_class: DatabaseWorker, num_workers: 20 }, # High for I/O
|
|
365
|
+
{ worker_class: APIWorker, num_workers: 30 } # Even higher
|
|
366
|
+
]
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
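# Use connection pooling (NOTE: Ruby does not allow class-variable access from non-main Ractors; confirm @@pool is reachable where process runs)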
|
|
370
|
+
class DatabaseWorker < Fractor::Worker
|
|
371
|
+
@@pool = ConnectionPool.new(size: 20) { Database.connect }
|
|
372
|
+
|
|
373
|
+
def process(work)
|
|
374
|
+
@@pool.with do |conn|
|
|
375
|
+
result = conn.query(work.input[:query])
|
|
376
|
+
Fractor::WorkResult.new(result: result, work: work)
|
|
377
|
+
end
|
|
378
|
+
end
|
|
379
|
+
end
|
|
380
|
+
----
|
|
381
|
+
|
|
382
|
+
---
|
|
383
|
+
|
|
384
|
+
[[error-handling]]
|
|
385
|
+
==== Errors Not Being Caught
|
|
386
|
+
|
|
387
|
+
**Symptoms:**
|
|
388
|
+
|
|
389
|
+
* Worker crashes
|
|
390
|
+
* No error results returned
|
|
391
|
+
* Silent failures
|
|
392
|
+
|
|
393
|
+
**Solutions:**
|
|
394
|
+
|
|
395
|
+
===== Always Wrap in WorkResult
|
|
396
|
+
|
|
397
|
+
[source,ruby]
|
|
398
|
+
----
|
|
399
|
+
# ✗ Wrong: Unhandled exceptions
|
|
400
|
+
class BadWorker < Fractor::Worker
|
|
401
|
+
def process(work)
|
|
402
|
+
result = risky_operation(work.input)
|
|
403
|
+
Fractor::WorkResult.new(result: result, work: work)
|
|
404
|
+
# Exception crashes the worker!
|
|
405
|
+
end
|
|
406
|
+
end
|
|
407
|
+
|
|
408
|
+
# ✓ Correct: Rescue and return error result
|
|
409
|
+
class GoodWorker < Fractor::Worker
|
|
410
|
+
def process(work)
|
|
411
|
+
result = risky_operation(work.input)
|
|
412
|
+
Fractor::WorkResult.new(result: result, work: work)
|
|
413
|
+
rescue => e
|
|
414
|
+
Fractor::WorkResult.new(
|
|
415
|
+
error: e,
|
|
416
|
+
error_code: :operation_failed,
|
|
417
|
+
error_context: { input: work.input },
|
|
418
|
+
work: work
|
|
419
|
+
)
|
|
420
|
+
end
|
|
421
|
+
end
|
|
422
|
+
----
|
|
423
|
+
|
|
424
|
+
===== Check for Errors
|
|
425
|
+
|
|
426
|
+
[source,ruby]
|
|
427
|
+
----
|
|
428
|
+
supervisor.run
|
|
429
|
+
|
|
430
|
+
# ✗ Wrong: Ignore errors
|
|
431
|
+
results = supervisor.results.results
|
|
432
|
+
|
|
433
|
+
# ✓ Correct: Handle both results and errors
|
|
434
|
+
results = supervisor.results.results
|
|
435
|
+
errors = supervisor.results.errors
|
|
436
|
+
|
|
437
|
+
puts "Succeeded: #{results.size}"
|
|
438
|
+
puts "Failed: #{errors.size}"
|
|
439
|
+
|
|
440
|
+
errors.each do |error_result|
|
|
441
|
+
puts "Error: #{error_result.error}"
|
|
442
|
+
puts "Code: #{error_result.error_code}"
|
|
443
|
+
puts "Context: #{error_result.error_context}"
|
|
444
|
+
end
|
|
445
|
+
----
|
|
446
|
+
|
|
447
|
+
---
|
|
448
|
+
|
|
449
|
+
[[ractor-shareability]]
|
|
450
|
+
==== Ractor Shareability Issues
|
|
451
|
+
|
|
452
|
+
**Symptoms:**
|
|
453
|
+
|
|
454
|
+
* `Ractor::IsolationError`
|
|
455
|
+
* "can not make shareable" errors
|
|
456
|
+
* Objects can't be sent to workers
|
|
457
|
+
|
|
458
|
+
**Common Causes:**
|
|
459
|
+
|
|
460
|
+
1. Mutable objects (arrays, hashes)
|
|
461
|
+
2. Objects with instance variables
|
|
462
|
+
3. Procs/lambdas
|
|
463
|
+
4. Class instances with state
|
|
464
|
+
|
|
465
|
+
**Solutions:**
|
|
466
|
+
|
|
467
|
+
===== Freeze Mutable Objects
|
|
468
|
+
|
|
469
|
+
[source,ruby]
|
|
470
|
+
----
|
|
471
|
+
# ✗ Wrong: Mutable arrays/hashes
|
|
472
|
+
work = Fractor::Work.new({
|
|
473
|
+
items: [1, 2, 3],
|
|
474
|
+
config: { timeout: 30 }
|
|
475
|
+
})
|
|
476
|
+
|
|
477
|
+
# ✓ Correct: Freeze everything
|
|
478
|
+
work = Fractor::Work.new({
|
|
479
|
+
items: [1, 2, 3].freeze,
|
|
480
|
+
config: { timeout: 30 }.freeze
|
|
481
|
+
}.freeze)
|
|
482
|
+
----
|
|
483
|
+
|
|
484
|
+
===== Use Ractor.make_shareable
|
|
485
|
+
|
|
486
|
+
[source,ruby]
|
|
487
|
+
----
|
|
488
|
+
# For complex objects
|
|
489
|
+
data = {
|
|
490
|
+
users: [{ id: 1, name: "Alice" }, { id: 2, name: "Bob" }],
|
|
491
|
+
settings: { timeout: 30, retries: 3 }
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
shareable_data = Ractor.make_shareable(data)
|
|
495
|
+
work = Fractor::Work.new(shareable_data)
|
|
496
|
+
----
|
|
497
|
+
|
|
498
|
+
===== Avoid Non-Shareable Objects
|
|
499
|
+
|
|
500
|
+
[source,ruby]
|
|
501
|
+
----
|
|
502
|
+
# ✗ Wrong: Procs aren't shareable
|
|
503
|
+
work = Fractor::Work.new({
|
|
504
|
+
callback: -> { puts "done" } # Error!
|
|
505
|
+
})
|
|
506
|
+
|
|
507
|
+
# ✓ Correct: Use symbols or serializable data
|
|
508
|
+
work = Fractor::Work.new({
|
|
509
|
+
callback_name: :handle_completion
|
|
510
|
+
})
|
|
511
|
+
|
|
512
|
+
# Worker looks up callback by name
|
|
513
|
+
class Worker < Fractor::Worker
|
|
514
|
+
CALLBACKS = {
|
|
515
|
+
handle_completion: -> { puts "done" }
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
def process(work)
|
|
519
|
+
callback = CALLBACKS[work.input[:callback_name]]
|
|
520
|
+
# ...
|
|
521
|
+
end
|
|
522
|
+
end
|
|
523
|
+
----
|
|
524
|
+
|
|
525
|
+
---
|
|
526
|
+
|
|
527
|
+
[[deadlock]]
|
|
528
|
+
==== Deadlock or Process Hangs
|
|
529
|
+
|
|
530
|
+
**Symptoms:**
|
|
531
|
+
|
|
532
|
+
* Process stops responding
|
|
533
|
+
* Workers stuck waiting
|
|
534
|
+
* No progress made
|
|
535
|
+
|
|
536
|
+
**Common Causes:**
|
|
537
|
+
|
|
538
|
+
1. Circular dependencies
|
|
539
|
+
2. All workers waiting for resources
|
|
540
|
+
3. Queue is full and workers are blocked
|
|
541
|
+
|
|
542
|
+
**Solutions:**
|
|
543
|
+
|
|
544
|
+
===== Check for Circular Dependencies
|
|
545
|
+
|
|
546
|
+
[source,ruby]
|
|
547
|
+
----
|
|
548
|
+
# ✗ Wrong: Circular work generation
|
|
549
|
+
class ProducerWorker < Fractor::Worker
|
|
550
|
+
def process(work)
|
|
551
|
+
# Producer creates work that needs itself!
|
|
552
|
+
supervisor.add_work(ProducerWork.new(supervisor))
|
|
553
|
+
end
|
|
554
|
+
end
|
|
555
|
+
----
|
|
556
|
+
|
|
557
|
+
===== Use Timeouts
|
|
558
|
+
|
|
559
|
+
[source,ruby]
|
|
560
|
+
----
|
|
561
|
+
class TimeoutWorker < Fractor::Worker
|
|
562
|
+
def process(work)
|
|
563
|
+
result = Timeout.timeout(30) do
|
|
564
|
+
# Operation that might hang
|
|
565
|
+
risky_operation(work.input)
|
|
566
|
+
end
|
|
567
|
+
|
|
568
|
+
Fractor::WorkResult.new(result: result, work: work)
|
|
569
|
+
rescue Timeout::Error => e
|
|
570
|
+
Fractor::WorkResult.new(
|
|
571
|
+
error: e,
|
|
572
|
+
error_code: :timeout,
|
|
573
|
+
work: work
|
|
574
|
+
)
|
|
575
|
+
end
|
|
576
|
+
end
|
|
577
|
+
----
|
|
578
|
+
|
|
579
|
+
===== Monitor for Hangs
|
|
580
|
+
|
|
581
|
+
[source,ruby]
|
|
582
|
+
----
|
|
583
|
+
# Watchdog thread
|
|
584
|
+
watchdog = Thread.new do
|
|
585
|
+
last_progress = 0
|
|
586
|
+
loop do
|
|
587
|
+
sleep 30
|
|
588
|
+
current_progress = supervisor.results.results.size
|
|
589
|
+
|
|
590
|
+
if current_progress == last_progress
|
|
591
|
+
puts "Warning: No progress in 30 seconds!"
|
|
592
|
+
puts "Queue size: #{work_queue.size}"
|
|
593
|
+
puts "Active workers: #{supervisor.active_workers}"
|
|
594
|
+
end
|
|
595
|
+
|
|
596
|
+
last_progress = current_progress
|
|
597
|
+
end
|
|
598
|
+
end
|
|
599
|
+
----
|
|
600
|
+
|
|
601
|
+
---
|
|
602
|
+
|
|
603
|
+
[[infinite-loop]]
|
|
604
|
+
==== Infinite Work Generation
|
|
605
|
+
|
|
606
|
+
**Symptoms:**
|
|
607
|
+
|
|
608
|
+
* Work queue grows indefinitely
|
|
609
|
+
* Memory usage climbs continuously
|
|
610
|
+
* Process never completes
|
|
611
|
+
|
|
612
|
+
**Solutions:**
|
|
613
|
+
|
|
614
|
+
===== Add Depth Limits
|
|
615
|
+
|
|
616
|
+
[source,ruby]
|
|
617
|
+
----
|
|
618
|
+
class RecursiveWork < Fractor::Work
|
|
619
|
+
MAX_DEPTH = 10
|
|
620
|
+
|
|
621
|
+
def initialize(data, depth: 0)
|
|
622
|
+
raise "Max depth exceeded" if depth > MAX_DEPTH
|
|
623
|
+
super(data: data, depth: depth)
|
|
624
|
+
end
|
|
625
|
+
end
|
|
626
|
+
|
|
627
|
+
class RecursiveWorker < Fractor::Worker
|
|
628
|
+
def process(work)
|
|
629
|
+
data = work.input[:data]
|
|
630
|
+
depth = work.input[:depth]
|
|
631
|
+
|
|
632
|
+
# Only generate more work if under limit
|
|
633
|
+
if depth < RecursiveWork::MAX_DEPTH
|
|
634
|
+
children = generate_children(data)
|
|
635
|
+
children.each do |child|
|
|
636
|
+
supervisor.add_work(RecursiveWork.new(child, depth: depth + 1))
|
|
637
|
+
end
|
|
638
|
+
end
|
|
639
|
+
|
|
640
|
+
Fractor::WorkResult.new(result: data, work: work)
|
|
641
|
+
end
|
|
642
|
+
end
|
|
643
|
+
----
|
|
644
|
+
|
|
645
|
+
===== Monitor Queue Size
|
|
646
|
+
|
|
647
|
+
[source,ruby]
|
|
648
|
+
----
|
|
649
|
+
MAX_QUEUE_SIZE = 10000
|
|
650
|
+
|
|
651
|
+
loop do
|
|
652
|
+
if work_queue.size > MAX_QUEUE_SIZE
|
|
653
|
+
puts "Error: Queue size exceeded limit!"
|
|
654
|
+
break
|
|
655
|
+
end
|
|
656
|
+
|
|
657
|
+
# Add work...
|
|
658
|
+
sleep 0.1
|
|
659
|
+
end
|
|
660
|
+
----
|
|
661
|
+
|
|
662
|
+
---
|
|
663
|
+
|
|
664
|
+
[[signals-not-working]]
|
|
665
|
+
==== Signal Handling Not Working
|
|
666
|
+
|
|
667
|
+
**Symptoms:**
|
|
668
|
+
|
|
669
|
+
* Ctrl+C doesn't stop process
|
|
670
|
+
* SIGUSR1 doesn't print status
|
|
671
|
+
* Process can't be killed gracefully
|
|
672
|
+
|
|
673
|
+
**Common Causes:**
|
|
674
|
+
|
|
675
|
+
1. Signal handlers overridden
|
|
676
|
+
2. Running in thread without signal handling
|
|
677
|
+
3. Platform differences (Windows vs Unix)
|
|
678
|
+
|
|
679
|
+
**Solutions:**
|
|
680
|
+
|
|
681
|
+
===== Use ContinuousServer
|
|
682
|
+
|
|
683
|
+
[source,ruby]
|
|
684
|
+
----
|
|
685
|
+
# ✓ Automatic signal handling
|
|
686
|
+
server = Fractor::ContinuousServer.new(
|
|
687
|
+
worker_pools: [{ worker_class: MyWorker }],
|
|
688
|
+
work_queue: work_queue
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
# Handles SIGINT, SIGTERM, SIGUSR1/SIGBREAK automatically
|
|
692
|
+
server.run
|
|
693
|
+
----
|
|
694
|
+
|
|
695
|
+
===== Manual Signal Handling
|
|
696
|
+
|
|
697
|
+
[source,ruby]
|
|
698
|
+
----
|
|
699
|
+
# For custom implementations
|
|
700
|
+
trap('INT') do
|
|
701
|
+
puts "\nShutting down gracefully..."
|
|
702
|
+
supervisor.stop
|
|
703
|
+
exit
|
|
704
|
+
end
|
|
705
|
+
|
|
706
|
+
trap('TERM') do
|
|
707
|
+
supervisor.stop
|
|
708
|
+
exit
|
|
709
|
+
end
|
|
710
|
+
|
|
711
|
+
# Unix only
|
|
712
|
+
if Signal.list.key?('USR1')
|
|
713
|
+
trap('USR1') do
|
|
714
|
+
puts supervisor.status
|
|
715
|
+
end
|
|
716
|
+
end
|
|
717
|
+
|
|
718
|
+
# Windows
|
|
719
|
+
if Signal.list.key?('BREAK')
|
|
720
|
+
trap('BREAK') do
|
|
721
|
+
puts supervisor.status
|
|
722
|
+
end
|
|
723
|
+
end
|
|
724
|
+
----
|
|
725
|
+
|
|
726
|
+
---
|
|
727
|
+
|
|
728
|
+
=== FAQ
|
|
729
|
+
|
|
730
|
+
==== How many workers should I use?
|
|
731
|
+
|
|
732
|
+
**CPU-bound work:**
|
|
733
|
+
|
|
734
|
+
* Start with the number of CPU cores
|
|
735
|
+
* Test with: `(1..4).each { |n| test_with_n_workers(n * CPU_count) }`
|
|
736
|
+
|
|
737
|
+
**I/O-bound work:**
|
|
738
|
+
|
|
739
|
+
* Start with 2-4x CPU cores
|
|
740
|
+
* Monitor utilization and increase if workers are idle
|
|
741
|
+
|
|
742
|
+
**Mixed workload:**
|
|
743
|
+
|
|
744
|
+
* Use separate worker pools with different counts
|
|
745
|
+
* Example:
|
|
746
|
+
[source,ruby]
|
|
747
|
+
----
|
|
748
|
+
worker_pools: [
|
|
749
|
+
{ worker_class: CPUWorker, num_workers: 4 },
|
|
750
|
+
{ worker_class: IOWorker, num_workers: 16 }
|
|
751
|
+
]
|
|
752
|
+
----
|
|
753
|
+
|
|
754
|
+
==== Should I use pipeline or continuous mode?
|
|
755
|
+
|
|
756
|
+
**Use pipeline mode when:**
|
|
757
|
+
|
|
758
|
+
* Processing a finite batch of work
|
|
759
|
+
* Work items known upfront
|
|
760
|
+
* Need to collect all results
|
|
761
|
+
* One-time operations
|
|
762
|
+
|
|
763
|
+
**Use continuous mode when:**
|
|
764
|
+
|
|
765
|
+
* Running indefinitely (servers, daemons)
|
|
766
|
+
* Work arrives dynamically
|
|
767
|
+
* Results processed via callbacks
|
|
768
|
+
* Long-running services
|
|
769
|
+
|
|
770
|
+
==== How do I debug worker issues?
|
|
771
|
+
|
|
772
|
+
1. Add logging to worker's `process` method
|
|
773
|
+
2. Use a smaller batch for testing
|
|
774
|
+
3. Run with single worker initially
|
|
775
|
+
4. Check error results for details
|
|
776
|
+
5. Use PerformanceMonitor for metrics
|
|
777
|
+
|
|
778
|
+
[source,ruby]
|
|
779
|
+
----
|
|
780
|
+
class DebugWorker < Fractor::Worker
|
|
781
|
+
def process(work)
|
|
782
|
+
puts "Processing: #{work.input.inspect}"
|
|
783
|
+
|
|
784
|
+
result = perform_work(work.input)
|
|
785
|
+
|
|
786
|
+
puts "Result: #{result.inspect}"
|
|
787
|
+
|
|
788
|
+
Fractor::WorkResult.new(result: result, work: work)
|
|
789
|
+
rescue => e
|
|
790
|
+
puts "Error: #{e.class.name}: #{e.message}"
|
|
791
|
+
puts e.backtrace.first(5)
|
|
792
|
+
|
|
793
|
+
Fractor::WorkResult.new(error: e, work: work)
|
|
794
|
+
end
|
|
795
|
+
end
|
|
796
|
+
----
|
|
797
|
+
|
|
798
|
+
==== How do I handle worker crashes?
|
|
799
|
+
|
|
800
|
+
Ractors that crash are automatically restarted by the Supervisor. However:
|
|
801
|
+
|
|
802
|
+
1. Fix the root cause in your worker code
|
|
803
|
+
2. Add proper error handling (rescue blocks)
|
|
804
|
+
3. Return error WorkResults instead of raising
|
|
805
|
+
4. Log crashes for investigation
|
|
806
|
+
|
|
807
|
+
==== Can I use threads instead of Ractors?
|
|
808
|
+
|
|
809
|
+
Fractor is built on Ractors for true parallelism. If you need threads:
|
|
810
|
+
|
|
811
|
+
* Consider using standard thread pools
|
|
812
|
+
* Fractor's value is in Ractor-based parallelism
|
|
813
|
+
* For I/O-bound work, threads may work, but Ractors provide isolation
|
|
814
|
+
|
|
815
|
+
==== How do I test Fractor code?
|
|
816
|
+
|
|
817
|
+
[source,ruby]
|
|
818
|
+
----
|
|
819
|
+
require 'rspec'
|
|
820
|
+
|
|
821
|
+
RSpec.describe MyWorker do
|
|
822
|
+
it "processes work correctly" do
|
|
823
|
+
work = MyWork.new(data: 'test')
|
|
824
|
+
worker = MyWorker.new
|
|
825
|
+
|
|
826
|
+
result = worker.process(work)
|
|
827
|
+
|
|
828
|
+
expect(result).to be_success
|
|
829
|
+
expect(result.result).to eq(expected_value)
|
|
830
|
+
end
|
|
831
|
+
|
|
832
|
+
it "handles errors" do
|
|
833
|
+
work = MyWork.new(data: nil)
|
|
834
|
+
worker = MyWorker.new
|
|
835
|
+
|
|
836
|
+
result = worker.process(work)
|
|
837
|
+
|
|
838
|
+
expect(result).to be_error
|
|
839
|
+
expect(result.error_code).to eq(:validation_error)
|
|
840
|
+
end
|
|
841
|
+
end
|
|
842
|
+
----
|
|
843
|
+
|
|
844
|
+
---
|
|
845
|
+
|
|
846
|
+
=== Getting Help
|
|
847
|
+
|
|
848
|
+
If you can't find a solution here:
|
|
849
|
+
|
|
850
|
+
1. **Check Examples**: link:examples[Real-world examples] often show solutions
|
|
851
|
+
2. **Read API Docs**: link:api[API Reference] for detailed method documentation
|
|
852
|
+
3. **Review Guides**: link:../guides/core-concepts[Core Concepts] for fundamentals
|
|
853
|
+
4. **GitHub Issues**: Search existing issues or create a new one
|
|
854
|
+
5. **Enable Debug Logging**: Set `FRACTOR_LOG_FILE` environment variable
|
|
855
|
+
|
|
856
|
+
=== See Also
|
|
857
|
+
|
|
858
|
+
* link:../guides/core-concepts[Core Concepts]
|
|
859
|
+
* link:../guides/cookbook[Cookbook] - Common patterns
|
|
860
|
+
* link:api[API Reference]
|
|
861
|
+
* link:error-reporting[Error Reporting]
|
|
862
|
+
* link:examples[Examples]
|