fractor 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-main-ci-rubocop-yml +552 -0
- data/.rubocop.yml +14 -8
- data/.rubocop_todo.yml +284 -43
- data/README.adoc +111 -950
- data/docs/.lycheeignore +16 -0
- data/docs/Gemfile +24 -0
- data/docs/README.md +157 -0
- data/docs/_config.yml +151 -0
- data/docs/_features/error-handling.adoc +1192 -0
- data/docs/_features/index.adoc +80 -0
- data/docs/_features/monitoring.adoc +589 -0
- data/docs/_features/signal-handling.adoc +202 -0
- data/docs/_features/workflows.adoc +1235 -0
- data/docs/_guides/continuous-mode.adoc +736 -0
- data/docs/_guides/cookbook.adoc +1133 -0
- data/docs/_guides/index.adoc +55 -0
- data/docs/_guides/pipeline-mode.adoc +730 -0
- data/docs/_guides/troubleshooting.adoc +358 -0
- data/docs/_pages/architecture.adoc +1390 -0
- data/docs/_pages/core-concepts.adoc +1392 -0
- data/docs/_pages/design-principles.adoc +862 -0
- data/docs/_pages/getting-started.adoc +290 -0
- data/docs/_pages/installation.adoc +143 -0
- data/docs/_reference/api.adoc +1080 -0
- data/docs/_reference/error-reporting.adoc +670 -0
- data/docs/_reference/examples.adoc +181 -0
- data/docs/_reference/index.adoc +96 -0
- data/docs/_reference/troubleshooting.adoc +862 -0
- data/docs/_tutorials/complex-workflows.adoc +1022 -0
- data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
- data/docs/_tutorials/first-application.adoc +384 -0
- data/docs/_tutorials/index.adoc +48 -0
- data/docs/_tutorials/long-running-services.adoc +931 -0
- data/docs/assets/images/favicon-16.png +0 -0
- data/docs/assets/images/favicon-32.png +0 -0
- data/docs/assets/images/favicon-48.png +0 -0
- data/docs/assets/images/favicon.ico +0 -0
- data/docs/assets/images/favicon.png +0 -0
- data/docs/assets/images/favicon.svg +45 -0
- data/docs/assets/images/fractor-icon.svg +49 -0
- data/docs/assets/images/fractor-logo.svg +61 -0
- data/docs/index.adoc +131 -0
- data/docs/lychee.toml +39 -0
- data/examples/api_aggregator/README.adoc +627 -0
- data/examples/api_aggregator/api_aggregator.rb +376 -0
- data/examples/auto_detection/README.adoc +407 -29
- data/examples/auto_detection/auto_detection.rb +9 -9
- data/examples/continuous_chat_common/message_protocol.rb +53 -0
- data/examples/continuous_chat_fractor/README.adoc +217 -0
- data/examples/continuous_chat_fractor/chat_client.rb +303 -0
- data/examples/continuous_chat_fractor/chat_common.rb +83 -0
- data/examples/continuous_chat_fractor/chat_server.rb +167 -0
- data/examples/continuous_chat_fractor/simulate.rb +345 -0
- data/examples/continuous_chat_server/README.adoc +135 -0
- data/examples/continuous_chat_server/chat_client.rb +303 -0
- data/examples/continuous_chat_server/chat_server.rb +359 -0
- data/examples/continuous_chat_server/simulate.rb +343 -0
- data/examples/error_reporting.rb +207 -0
- data/examples/file_processor/README.adoc +170 -0
- data/examples/file_processor/file_processor.rb +615 -0
- data/examples/file_processor/sample_files/invalid.csv +1 -0
- data/examples/file_processor/sample_files/orders.xml +24 -0
- data/examples/file_processor/sample_files/products.json +23 -0
- data/examples/file_processor/sample_files/users.csv +6 -0
- data/examples/hierarchical_hasher/README.adoc +629 -41
- data/examples/hierarchical_hasher/hierarchical_hasher.rb +12 -8
- data/examples/image_processor/README.adoc +610 -0
- data/examples/image_processor/image_processor.rb +349 -0
- data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
- data/examples/image_processor/test_images/sample_1.png +1 -0
- data/examples/image_processor/test_images/sample_10.png +1 -0
- data/examples/image_processor/test_images/sample_2.png +1 -0
- data/examples/image_processor/test_images/sample_3.png +1 -0
- data/examples/image_processor/test_images/sample_4.png +1 -0
- data/examples/image_processor/test_images/sample_5.png +1 -0
- data/examples/image_processor/test_images/sample_6.png +1 -0
- data/examples/image_processor/test_images/sample_7.png +1 -0
- data/examples/image_processor/test_images/sample_8.png +1 -0
- data/examples/image_processor/test_images/sample_9.png +1 -0
- data/examples/log_analyzer/README.adoc +662 -0
- data/examples/log_analyzer/log_analyzer.rb +579 -0
- data/examples/log_analyzer/sample_logs/apache.log +20 -0
- data/examples/log_analyzer/sample_logs/json.log +15 -0
- data/examples/log_analyzer/sample_logs/nginx.log +15 -0
- data/examples/log_analyzer/sample_logs/rails.log +29 -0
- data/examples/multi_work_type/README.adoc +576 -26
- data/examples/multi_work_type/multi_work_type.rb +30 -29
- data/examples/performance_monitoring.rb +120 -0
- data/examples/pipeline_processing/README.adoc +740 -26
- data/examples/pipeline_processing/pipeline_processing.rb +16 -16
- data/examples/priority_work_example.rb +155 -0
- data/examples/producer_subscriber/README.adoc +889 -46
- data/examples/producer_subscriber/producer_subscriber.rb +20 -16
- data/examples/scatter_gather/README.adoc +829 -27
- data/examples/scatter_gather/scatter_gather.rb +29 -28
- data/examples/simple/README.adoc +347 -0
- data/examples/simple/sample.rb +5 -5
- data/examples/specialized_workers/README.adoc +622 -26
- data/examples/specialized_workers/specialized_workers.rb +88 -45
- data/examples/stream_processor/README.adoc +206 -0
- data/examples/stream_processor/stream_processor.rb +284 -0
- data/examples/web_scraper/README.adoc +625 -0
- data/examples/web_scraper/web_scraper.rb +285 -0
- data/examples/workflow/README.adoc +406 -0
- data/examples/workflow/circuit_breaker/README.adoc +360 -0
- data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
- data/examples/workflow/conditional/README.adoc +483 -0
- data/examples/workflow/conditional/conditional_workflow.rb +215 -0
- data/examples/workflow/dead_letter_queue/README.adoc +374 -0
- data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
- data/examples/workflow/fan_out/README.adoc +381 -0
- data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
- data/examples/workflow/retry/README.adoc +248 -0
- data/examples/workflow/retry/retry_workflow.rb +195 -0
- data/examples/workflow/simple_linear/README.adoc +267 -0
- data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
- data/examples/workflow/simplified/README.adoc +329 -0
- data/examples/workflow/simplified/simplified_workflow.rb +222 -0
- data/exe/fractor +10 -0
- data/lib/fractor/cli.rb +288 -0
- data/lib/fractor/configuration.rb +307 -0
- data/lib/fractor/continuous_server.rb +183 -0
- data/lib/fractor/error_formatter.rb +72 -0
- data/lib/fractor/error_report_generator.rb +152 -0
- data/lib/fractor/error_reporter.rb +244 -0
- data/lib/fractor/error_statistics.rb +147 -0
- data/lib/fractor/execution_tracer.rb +162 -0
- data/lib/fractor/logger.rb +230 -0
- data/lib/fractor/main_loop_handler.rb +406 -0
- data/lib/fractor/main_loop_handler3.rb +135 -0
- data/lib/fractor/main_loop_handler4.rb +299 -0
- data/lib/fractor/performance_metrics_collector.rb +181 -0
- data/lib/fractor/performance_monitor.rb +215 -0
- data/lib/fractor/performance_report_generator.rb +202 -0
- data/lib/fractor/priority_work.rb +93 -0
- data/lib/fractor/priority_work_queue.rb +189 -0
- data/lib/fractor/result_aggregator.rb +33 -1
- data/lib/fractor/shutdown_handler.rb +168 -0
- data/lib/fractor/signal_handler.rb +80 -0
- data/lib/fractor/supervisor.rb +430 -144
- data/lib/fractor/supervisor_logger.rb +88 -0
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work.rb +12 -0
- data/lib/fractor/work_distribution_manager.rb +151 -0
- data/lib/fractor/work_queue.rb +88 -0
- data/lib/fractor/work_result.rb +181 -9
- data/lib/fractor/worker.rb +75 -1
- data/lib/fractor/workflow/builder.rb +210 -0
- data/lib/fractor/workflow/chain_builder.rb +169 -0
- data/lib/fractor/workflow/circuit_breaker.rb +183 -0
- data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
- data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
- data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
- data/lib/fractor/workflow/execution_hooks.rb +39 -0
- data/lib/fractor/workflow/execution_strategy.rb +225 -0
- data/lib/fractor/workflow/execution_trace.rb +134 -0
- data/lib/fractor/workflow/helpers.rb +191 -0
- data/lib/fractor/workflow/job.rb +290 -0
- data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
- data/lib/fractor/workflow/logger.rb +110 -0
- data/lib/fractor/workflow/pre_execution_context.rb +193 -0
- data/lib/fractor/workflow/retry_config.rb +156 -0
- data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
- data/lib/fractor/workflow/retry_strategy.rb +93 -0
- data/lib/fractor/workflow/structured_logger.rb +30 -0
- data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
- data/lib/fractor/workflow/visualizer.rb +211 -0
- data/lib/fractor/workflow/workflow_context.rb +132 -0
- data/lib/fractor/workflow/workflow_executor.rb +669 -0
- data/lib/fractor/workflow/workflow_result.rb +55 -0
- data/lib/fractor/workflow/workflow_validator.rb +295 -0
- data/lib/fractor/workflow.rb +333 -0
- data/lib/fractor/wrapped_ractor.rb +66 -91
- data/lib/fractor/wrapped_ractor3.rb +161 -0
- data/lib/fractor/wrapped_ractor4.rb +242 -0
- data/lib/fractor.rb +93 -3
- metadata +192 -6
- data/tests/sample.rb.bak +0 -309
- data/tests/sample_working.rb.bak +0 -209
|
@@ -0,0 +1,670 @@
|
|
|
1
|
+
---
|
|
2
|
+
layout: default
|
|
3
|
+
title: Error Reporting
|
|
4
|
+
nav_order: 4
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
== Error Reporting and Analytics
|
|
8
|
+
|
|
9
|
+
=== Overview
|
|
10
|
+
|
|
11
|
+
Fractor provides comprehensive error reporting and analytics through the link:api#fractorerrorreporter[`ErrorReporter`] class. This system aggregates errors, tracks statistics, detects trends, and provides actionable insights into application health.
|
|
12
|
+
|
|
13
|
+
=== Purpose
|
|
14
|
+
|
|
15
|
+
The ErrorReporter helps you:
|
|
16
|
+
|
|
17
|
+
* **Monitor error patterns** across your application
|
|
18
|
+
* **Identify problematic jobs** with high error rates
|
|
19
|
+
* **Detect trending issues** before they become critical
|
|
20
|
+
* **Track error severity** and categorization
|
|
21
|
+
* **Export metrics** to monitoring systems
|
|
22
|
+
* **Respond to critical errors** in real-time
|
|
23
|
+
|
|
24
|
+
=== Basic Usage
|
|
25
|
+
|
|
26
|
+
==== Setup
|
|
27
|
+
|
|
28
|
+
[source,ruby]
|
|
29
|
+
----
|
|
30
|
+
require 'fractor'
|
|
31
|
+
|
|
32
|
+
# Create an error reporter instance
|
|
33
|
+
reporter = Fractor::ErrorReporter.new
|
|
34
|
+
----
|
|
35
|
+
|
|
36
|
+
==== Recording Work Results
|
|
37
|
+
|
|
38
|
+
The ErrorReporter tracks both successes and failures:
|
|
39
|
+
|
|
40
|
+
[source,ruby]
|
|
41
|
+
----
|
|
42
|
+
# Record a successful result
|
|
43
|
+
work_result = Fractor::WorkResult.new(result: "Success")
|
|
44
|
+
reporter.record(work_result)
|
|
45
|
+
|
|
46
|
+
# Record an error result
|
|
47
|
+
work_result = Fractor::WorkResult.new(
|
|
48
|
+
error: StandardError.new("Connection failed"),
|
|
49
|
+
error_code: :connection_failed,
|
|
50
|
+
error_category: :network,
|
|
51
|
+
error_severity: :error
|
|
52
|
+
)
|
|
53
|
+
reporter.record(work_result, job_name: "fetch_data")
|
|
54
|
+
----
|
|
55
|
+
|
|
56
|
+
==== Viewing Statistics
|
|
57
|
+
|
|
58
|
+
[source,ruby]
|
|
59
|
+
----
|
|
60
|
+
# Overall statistics
|
|
61
|
+
puts "Total Errors: #{reporter.total_errors}"
|
|
62
|
+
puts "Total Successes: #{reporter.total_successes}"
|
|
63
|
+
puts "Error Rate: #{reporter.overall_error_rate}%"
|
|
64
|
+
|
|
65
|
+
# Top error categories
|
|
66
|
+
reporter.top_categories.each do |category, count|
|
|
67
|
+
puts "#{category}: #{count} errors"
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Top error jobs
|
|
71
|
+
reporter.top_jobs.each do |job, count|
|
|
72
|
+
puts "#{job}: #{count} errors"
|
|
73
|
+
end
|
|
74
|
+
----
|
|
75
|
+
|
|
76
|
+
=== Error Categorization
|
|
77
|
+
|
|
78
|
+
Fractor automatically categorizes errors based on their type:
|
|
79
|
+
|
|
80
|
+
[cols="1,2,3"]
|
|
81
|
+
|===
|
|
82
|
+
|Category |Error Types |Description
|
|
83
|
+
|
|
84
|
+
|`:validation`
|
|
85
|
+
|`ArgumentError`, `TypeError`
|
|
86
|
+
|Input validation errors
|
|
87
|
+
|
|
88
|
+
|`:timeout`
|
|
89
|
+
|`Timeout::Error`
|
|
90
|
+
|Operation timeout errors
|
|
91
|
+
|
|
92
|
+
|`:network`
|
|
93
|
+
|`SocketError`, `Errno::ECONNREFUSED`, `Errno::ETIMEDOUT`
|
|
94
|
+
|Network-related errors
|
|
95
|
+
|
|
96
|
+
|`:resource`
|
|
97
|
+
|`Errno::ENOMEM`, `Errno::ENOSPC`
|
|
98
|
+
|Resource exhaustion errors
|
|
99
|
+
|
|
100
|
+
|`:system`
|
|
101
|
+
|`SystemCallError`, `SystemStackError`
|
|
102
|
+
|System-level errors
|
|
103
|
+
|
|
104
|
+
|`:business`
|
|
105
|
+
|Custom business logic errors
|
|
106
|
+
|Application-specific errors
|
|
107
|
+
|
|
108
|
+
|`:unknown`
|
|
109
|
+
|Other errors
|
|
110
|
+
|Uncategorized errors
|
|
111
|
+
|===
|
|
112
|
+
|
|
113
|
+
=== Error Severity Levels
|
|
114
|
+
|
|
115
|
+
Errors are assigned severity levels:
|
|
116
|
+
|
|
117
|
+
[cols="1,3"]
|
|
118
|
+
|===
|
|
119
|
+
|Severity |Description
|
|
120
|
+
|
|
121
|
+
|`:critical`
|
|
122
|
+
|System-breaking errors requiring immediate attention
|
|
123
|
+
|
|
124
|
+
|`:error`
|
|
125
|
+
|Standard errors that prevent operation completion
|
|
126
|
+
|
|
127
|
+
|`:warning`
|
|
128
|
+
|Non-fatal issues that may need investigation
|
|
129
|
+
|
|
130
|
+
|`:info`
|
|
131
|
+
|Informational messages
|
|
132
|
+
|===
|
|
133
|
+
|
|
134
|
+
=== Real-Time Error Handlers
|
|
135
|
+
|
|
136
|
+
Register callbacks to respond to errors as they occur:
|
|
137
|
+
|
|
138
|
+
==== Basic Handler
|
|
139
|
+
|
|
140
|
+
[source,ruby]
|
|
141
|
+
----
|
|
142
|
+
reporter.on_error do |work_result, job_name|
|
|
143
|
+
puts "Error in #{job_name}: #{work_result.error.message}"
|
|
144
|
+
end
|
|
145
|
+
----
|
|
146
|
+
|
|
147
|
+
==== Critical Error Alerts
|
|
148
|
+
|
|
149
|
+
[source,ruby]
|
|
150
|
+
----
|
|
151
|
+
reporter.on_error do |work_result, job_name|
|
|
152
|
+
if work_result.critical?
|
|
153
|
+
# Send alert to operations team
|
|
154
|
+
AlertService.notify(
|
|
155
|
+
severity: "critical",
|
|
156
|
+
job: job_name,
|
|
157
|
+
error: work_result.error.message,
|
|
158
|
+
context: work_result.error_context
|
|
159
|
+
)
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
----
|
|
163
|
+
|
|
164
|
+
==== Multiple Handlers
|
|
165
|
+
|
|
166
|
+
You can register multiple handlers for different purposes:
|
|
167
|
+
|
|
168
|
+
[source,ruby]
|
|
169
|
+
----
|
|
170
|
+
# Handler 1: Log all errors
|
|
171
|
+
reporter.on_error do |work_result, job_name|
|
|
172
|
+
Logger.error("Job #{job_name} failed: #{work_result.error.message}")
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
# Handler 2: Send metrics
|
|
176
|
+
reporter.on_error do |work_result, job_name|
|
|
177
|
+
Metrics.increment("errors.#{work_result.error_category}")
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
# Handler 3: Alert on critical errors
|
|
181
|
+
reporter.on_error do |work_result, job_name|
|
|
182
|
+
AlertService.notify(work_result) if work_result.critical?
|
|
183
|
+
end
|
|
184
|
+
----
|
|
185
|
+
|
|
186
|
+
=== Generating Reports
|
|
187
|
+
|
|
188
|
+
==== Formatted Text Report
|
|
189
|
+
|
|
190
|
+
Generate a human-readable report:
|
|
191
|
+
|
|
192
|
+
[source,ruby]
|
|
193
|
+
----
|
|
194
|
+
puts reporter.formatted_report
|
|
195
|
+
----
|
|
196
|
+
|
|
197
|
+
Example output:
|
|
198
|
+
|
|
199
|
+
[source]
|
|
200
|
+
----
|
|
201
|
+
================================================================================
|
|
202
|
+
ERROR REPORT
|
|
203
|
+
================================================================================
|
|
204
|
+
|
|
205
|
+
SUMMARY
|
|
206
|
+
--------------------------------------------------------------------------------
|
|
207
|
+
Uptime: 127.45s
|
|
208
|
+
Total Errors: 15
|
|
209
|
+
Total Successes: 85
|
|
210
|
+
Error Rate: 15.0%
|
|
211
|
+
|
|
212
|
+
Errors by Severity:
|
|
213
|
+
critical : 1
|
|
214
|
+
error : 12
|
|
215
|
+
warning : 2
|
|
216
|
+
|
|
217
|
+
TOP ERROR CATEGORIES
|
|
218
|
+
--------------------------------------------------------------------------------
|
|
219
|
+
network : 8 errors
|
|
220
|
+
validation : 5 errors
|
|
221
|
+
timeout : 2 errors
|
|
222
|
+
|
|
223
|
+
TOP ERROR JOBS
|
|
224
|
+
--------------------------------------------------------------------------------
|
|
225
|
+
fetch_data : 8 errors
|
|
226
|
+
process_data : 5 errors
|
|
227
|
+
validate_input : 2 errors
|
|
228
|
+
|
|
229
|
+
CRITICAL ERRORS
|
|
230
|
+
--------------------------------------------------------------------------------
|
|
231
|
+
Category: system
|
|
232
|
+
Count: 1
|
|
233
|
+
Recent errors:
|
|
234
|
+
- [2025-01-15 10:30:45] SystemStackError: Stack overflow
|
|
235
|
+
|
|
236
|
+
TRENDING ERRORS (Increasing)
|
|
237
|
+
--------------------------------------------------------------------------------
|
|
238
|
+
Category: network
|
|
239
|
+
Total Count: 8
|
|
240
|
+
Error Rate: 0.06/s
|
|
241
|
+
Trend: increasing
|
|
242
|
+
================================================================================
|
|
243
|
+
----
|
|
244
|
+
|
|
245
|
+
==== Programmatic Access
|
|
246
|
+
|
|
247
|
+
[source,ruby]
|
|
248
|
+
----
|
|
249
|
+
report = reporter.report
|
|
250
|
+
|
|
251
|
+
# Access specific sections
|
|
252
|
+
summary = report[:summary]
|
|
253
|
+
puts "Uptime: #{summary[:uptime]}s"
|
|
254
|
+
puts "Error Rate: #{summary[:error_rate]}%"
|
|
255
|
+
|
|
256
|
+
# Critical errors
|
|
257
|
+
report[:critical_errors].each do |error_info|
|
|
258
|
+
puts "Critical in #{error_info[:category]}: #{error_info[:count]} errors"
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
# Trending errors
|
|
262
|
+
report[:trending_errors].each do |trend|
|
|
263
|
+
puts "Trending: #{trend[:category]}"
|
|
264
|
+
end
|
|
265
|
+
----
|
|
266
|
+
|
|
267
|
+
=== Exporting Metrics
|
|
268
|
+
|
|
269
|
+
==== Prometheus Format
|
|
270
|
+
|
|
271
|
+
Export metrics for Prometheus monitoring:
|
|
272
|
+
|
|
273
|
+
[source,ruby]
|
|
274
|
+
----
|
|
275
|
+
# Write to file
|
|
276
|
+
File.write("metrics.txt", reporter.to_prometheus)
|
|
277
|
+
|
|
278
|
+
# Or serve via HTTP endpoint
|
|
279
|
+
get '/metrics' do
|
|
280
|
+
content_type 'text/plain'
|
|
281
|
+
reporter.to_prometheus
|
|
282
|
+
end
|
|
283
|
+
----
|
|
284
|
+
|
|
285
|
+
Example output:
|
|
286
|
+
|
|
287
|
+
[source]
|
|
288
|
+
----
|
|
289
|
+
# HELP fractor_errors_total Total number of errors
|
|
290
|
+
# TYPE fractor_errors_total counter
|
|
291
|
+
fractor_errors_total 15
|
|
292
|
+
|
|
293
|
+
# HELP fractor_successes_total Total number of successes
|
|
294
|
+
# TYPE fractor_successes_total counter
|
|
295
|
+
fractor_successes_total 85
|
|
296
|
+
|
|
297
|
+
# HELP fractor_error_rate Error rate percentage
|
|
298
|
+
# TYPE fractor_error_rate gauge
|
|
299
|
+
fractor_error_rate 15.0
|
|
300
|
+
|
|
301
|
+
# HELP fractor_errors_by_severity Errors by severity level
|
|
302
|
+
# TYPE fractor_errors_by_severity gauge
|
|
303
|
+
fractor_errors_by_severity{severity="critical"} 1
|
|
304
|
+
fractor_errors_by_severity{severity="error"} 12
|
|
305
|
+
fractor_errors_by_severity{severity="warning"} 2
|
|
306
|
+
|
|
307
|
+
# HELP fractor_errors_by_category Errors by category
|
|
308
|
+
# TYPE fractor_errors_by_category gauge
|
|
309
|
+
fractor_errors_by_category{category="network"} 8
|
|
310
|
+
fractor_errors_by_category{category="validation"} 5
|
|
311
|
+
fractor_errors_by_category{category="timeout"} 2
|
|
312
|
+
----
|
|
313
|
+
|
|
314
|
+
==== JSON Format
|
|
315
|
+
|
|
316
|
+
Export as JSON for programmatic consumption:
|
|
317
|
+
|
|
318
|
+
[source,ruby]
|
|
319
|
+
----
|
|
320
|
+
# Write to file
|
|
321
|
+
File.write("error_report.json", reporter.to_json)
|
|
322
|
+
|
|
323
|
+
# Or serve via API
|
|
324
|
+
get '/api/errors' do
|
|
325
|
+
content_type 'application/json'
|
|
326
|
+
reporter.to_json
|
|
327
|
+
end
|
|
328
|
+
----
|
|
329
|
+
|
|
330
|
+
=== Job-Specific Statistics
|
|
331
|
+
|
|
332
|
+
Get detailed statistics for a specific job:
|
|
333
|
+
|
|
334
|
+
[source,ruby]
|
|
335
|
+
----
|
|
336
|
+
stats = reporter.job_stats("fetch_data")
|
|
337
|
+
|
|
338
|
+
puts "Job: fetch_data"
|
|
339
|
+
puts "Total Errors: #{stats[:total_count]}"
|
|
340
|
+
puts "Error Rate: #{stats[:error_rate]}/s"
|
|
341
|
+
puts "Most Common Error: #{stats[:most_common_code]}"
|
|
342
|
+
puts "Highest Severity: #{stats[:highest_severity]}"
|
|
343
|
+
puts "Trend: #{stats[:trending]}"
|
|
344
|
+
----
|
|
345
|
+
|
|
346
|
+
=== Category-Specific Statistics
|
|
347
|
+
|
|
348
|
+
Get detailed statistics for an error category:
|
|
349
|
+
|
|
350
|
+
[source,ruby]
|
|
351
|
+
----
|
|
352
|
+
stats = reporter.category_stats(:network)
|
|
353
|
+
|
|
354
|
+
puts "Category: network"
|
|
355
|
+
puts "Total Count: #{stats[:total_count]}"
|
|
356
|
+
puts "Error Rate: #{stats[:error_rate]}/s"
|
|
357
|
+
puts "By Severity: #{stats[:by_severity]}"
|
|
358
|
+
puts "By Code: #{stats[:by_code]}"
|
|
359
|
+
puts "Trending: #{stats[:trending]}"
|
|
360
|
+
----
|
|
361
|
+
|
|
362
|
+
=== Detecting Trends
|
|
363
|
+
|
|
364
|
+
The ErrorReporter automatically detects increasing error rates:
|
|
365
|
+
|
|
366
|
+
[source,ruby]
|
|
367
|
+
----
|
|
368
|
+
# Get all trending errors
|
|
369
|
+
trending = reporter.trending_errors
|
|
370
|
+
|
|
371
|
+
trending.each do |trend|
|
|
372
|
+
category = trend[:category]
|
|
373
|
+
stats = trend[:stats]
|
|
374
|
+
|
|
375
|
+
puts "⚠️ #{category} errors are increasing!"
|
|
376
|
+
puts " Count: #{stats[:total_count]}"
|
|
377
|
+
puts " Rate: #{stats[:error_rate]}/s"
|
|
378
|
+
end
|
|
379
|
+
----
|
|
380
|
+
|
|
381
|
+
=== Integration Examples
|
|
382
|
+
|
|
383
|
+
==== With Supervisor
|
|
384
|
+
|
|
385
|
+
[source,ruby]
|
|
386
|
+
----
|
|
387
|
+
supervisor = Fractor::Supervisor.new(
|
|
388
|
+
worker_pools: [{ worker_class: MyWorker, count: 4 }]
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
reporter = Fractor::ErrorReporter.new
|
|
392
|
+
|
|
393
|
+
# Record results as they complete
|
|
394
|
+
supervisor.on_result do |result|
|
|
395
|
+
reporter.record(result)
|
|
396
|
+
end
|
|
397
|
+
----
|
|
398
|
+
|
|
399
|
+
==== With Workflows
|
|
400
|
+
|
|
401
|
+
[source,ruby]
|
|
402
|
+
----
|
|
403
|
+
class MyWorkflow < Fractor::Workflow
|
|
404
|
+
workflow "monitored-workflow" do
|
|
405
|
+
job "process" do
|
|
406
|
+
runs_with ProcessWorker
|
|
407
|
+
|
|
408
|
+
on_error do |error, context|
|
|
409
|
+
# Report error
|
|
410
|
+
reporter.record(
|
|
411
|
+
Fractor::WorkResult.new(
|
|
412
|
+
error: error,
|
|
413
|
+
error_context: context
|
|
414
|
+
),
|
|
415
|
+
job_name: "process"
|
|
416
|
+
)
|
|
417
|
+
end
|
|
418
|
+
end
|
|
419
|
+
end
|
|
420
|
+
end
|
|
421
|
+
----
|
|
422
|
+
|
|
423
|
+
==== Periodic Reporting
|
|
424
|
+
|
|
425
|
+
[source,ruby]
|
|
426
|
+
----
|
|
427
|
+
# Report every 5 minutes
|
|
428
|
+
Thread.new do
|
|
429
|
+
loop do
|
|
430
|
+
sleep 300 # 5 minutes
|
|
431
|
+
|
|
432
|
+
# Log summary
|
|
433
|
+
Logger.info("Error Summary: #{reporter.overall_error_rate}% error rate")
|
|
434
|
+
|
|
435
|
+
# Alert on high error rates
|
|
436
|
+
if reporter.overall_error_rate > 10.0
|
|
437
|
+
AlertService.notify("High error rate detected!")
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
# Check for trending errors
|
|
441
|
+
reporter.trending_errors.each do |trend|
|
|
442
|
+
AlertService.notify("Trending: #{trend[:category]}")
|
|
443
|
+
end
|
|
444
|
+
end
|
|
445
|
+
end
|
|
446
|
+
----
|
|
447
|
+
|
|
448
|
+
=== Production Best Practices
|
|
449
|
+
|
|
450
|
+
==== 1. Set Up Monitoring
|
|
451
|
+
|
|
452
|
+
[source,ruby]
|
|
453
|
+
----
|
|
454
|
+
# Configure Prometheus scraping
|
|
455
|
+
# In config/prometheus.yml:
|
|
456
|
+
# scrape_configs:
|
|
457
|
+
# - job_name: 'fractor'
|
|
458
|
+
# static_configs:
|
|
459
|
+
# - targets: ['localhost:9090']
|
|
460
|
+
|
|
461
|
+
# Serve metrics endpoint
|
|
462
|
+
require 'sinatra'
|
|
463
|
+
|
|
464
|
+
get '/metrics' do
|
|
465
|
+
content_type 'text/plain'
|
|
466
|
+
$error_reporter.to_prometheus
|
|
467
|
+
end
|
|
468
|
+
----
|
|
469
|
+
|
|
470
|
+
==== 2. Configure Alerts
|
|
471
|
+
|
|
472
|
+
[source,ruby]
|
|
473
|
+
----
|
|
474
|
+
reporter.on_error do |work_result, job_name|
|
|
475
|
+
case work_result.error_severity
|
|
476
|
+
when :critical
|
|
477
|
+
PagerDuty.alert(work_result, job_name)
|
|
478
|
+
when :error
|
|
479
|
+
Slack.notify(work_result, job_name) if should_notify?(work_result)
|
|
480
|
+
when :warning
|
|
481
|
+
Logger.warn("#{job_name}: #{work_result.error.message}")
|
|
482
|
+
end
|
|
483
|
+
end
|
|
484
|
+
|
|
485
|
+
def should_notify?(work_result)
|
|
486
|
+
# Only notify for non-retriable errors or after multiple failures
|
|
487
|
+
!work_result.retriable? || failure_count(work_result) > 3
|
|
488
|
+
end
|
|
489
|
+
----
|
|
490
|
+
|
|
491
|
+
==== 3. Regular Health Checks
|
|
492
|
+
|
|
493
|
+
[source,ruby]
|
|
494
|
+
----
|
|
495
|
+
# Run health checks every minute
|
|
496
|
+
Thread.new do
|
|
497
|
+
loop do
|
|
498
|
+
sleep 60
|
|
499
|
+
|
|
500
|
+
# Check critical errors
|
|
501
|
+
critical = reporter.critical_errors
|
|
502
|
+
if critical.any?
|
|
503
|
+
PagerDuty.alert("Critical errors detected: #{critical.size}")
|
|
504
|
+
end
|
|
505
|
+
|
|
506
|
+
# Check error rate
|
|
507
|
+
if reporter.overall_error_rate > 25.0
|
|
508
|
+
Slack.notify("High error rate: #{reporter.overall_error_rate}%")
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
# Check trending
|
|
512
|
+
if reporter.trending_errors.any?
|
|
513
|
+
Slack.notify("Trending errors detected")
|
|
514
|
+
end
|
|
515
|
+
end
|
|
516
|
+
end
|
|
517
|
+
----
|
|
518
|
+
|
|
519
|
+
==== 4. Data Retention
|
|
520
|
+
|
|
521
|
+
[source,ruby]
|
|
522
|
+
----
|
|
523
|
+
# Reset statistics daily to prevent unbounded memory growth
|
|
524
|
+
Thread.new do
|
|
525
|
+
loop do
|
|
526
|
+
sleep 86400 # 24 hours
|
|
527
|
+
|
|
528
|
+
# Archive current stats
|
|
529
|
+
File.write(
|
|
530
|
+
"error_report_#{Date.today}.json",
|
|
531
|
+
reporter.to_json
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
# Reset for new day
|
|
535
|
+
reporter.reset
|
|
536
|
+
Logger.info("Error reporter statistics reset")
|
|
537
|
+
end
|
|
538
|
+
end
|
|
539
|
+
----
|
|
540
|
+
|
|
541
|
+
=== Advanced Features
|
|
542
|
+
|
|
543
|
+
==== Custom Error Categorization
|
|
544
|
+
|
|
545
|
+
Override the default categorization:
|
|
546
|
+
|
|
547
|
+
[source,ruby]
|
|
548
|
+
----
|
|
549
|
+
class MyCustomError < StandardError; end
|
|
550
|
+
|
|
551
|
+
# In your worker
|
|
552
|
+
def process(work)
|
|
553
|
+
raise MyCustomError, "Custom error"
|
|
554
|
+
rescue MyCustomError => e
|
|
555
|
+
Fractor::WorkResult.new(
|
|
556
|
+
error: e,
|
|
557
|
+
error_category: :business, # Custom category
|
|
558
|
+
error_code: :custom_failure,
|
|
559
|
+
error_severity: :error,
|
|
560
|
+
work: work
|
|
561
|
+
)
|
|
562
|
+
end
|
|
563
|
+
----
|
|
564
|
+
|
|
565
|
+
==== Error Context Enrichment
|
|
566
|
+
|
|
567
|
+
Add contextual information to errors:
|
|
568
|
+
|
|
569
|
+
[source,ruby]
|
|
570
|
+
----
|
|
571
|
+
def process(work)
|
|
572
|
+
start_time = Time.now
|
|
573
|
+
# ... processing ...
|
|
574
|
+
rescue => e
|
|
575
|
+
Fractor::WorkResult.new(
|
|
576
|
+
error: e,
|
|
577
|
+
error_context: {
|
|
578
|
+
duration: Time.now - start_time,
|
|
579
|
+
input_size: work.input.size,
|
|
580
|
+
memory_used: get_memory_usage,
|
|
581
|
+
retry_count: work.retry_count,
|
|
582
|
+
worker_id: Thread.current.object_id
|
|
583
|
+
},
|
|
584
|
+
work: work
|
|
585
|
+
)
|
|
586
|
+
end
|
|
587
|
+
----
|
|
588
|
+
|
|
589
|
+
==== Filtering and Analysis
|
|
590
|
+
|
|
591
|
+
[source,ruby]
|
|
592
|
+
----
|
|
593
|
+
# Get errors by specific criteria
|
|
594
|
+
report = reporter.report
|
|
595
|
+
|
|
596
|
+
# High-severity errors
|
|
597
|
+
high_severity = report[:category_breakdown].select do |category, stats|
|
|
598
|
+
stats[:highest_severity] == :critical ||
|
|
599
|
+
stats[:highest_severity] == :error
|
|
600
|
+
end
|
|
601
|
+
|
|
602
|
+
# Categories with high error rates
|
|
603
|
+
high_rate = report[:category_breakdown].select do |category, stats|
|
|
604
|
+
stats[:error_rate] > 1.0 # More than 1 error per second
|
|
605
|
+
end
|
|
606
|
+
|
|
607
|
+
# Recent spikes
|
|
608
|
+
recent_spikes = report[:trending_errors].select do |trend|
|
|
609
|
+
trend[:stats][:trending] == "increasing"
|
|
610
|
+
end
|
|
611
|
+
----
|
|
612
|
+
|
|
613
|
+
=== Troubleshooting
|
|
614
|
+
|
|
615
|
+
==== Memory Usage
|
|
616
|
+
|
|
617
|
+
The ErrorReporter keeps the last 100 errors per category. For high-volume applications, reset the reporter periodically or subclass it to add custom cleanup:
|
|
618
|
+
|
|
619
|
+
[source,ruby]
|
|
620
|
+
----
|
|
621
|
+
# Reset periodically
|
|
622
|
+
reporter.reset
|
|
623
|
+
|
|
624
|
+
# Or implement custom cleanup
|
|
625
|
+
class CustomErrorReporter < Fractor::ErrorReporter
|
|
626
|
+
def record(work_result, job_name: nil)
|
|
627
|
+
super
|
|
628
|
+
cleanup_if_needed
|
|
629
|
+
end
|
|
630
|
+
|
|
631
|
+
private
|
|
632
|
+
|
|
633
|
+
def cleanup_if_needed
|
|
634
|
+
# Custom cleanup logic
|
|
635
|
+
if total_errors > 10_000
|
|
636
|
+
@mutex.synchronize do
|
|
637
|
+
# Keep only recent categories
|
|
638
|
+
@by_category.select! do |_, stats|
|
|
639
|
+
stats.recent_errors.any? { |e| e[:timestamp] > 1.hour.ago }
|
|
640
|
+
end
|
|
641
|
+
end
|
|
642
|
+
end
|
|
643
|
+
end
|
|
644
|
+
end
|
|
645
|
+
----
|
|
646
|
+
|
|
647
|
+
==== Thread Safety
|
|
648
|
+
|
|
649
|
+
All ErrorReporter operations are thread-safe, so no additional synchronization is needed:
|
|
650
|
+
|
|
651
|
+
[source,ruby]
|
|
652
|
+
----
|
|
653
|
+
# Safe to use from multiple threads
|
|
654
|
+
threads = 10.times.map do
|
|
655
|
+
Thread.new do
|
|
656
|
+
100.times do |i|
|
|
657
|
+
result = process_work(i)
|
|
658
|
+
reporter.record(result) # Thread-safe
|
|
659
|
+
end
|
|
660
|
+
end
|
|
661
|
+
end
|
|
662
|
+
|
|
663
|
+
threads.each(&:join)
|
|
664
|
+
----
|
|
665
|
+
|
|
666
|
+
=== See Also
|
|
667
|
+
|
|
668
|
+
* link:api[API Reference] - Complete API documentation
|
|
669
|
+
* link:../guides/core-concepts[Core Concepts] - Understanding WorkResult
|
|
670
|
+
* link:../features/workflows[Workflows] - Workflow error handling
|