fractor 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +227 -102
  3. data/README.adoc +113 -1940
  4. data/docs/.lycheeignore +16 -0
  5. data/docs/Gemfile +24 -0
  6. data/docs/README.md +157 -0
  7. data/docs/_config.yml +151 -0
  8. data/docs/_features/error-handling.adoc +1192 -0
  9. data/docs/_features/index.adoc +80 -0
  10. data/docs/_features/monitoring.adoc +589 -0
  11. data/docs/_features/signal-handling.adoc +202 -0
  12. data/docs/_features/workflows.adoc +1235 -0
  13. data/docs/_guides/continuous-mode.adoc +736 -0
  14. data/docs/_guides/cookbook.adoc +1133 -0
  15. data/docs/_guides/index.adoc +55 -0
  16. data/docs/_guides/pipeline-mode.adoc +730 -0
  17. data/docs/_guides/troubleshooting.adoc +358 -0
  18. data/docs/_pages/architecture.adoc +1390 -0
  19. data/docs/_pages/core-concepts.adoc +1392 -0
  20. data/docs/_pages/design-principles.adoc +862 -0
  21. data/docs/_pages/getting-started.adoc +290 -0
  22. data/docs/_pages/installation.adoc +143 -0
  23. data/docs/_reference/api.adoc +1080 -0
  24. data/docs/_reference/error-reporting.adoc +670 -0
  25. data/docs/_reference/examples.adoc +181 -0
  26. data/docs/_reference/index.adoc +96 -0
  27. data/docs/_reference/troubleshooting.adoc +862 -0
  28. data/docs/_tutorials/complex-workflows.adoc +1022 -0
  29. data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
  30. data/docs/_tutorials/first-application.adoc +384 -0
  31. data/docs/_tutorials/index.adoc +48 -0
  32. data/docs/_tutorials/long-running-services.adoc +931 -0
  33. data/docs/assets/images/favicon-16.png +0 -0
  34. data/docs/assets/images/favicon-32.png +0 -0
  35. data/docs/assets/images/favicon-48.png +0 -0
  36. data/docs/assets/images/favicon.ico +0 -0
  37. data/docs/assets/images/favicon.png +0 -0
  38. data/docs/assets/images/favicon.svg +45 -0
  39. data/docs/assets/images/fractor-icon.svg +49 -0
  40. data/docs/assets/images/fractor-logo.svg +61 -0
  41. data/docs/index.adoc +131 -0
  42. data/docs/lychee.toml +39 -0
  43. data/examples/api_aggregator/README.adoc +627 -0
  44. data/examples/api_aggregator/api_aggregator.rb +376 -0
  45. data/examples/auto_detection/README.adoc +407 -29
  46. data/examples/continuous_chat_common/message_protocol.rb +1 -1
  47. data/examples/error_reporting.rb +207 -0
  48. data/examples/file_processor/README.adoc +170 -0
  49. data/examples/file_processor/file_processor.rb +615 -0
  50. data/examples/file_processor/sample_files/invalid.csv +1 -0
  51. data/examples/file_processor/sample_files/orders.xml +24 -0
  52. data/examples/file_processor/sample_files/products.json +23 -0
  53. data/examples/file_processor/sample_files/users.csv +6 -0
  54. data/examples/hierarchical_hasher/README.adoc +629 -41
  55. data/examples/image_processor/README.adoc +610 -0
  56. data/examples/image_processor/image_processor.rb +349 -0
  57. data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
  58. data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
  59. data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
  60. data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
  61. data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
  62. data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
  63. data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
  64. data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
  65. data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
  66. data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
  67. data/examples/image_processor/test_images/sample_1.png +1 -0
  68. data/examples/image_processor/test_images/sample_10.png +1 -0
  69. data/examples/image_processor/test_images/sample_2.png +1 -0
  70. data/examples/image_processor/test_images/sample_3.png +1 -0
  71. data/examples/image_processor/test_images/sample_4.png +1 -0
  72. data/examples/image_processor/test_images/sample_5.png +1 -0
  73. data/examples/image_processor/test_images/sample_6.png +1 -0
  74. data/examples/image_processor/test_images/sample_7.png +1 -0
  75. data/examples/image_processor/test_images/sample_8.png +1 -0
  76. data/examples/image_processor/test_images/sample_9.png +1 -0
  77. data/examples/log_analyzer/README.adoc +662 -0
  78. data/examples/log_analyzer/log_analyzer.rb +579 -0
  79. data/examples/log_analyzer/sample_logs/apache.log +20 -0
  80. data/examples/log_analyzer/sample_logs/json.log +15 -0
  81. data/examples/log_analyzer/sample_logs/nginx.log +15 -0
  82. data/examples/log_analyzer/sample_logs/rails.log +29 -0
  83. data/examples/multi_work_type/README.adoc +576 -26
  84. data/examples/performance_monitoring.rb +120 -0
  85. data/examples/pipeline_processing/README.adoc +740 -26
  86. data/examples/pipeline_processing/pipeline_processing.rb +2 -2
  87. data/examples/priority_work_example.rb +155 -0
  88. data/examples/producer_subscriber/README.adoc +889 -46
  89. data/examples/scatter_gather/README.adoc +829 -27
  90. data/examples/simple/README.adoc +347 -0
  91. data/examples/specialized_workers/README.adoc +622 -26
  92. data/examples/specialized_workers/specialized_workers.rb +44 -8
  93. data/examples/stream_processor/README.adoc +206 -0
  94. data/examples/stream_processor/stream_processor.rb +284 -0
  95. data/examples/web_scraper/README.adoc +625 -0
  96. data/examples/web_scraper/web_scraper.rb +285 -0
  97. data/examples/workflow/README.adoc +406 -0
  98. data/examples/workflow/circuit_breaker/README.adoc +360 -0
  99. data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
  100. data/examples/workflow/conditional/README.adoc +483 -0
  101. data/examples/workflow/conditional/conditional_workflow.rb +215 -0
  102. data/examples/workflow/dead_letter_queue/README.adoc +374 -0
  103. data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
  104. data/examples/workflow/fan_out/README.adoc +381 -0
  105. data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
  106. data/examples/workflow/retry/README.adoc +248 -0
  107. data/examples/workflow/retry/retry_workflow.rb +195 -0
  108. data/examples/workflow/simple_linear/README.adoc +267 -0
  109. data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
  110. data/examples/workflow/simplified/README.adoc +329 -0
  111. data/examples/workflow/simplified/simplified_workflow.rb +222 -0
  112. data/exe/fractor +10 -0
  113. data/lib/fractor/cli.rb +288 -0
  114. data/lib/fractor/configuration.rb +307 -0
  115. data/lib/fractor/continuous_server.rb +60 -65
  116. data/lib/fractor/error_formatter.rb +72 -0
  117. data/lib/fractor/error_report_generator.rb +152 -0
  118. data/lib/fractor/error_reporter.rb +244 -0
  119. data/lib/fractor/error_statistics.rb +147 -0
  120. data/lib/fractor/execution_tracer.rb +162 -0
  121. data/lib/fractor/logger.rb +230 -0
  122. data/lib/fractor/main_loop_handler.rb +406 -0
  123. data/lib/fractor/main_loop_handler3.rb +135 -0
  124. data/lib/fractor/main_loop_handler4.rb +299 -0
  125. data/lib/fractor/performance_metrics_collector.rb +181 -0
  126. data/lib/fractor/performance_monitor.rb +215 -0
  127. data/lib/fractor/performance_report_generator.rb +202 -0
  128. data/lib/fractor/priority_work.rb +93 -0
  129. data/lib/fractor/priority_work_queue.rb +189 -0
  130. data/lib/fractor/result_aggregator.rb +32 -0
  131. data/lib/fractor/shutdown_handler.rb +168 -0
  132. data/lib/fractor/signal_handler.rb +80 -0
  133. data/lib/fractor/supervisor.rb +382 -269
  134. data/lib/fractor/supervisor_logger.rb +88 -0
  135. data/lib/fractor/version.rb +1 -1
  136. data/lib/fractor/work.rb +12 -0
  137. data/lib/fractor/work_distribution_manager.rb +151 -0
  138. data/lib/fractor/work_queue.rb +20 -0
  139. data/lib/fractor/work_result.rb +181 -9
  140. data/lib/fractor/worker.rb +73 -0
  141. data/lib/fractor/workflow/builder.rb +210 -0
  142. data/lib/fractor/workflow/chain_builder.rb +169 -0
  143. data/lib/fractor/workflow/circuit_breaker.rb +183 -0
  144. data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
  145. data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
  146. data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
  147. data/lib/fractor/workflow/execution_hooks.rb +39 -0
  148. data/lib/fractor/workflow/execution_strategy.rb +225 -0
  149. data/lib/fractor/workflow/execution_trace.rb +134 -0
  150. data/lib/fractor/workflow/helpers.rb +191 -0
  151. data/lib/fractor/workflow/job.rb +290 -0
  152. data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
  153. data/lib/fractor/workflow/logger.rb +110 -0
  154. data/lib/fractor/workflow/pre_execution_context.rb +193 -0
  155. data/lib/fractor/workflow/retry_config.rb +156 -0
  156. data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
  157. data/lib/fractor/workflow/retry_strategy.rb +93 -0
  158. data/lib/fractor/workflow/structured_logger.rb +30 -0
  159. data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
  160. data/lib/fractor/workflow/visualizer.rb +211 -0
  161. data/lib/fractor/workflow/workflow_context.rb +132 -0
  162. data/lib/fractor/workflow/workflow_executor.rb +669 -0
  163. data/lib/fractor/workflow/workflow_result.rb +55 -0
  164. data/lib/fractor/workflow/workflow_validator.rb +295 -0
  165. data/lib/fractor/workflow.rb +333 -0
  166. data/lib/fractor/wrapped_ractor.rb +66 -101
  167. data/lib/fractor/wrapped_ractor3.rb +161 -0
  168. data/lib/fractor/wrapped_ractor4.rb +242 -0
  169. data/lib/fractor.rb +92 -4
  170. metadata +179 -6
  171. data/tests/sample.rb.bak +0 -309
  172. data/tests/sample_working.rb.bak +0 -209
--- /dev/null
+++ b/data/examples/log_analyzer/log_analyzer.rb
@@ -0,0 +1,579 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require_relative "../../lib/fractor"
+ require "zlib"
+ require "zip"
+ require "json"
+ require "time"
+ require "benchmark"
+
+ # Work item for log chunk processing
+ class LogWork < Fractor::Work
+   attr_reader :file_path, :chunk_start, :chunk_size, :format
+
+   def initialize(file_path:, chunk_start:, chunk_size:, format: :auto)
+     @file_path = file_path
+     @chunk_start = chunk_start
+     @chunk_size = chunk_size
+     @format = format
+     # Pass a hash as input to satisfy Fractor::Work's requirement
+     super({ file_path: file_path, chunk_start: chunk_start, chunk_size: chunk_size, format: format })
+   end
+
+   def to_s
+     "LogWork(#{File.basename(file_path)}, #{chunk_start}..#{chunk_start + chunk_size})"
+   end
+ end
+
+ # Worker for processing log chunks
+ class LogAnalyzerWorker < Fractor::Worker
+   def process(work)
+     return nil unless work.is_a?(LogWork)
+
+     lines = read_chunk(work.file_path, work.chunk_start, work.chunk_size)
+     format = detect_format(lines, work.format)
+
+     stats = {
+       file: File.basename(work.file_path),
+       chunk_start: work.chunk_start,
+       chunk_size: work.chunk_size,
+       lines_processed: 0,
+       errors: 0,
+       warnings: 0,
+       info: 0,
+       debug: 0,
+       error_messages: [],
+       warning_messages: [],
+       timestamps: [],
+       status_codes: Hash.new(0),
+       response_times: [],
+       unique_ips: Set.new,
+       format: format
+     }
+
+     lines.each do |line|
+       next if line.strip.empty?
+
+       stats[:lines_processed] += 1
+       parse_line(line, format, stats)
+     end
+
+     # Convert Set to Array for serialization
+     stats[:unique_ips] = stats[:unique_ips].to_a
+     stats
+   end
+
+   private
+
+   def read_chunk(file_path, start, size)
+     if file_path.end_with?(".gz")
+       read_gzip_chunk(file_path, start, size)
+     elsif file_path.end_with?(".zip")
+       read_zip_chunk(file_path, start, size)
+     else
+       read_plain_chunk(file_path, start, size)
+     end
+   end
+
+   def read_plain_chunk(file_path, start, size)
+     lines = []
+     File.open(file_path, "r") do |f|
+       f.seek(start)
+       content = f.read(size)
+       return [] unless content
+
+       # Split on newlines without using global $/
+       lines = content.split("\n")
+       # Add back the newline to each line except potentially the last
+       lines = lines.map { |line| line + "\n" }
+     end
+     lines
+   rescue EOFError, Errno::EINVAL
+     lines
+   end
+
+   def read_gzip_chunk(file_path, start, size)
+     lines = []
+     Zlib::GzipReader.open(file_path) do |gz|
+       # For gzip, read the entire content and split
+       content = gz.read
+       all_lines = content.split("\n").map { |line| line + "\n" }
+
+       # Calculate which lines fall in our chunk range
+       current_pos = 0
+       start_line = 0
+       all_lines.each_with_index do |line, idx|
+         if current_pos >= start
+           start_line = idx
+           break
+         end
+         current_pos += line.bytesize
+       end
+
+       # Collect lines until we reach size limit
+       read_size = 0
+       all_lines[start_line..-1].each do |line|
+         break if read_size >= size
+         lines << line
+         read_size += line.bytesize
+       end
+     end
+     lines
+   rescue EOFError, Zlib::GzipFile::Error
+     lines
+   end
+
+   def read_zip_chunk(file_path, start, size)
+     lines = []
+     Zip::File.open(file_path) do |zip_file|
+       # Process first entry only
+       entry = zip_file.entries.first
+       next unless entry
+
+       content = entry.get_input_stream.read
+       lines = content.lines[start / 100, size / 100] || []
+     end
+     lines
+   rescue Zip::Error
+     lines
+   end
+
+   def detect_format(lines, requested_format)
+     return requested_format unless requested_format == :auto
+
+     sample = lines.first(5).join("\n")
+
+     if sample.match?(/^\{/)
+       :json
+     elsif sample.match?(/^\d+\.\d+\.\d+\.\d+ - - \[/)
+       :apache
+     elsif sample.match?(/\[.*\] "(GET|POST|PUT|DELETE|PATCH)/)
+       :nginx
+     elsif sample.match?(/\] (ERROR|WARN|INFO|DEBUG|FATAL) -- /)
+       :rails
+     else
+       :generic
+     end
+   end
+
+   def parse_line(line, format, stats)
+     case format
+     when :apache
+       parse_apache_line(line, stats)
+     when :nginx
+       parse_nginx_line(line, stats)
+     when :rails
+       parse_rails_line(line, stats)
+     when :json
+       parse_json_line(line, stats)
+     else
+       parse_generic_line(line, stats)
+     end
+   end
+
+   def parse_apache_line(line, stats)
+     # Apache format: 127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /index.html HTTP/1.0" 200 2326
+     if line =~ /^(\S+) \S+ \S+ \[(.*?)\] "(\S+) (\S+) (\S+)" (\d+) (\d+)/
+       ip = Regexp.last_match(1)
+       timestamp = Regexp.last_match(2)
+       method = Regexp.last_match(3)
+       path = Regexp.last_match(4)
+       status = Regexp.last_match(6).to_i
+       bytes = Regexp.last_match(7).to_i
+
+       stats[:unique_ips] << ip
+       stats[:status_codes][status] += 1
+       stats[:timestamps] << timestamp
+
+       if status >= 500
+         stats[:errors] += 1
+         stats[:error_messages] << "#{method} #{path} - Status #{status}"
+       elsif status >= 400
+         stats[:warnings] += 1
+         stats[:warning_messages] << "#{method} #{path} - Status #{status}"
+       else
+         stats[:info] += 1
+       end
+     end
+   end
+
+   def parse_nginx_line(line, stats)
+     # Nginx format: 192.168.1.1 [10/Oct/2000:13:55:36 +0000] "GET /api/users HTTP/1.1" 200 1234 0.123
+     if line =~ /^(\S+) \[(.*?)\] "(\S+) (\S+) (\S+)" (\d+) (\d+)(?: (\d+\.\d+))?/
+       ip = Regexp.last_match(1)
+       timestamp = Regexp.last_match(2)
+       method = Regexp.last_match(3)
+       path = Regexp.last_match(4)
+       status = Regexp.last_match(6).to_i
+       bytes = Regexp.last_match(7).to_i
+       response_time = Regexp.last_match(8)&.to_f
+
+       stats[:unique_ips] << ip
+       stats[:status_codes][status] += 1
+       stats[:timestamps] << timestamp
+       stats[:response_times] << response_time if response_time
+
+       if status >= 500
+         stats[:errors] += 1
+         stats[:error_messages] << "#{method} #{path} - Status #{status}"
+       elsif status >= 400
+         stats[:warnings] += 1
+         stats[:warning_messages] << "#{method} #{path} - Status #{status}"
+       else
+         stats[:info] += 1
+       end
+     end
+   end
+
+   def parse_rails_line(line, stats)
+     # Rails format: [2024-10-25 12:00:00] ERROR -- : Failed to connect to database
+     if line =~ /\[(.*?)\] (ERROR|WARN|INFO|DEBUG|FATAL)/
+       timestamp = Regexp.last_match(1)
+       level = Regexp.last_match(2)
+
+       stats[:timestamps] << timestamp
+
+       case level
+       when "ERROR", "FATAL"
+         stats[:errors] += 1
+         stats[:error_messages] << line.strip
+       when "WARN"
+         stats[:warnings] += 1
+         stats[:warning_messages] << line.strip
+       when "INFO"
+         stats[:info] += 1
+       when "DEBUG"
+         stats[:debug] += 1
+       end
+     end
+   end
+
+   def parse_json_line(line, stats)
+     begin
+       data = JSON.parse(line)
+       level = data["level"] || data["severity"] || "INFO"
+       timestamp = data["timestamp"] || data["time"]
+       message = data["message"] || data["msg"]
+
+       stats[:timestamps] << timestamp if timestamp
+
+       case level.upcase
+       when "ERROR", "FATAL"
+         stats[:errors] += 1
+         stats[:error_messages] << message if message
+       when "WARN", "WARNING"
+         stats[:warnings] += 1
+         stats[:warning_messages] << message if message
+       when "INFO"
+         stats[:info] += 1
+       when "DEBUG"
+         stats[:debug] += 1
+       end
+
+       if data["status_code"]
+         stats[:status_codes][data["status_code"]] += 1
+       end
+
+       if data["response_time"]
+         stats[:response_times] << data["response_time"]
+       end
+
+       if data["ip"] || data["remote_addr"]
+         stats[:unique_ips] << (data["ip"] || data["remote_addr"])
+       end
+     rescue JSON::ParserError
+       # Treat as generic line if JSON parsing fails
+       parse_generic_line(line, stats)
+     end
+   end
+
+   def parse_generic_line(line, stats)
+     if line =~ /error|fail|exception/i
+       stats[:errors] += 1
+       stats[:error_messages] << line.strip
+     elsif line =~ /warn|warning/i
+       stats[:warnings] += 1
+       stats[:warning_messages] << line.strip
+     else
+       stats[:info] += 1
+     end
+   end
+ end
+
+ # Log analyzer that processes files in parallel
+ class LogAnalyzer
+   attr_reader :num_workers, :chunk_size
+
+   def initialize(num_workers: 4, chunk_size: 1024 * 1024)
+     @num_workers = num_workers
+     @chunk_size = chunk_size
+   end
+
+   def analyze(file_paths, format: :auto)
+     work_items = []
+
+     file_paths.each do |file_path|
+       unless File.exist?(file_path)
+         warn "File not found: #{file_path}"
+         next
+       end
+
+       file_size = File.size(file_path)
+       num_chunks = (file_size.to_f / chunk_size).ceil
+
+       num_chunks.times do |i|
+         chunk_start = i * chunk_size
+         current_chunk_size = [chunk_size, file_size - chunk_start].min
+
+         work_items << LogWork.new(
+           file_path: file_path,
+           chunk_start: chunk_start,
+           chunk_size: current_chunk_size,
+           format: format
+         )
+       end
+     end
+
+     puts "Processing #{work_items.size} chunks from #{file_paths.size} file(s)..."
+
+     time = Benchmark.realtime do
+       supervisor = Fractor::Supervisor.new(
+         worker_pools: [
+           { worker_class: LogAnalyzerWorker, num_workers: num_workers }
+         ]
+       )
+
+       supervisor.add_work_items(work_items)
+       supervisor.run
+
+       results = supervisor.results
+       @results = results.results + results.errors
+     end
+
+     aggregate_results(@results, time)
+   end
+
+   private
+
+   def aggregate_results(results, processing_time)
+     aggregated = {
+       total_lines: 0,
+       total_errors: 0,
+       total_warnings: 0,
+       total_info: 0,
+       total_debug: 0,
+       error_messages: [],
+       warning_messages: [],
+       status_codes: Hash.new(0),
+       response_times: [],
+       unique_ips: Set.new,
+       formats_detected: Hash.new(0),
+       processing_time: processing_time,
+       chunks_processed: 0
+     }
+
+     results.each do |work_result|
+       next unless work_result
+
+       # Extract actual result from WorkResult object
+       result = work_result.respond_to?(:result) ? work_result.result : work_result
+
+       next unless result.is_a?(Hash)
+
+       aggregated[:chunks_processed] += 1
+       aggregated[:total_lines] += result[:lines_processed] || 0
+       aggregated[:total_errors] += result[:errors] || 0
+       aggregated[:total_warnings] += result[:warnings] || 0
+       aggregated[:total_info] += result[:info] || 0
+       aggregated[:total_debug] += result[:debug] || 0
+       aggregated[:error_messages].concat(result[:error_messages] || [])
+       aggregated[:warning_messages].concat(result[:warning_messages] || [])
+       aggregated[:formats_detected][result[:format]] += 1 if result[:format]
+
+       if result[:status_codes]
+         result[:status_codes].each do |code, count|
+           aggregated[:status_codes][code] += count
+         end
+       end
+
+       aggregated[:response_times].concat(result[:response_times] || [])
+
+       if result[:unique_ips]
+         aggregated[:unique_ips].merge(result[:unique_ips])
+       end
+     end
+
+     # Limit message arrays to avoid excessive memory usage
+     aggregated[:error_messages] = aggregated[:error_messages].first(100)
+     aggregated[:warning_messages] = aggregated[:warning_messages].first(100)
+
+     aggregated
+   end
+ end
+
+ # Report generator
+ class LogReport
+   def self.generate(stats, output_file = nil)
+     report = build_report(stats)
+
+     if output_file
+       File.write(output_file, report)
+       puts "Report saved to #{output_file}"
+     else
+       puts report
+     end
+
+     report
+   end
+
+   def self.build_report(stats)
+     lines = []
+     lines << "=" * 80
+     lines << "LOG ANALYSIS REPORT"
+     lines << "=" * 80
+     lines << ""
+
+     # Summary
+     lines << "SUMMARY"
+     lines << "-" * 80
+     lines << format("Total lines processed: %d", stats[:total_lines])
+     lines << format("Processing time: %.2f seconds", stats[:processing_time])
+     lines << format("Lines per second: %.0f", stats[:total_lines] / stats[:processing_time])
+     lines << format("Chunks processed: %d", stats[:chunks_processed])
+     lines << ""
+
+     # Log levels
+     lines << "LOG LEVELS"
+     lines << "-" * 80
+     lines << format("Errors: %d (%.1f%%)", stats[:total_errors], percentage(stats[:total_errors], stats[:total_lines]))
+     lines << format("Warnings: %d (%.1f%%)", stats[:total_warnings], percentage(stats[:total_warnings], stats[:total_lines]))
+     lines << format("Info: %d (%.1f%%)", stats[:total_info], percentage(stats[:total_info], stats[:total_lines]))
+     lines << format("Debug: %d (%.1f%%)", stats[:total_debug], percentage(stats[:total_debug], stats[:total_lines]))
+     lines << ""
+
+     # HTTP Status codes
+     unless stats[:status_codes].empty?
+       lines << "HTTP STATUS CODES"
+       lines << "-" * 80
+       stats[:status_codes].sort.each do |code, count|
+         lines << format("  %d: %d requests", code, count)
+       end
+       lines << ""
+     end
+
+     # Response times
+     unless stats[:response_times].empty?
+       lines << "RESPONSE TIMES"
+       lines << "-" * 80
+       lines << format("  Average: %.3f seconds", average(stats[:response_times]))
+       lines << format("  Min: %.3f seconds", stats[:response_times].min)
+       lines << format("  Max: %.3f seconds", stats[:response_times].max)
+       lines << ""
+     end
+
+     # Unique IPs
+     unless stats[:unique_ips].empty?
+       lines << "NETWORK"
+       lines << "-" * 80
+       lines << format("Unique IP addresses: %d", stats[:unique_ips].size)
+       lines << ""
+     end
+
+     # Formats detected
+     lines << "LOG FORMATS DETECTED"
+     lines << "-" * 80
+     stats[:formats_detected].each do |format, count|
+       lines << format("  %s: %d chunks", format, count)
+     end
+     lines << ""
+
+     # Top errors
+     unless stats[:error_messages].empty?
+       lines << "TOP ERRORS (up to 10)"
+       lines << "-" * 80
+       stats[:error_messages].first(10).each_with_index do |msg, i|
+         lines << format("%2d. %s", i + 1, msg[0, 100])
+       end
+       lines << ""
+     end
+
+     # Top warnings
+     unless stats[:warning_messages].empty?
+       lines << "TOP WARNINGS (up to 10)"
+       lines << "-" * 80
+       stats[:warning_messages].first(10).each_with_index do |msg, i|
+         lines << format("%2d. %s", i + 1, msg[0, 100])
+       end
+       lines << ""
+     end
+
+     lines << "=" * 80
+
+     lines.join("\n")
+   end
+
+   def self.percentage(part, total)
+     return 0.0 if total.zero?
+
+     (part.to_f / total * 100)
+   end
+
+   def self.average(numbers)
+     return 0.0 if numbers.empty?
+
+     numbers.sum.to_f / numbers.size
+   end
+ end
+
+ # Run example if executed directly
+ if __FILE__ == $PROGRAM_NAME
+   require "optparse"
+
+   options = {
+     workers: 4,
+     chunk_size: 1024 * 1024,
+     format: :auto,
+     output: nil
+   }
+
+   OptionParser.new do |opts|
+     opts.banner = "Usage: log_analyzer.rb [options] FILE..."
+
+     opts.on("-w", "--workers NUM", Integer, "Number of worker ractors (default: 4)") do |n|
+       options[:workers] = n
+     end
+
+     opts.on("-c", "--chunk-size SIZE", Integer, "Chunk size in bytes (default: 1MB)") do |s|
+       options[:chunk_size] = s
+     end
+
+     opts.on("-f", "--format FORMAT", [:auto, :apache, :nginx, :rails, :json, :generic],
+             "Log format (auto, apache, nginx, rails, json, generic)") do |f|
+       options[:format] = f
+     end
+
+     opts.on("-o", "--output FILE", "Output report file") do |f|
+       options[:output] = f
+     end
+
+     opts.on("-h", "--help", "Show this message") do
+       puts opts
+       exit
+     end
+   end.parse!
+
+   if ARGV.empty?
+     puts "Error: No log files specified"
+     puts "Usage: log_analyzer.rb [options] FILE..."
+     exit 1
+   end
+
+   analyzer = LogAnalyzer.new(
+     num_workers: options[:workers],
+     chunk_size: options[:chunk_size]
+   )
+
+   stats = analyzer.analyze(ARGV, format: options[:format])
+   LogReport.generate(stats, options[:output])
+ end
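
The classes added above (LogWork, LogAnalyzerWorker, LogAnalyzer, LogReport) can also be driven programmatically instead of through the CLI block at the bottom of the file. A minimal usage sketch, not part of the diff, assuming the example file has been loaded and that the sample log path shown is a placeholder for a real file:

    # Minimal sketch (not part of this release); paths are placeholders.
    require_relative "log_analyzer"   # the example file shown above

    analyzer = LogAnalyzer.new(num_workers: 2, chunk_size: 64 * 1024)
    stats = analyzer.analyze(["sample_logs/apache.log"], format: :apache)

    # Prints the report to stdout; pass a file name as the second argument to save it instead.
    LogReport.generate(stats)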
--- /dev/null
+++ b/data/examples/log_analyzer/sample_logs/apache.log
@@ -0,0 +1,20 @@
+ 127.0.0.1 - - [25/Oct/2024:13:55:36 +0800] "GET /index.html HTTP/1.1" 200 2326
+ 192.168.1.100 - - [25/Oct/2024:13:55:37 +0800] "POST /api/users HTTP/1.1" 201 512
+ 10.0.0.50 - - [25/Oct/2024:13:55:38 +0800] "GET /images/logo.png HTTP/1.1" 304 0
+ 172.16.0.25 - - [25/Oct/2024:13:55:39 +0800] "GET /api/products HTTP/1.1" 200 4096
+ 127.0.0.1 - - [25/Oct/2024:13:55:40 +0800] "DELETE /api/users/123 HTTP/1.1" 404 256
+ 192.168.1.100 - - [25/Oct/2024:13:55:41 +0800] "GET /admin/dashboard HTTP/1.1" 403 128
+ 10.0.0.50 - - [25/Oct/2024:13:55:42 +0800] "POST /api/orders HTTP/1.1" 500 1024
+ 172.16.0.25 - - [25/Oct/2024:13:55:43 +0800] "GET /api/cart HTTP/1.1" 200 2048
+ 127.0.0.1 - - [25/Oct/2024:13:55:44 +0800] "PUT /api/users/456 HTTP/1.1" 200 768
+ 192.168.1.100 - - [25/Oct/2024:13:55:45 +0800] "GET /static/style.css HTTP/1.1" 200 15360
+ 10.0.0.50 - - [25/Oct/2024:13:55:46 +0800] "GET /api/search?q=laptop HTTP/1.1" 200 8192
+ 172.16.0.25 - - [25/Oct/2024:13:55:47 +0800] "POST /api/login HTTP/1.1" 401 64
+ 127.0.0.1 - - [25/Oct/2024:13:55:48 +0800] "GET /api/profile HTTP/1.1" 200 1536
+ 192.168.1.100 - - [25/Oct/2024:13:55:49 +0800] "GET /docs/api.html HTTP/1.1" 200 32768
+ 10.0.0.50 - - [25/Oct/2024:13:55:50 +0800] "DELETE /api/sessions/789 HTTP/1.1" 204 0
+ 172.16.0.25 - - [25/Oct/2024:13:55:51 +0800] "GET /health HTTP/1.1" 200 16
+ 127.0.0.1 - - [25/Oct/2024:13:55:52 +0800] "POST /api/comments HTTP/1.1" 503 256
+ 192.168.1.100 - - [25/Oct/2024:13:55:53 +0800] "GET /api/notifications HTTP/1.1" 200 4096
+ 10.0.0.50 - - [25/Oct/2024:13:55:54 +0800] "PATCH /api/settings HTTP/1.1" 200 512
+ 172.16.0.25 - - [25/Oct/2024:13:55:55 +0800] "GET /api/stats HTTP/1.1" 500 2048
--- /dev/null
+++ b/data/examples/log_analyzer/sample_logs/json.log
@@ -0,0 +1,15 @@
+ {"timestamp":"2024-10-25T13:55:36+08:00","level":"INFO","message":"Application started","service":"api-server","version":"1.0.0"}
+ {"timestamp":"2024-10-25T13:55:37+08:00","level":"INFO","message":"Request received","method":"GET","path":"/api/users","ip":"192.168.1.1","status_code":200,"response_time":0.045}
+ {"timestamp":"2024-10-25T13:55:38+08:00","level":"ERROR","message":"Database connection failed","error":"ConnectionRefused","service":"api-server","retry_count":3}
+ {"timestamp":"2024-10-25T13:55:39+08:00","level":"WARN","message":"High memory usage detected","memory_mb":1024,"threshold_mb":800}
+ {"timestamp":"2024-10-25T13:55:40+08:00","level":"INFO","message":"Request received","method":"POST","path":"/api/orders","ip":"192.168.1.2","status_code":201,"response_time":0.156}
+ {"timestamp":"2024-10-25T13:55:41+08:00","level":"DEBUG","message":"Cache miss","key":"user:123","service":"cache"}
+ {"timestamp":"2024-10-25T13:55:42+08:00","level":"ERROR","message":"Payment processing failed","error":"InvalidCard","transaction_id":"txn_abc123","amount":99.99}
+ {"timestamp":"2024-10-25T13:55:43+08:00","level":"INFO","message":"Request received","method":"GET","path":"/api/products","ip":"192.168.1.3","status_code":200,"response_time":0.234}
+ {"timestamp":"2024-10-25T13:55:44+08:00","level":"WARN","message":"Rate limit approaching","ip":"192.168.1.1","requests":95,"limit":100}
+ {"timestamp":"2024-10-25T13:55:45+08:00","level":"INFO","message":"Background job started","job":"SendEmailJob","queue":"mailers"}
+ {"timestamp":"2024-10-25T13:55:46+08:00","level":"ERROR","message":"External API timeout","url":"https://api.external.com/data","timeout_seconds":30}
+ {"timestamp":"2024-10-25T13:55:47+08:00","level":"INFO","message":"Request received","method":"DELETE","path":"/api/cart/456","ip":"192.168.1.4","status_code":404,"response_time":0.012}
+ {"timestamp":"2024-10-25T13:55:48+08:00","level":"FATAL","message":"Critical system error","error":"OutOfMemory","available_mb":50}
+ {"timestamp":"2024-10-25T13:55:49+08:00","level":"INFO","message":"Background job completed","job":"SendEmailJob","duration_ms":2345}
+ {"timestamp":"2024-10-25T13:55:50+08:00","level":"WARN","message":"Slow query detected","query":"SELECT * FROM orders","duration_ms":5678}
--- /dev/null
+++ b/data/examples/log_analyzer/sample_logs/nginx.log
@@ -0,0 +1,15 @@
+ 192.168.1.1 [25/Oct/2024:13:55:36 +0800] "GET /api/users HTTP/1.1" 200 1234 0.123
+ 192.168.1.2 [25/Oct/2024:13:55:37 +0800] "POST /api/orders HTTP/1.1" 201 512 0.056
+ 192.168.1.3 [25/Oct/2024:13:55:38 +0800] "GET /api/products HTTP/1.1" 200 8192 0.234
+ 192.168.1.4 [25/Oct/2024:13:55:39 +0800] "DELETE /api/cart/123 HTTP/1.1" 404 256 0.012
+ 192.168.1.1 [25/Oct/2024:13:55:40 +0800] "GET /api/categories HTTP/1.1" 200 2048 0.089
+ 192.168.1.5 [25/Oct/2024:13:55:41 +0800] "PUT /api/profile HTTP/1.1" 500 1024 0.456
+ 192.168.1.2 [25/Oct/2024:13:55:42 +0800] "GET /api/search HTTP/1.1" 200 4096 0.178
+ 192.168.1.6 [25/Oct/2024:13:55:43 +0800] "POST /api/login HTTP/1.1" 401 64 0.023
+ 192.168.1.3 [25/Oct/2024:13:55:44 +0800] "GET /api/dashboard HTTP/1.1" 403 128 0.034
+ 192.168.1.7 [25/Oct/2024:13:55:45 +0800] "GET /health HTTP/1.1" 200 16 0.003
+ 192.168.1.1 [25/Oct/2024:13:55:46 +0800] "PATCH /api/settings HTTP/1.1" 200 768 0.045
+ 192.168.1.8 [25/Oct/2024:13:55:47 +0800] "GET /api/notifications HTTP/1.1" 200 2048 0.098
+ 192.168.1.4 [25/Oct/2024:13:55:48 +0800] "POST /api/comments HTTP/1.1" 503 256 0.567
+ 192.168.1.9 [25/Oct/2024:13:55:49 +0800] "GET /api/stats HTTP/1.1" 200 16384 0.345
+ 192.168.1.2 [25/Oct/2024:13:55:50 +0800] "DELETE /api/sessions/456 HTTP/1.1" 204 0 0.012
--- /dev/null
+++ b/data/examples/log_analyzer/sample_logs/rails.log
@@ -0,0 +1,29 @@
+ [2024-10-25 13:55:36] INFO -- : Started GET "/api/users" for 127.0.0.1 at 2024-10-25 13:55:36 +0800
+ [2024-10-25 13:55:36] INFO -- : Processing by UsersController#index as JSON
+ [2024-10-25 13:55:36] INFO -- : Completed 200 OK in 45ms (Views: 12.3ms | ActiveRecord: 8.5ms)
+ [2024-10-25 13:55:37] ERROR -- : ActiveRecord::RecordNotFound: Couldn't find User with 'id'=999
+ [2024-10-25 13:55:37] ERROR -- : app/controllers/users_controller.rb:42:in `show'
+ [2024-10-25 13:55:38] WARN -- : Timeout::Error occurred while connecting to external API
+ [2024-10-25 13:55:38] INFO -- : Started POST "/api/orders" for 192.168.1.1 at 2024-10-25 13:55:38 +0800
+ [2024-10-25 13:55:38] INFO -- : Processing by OrdersController#create as JSON
+ [2024-10-25 13:55:38] DEBUG -- : Parameters: {"product_id"=>"123", "quantity"=>2}
+ [2024-10-25 13:55:39] INFO -- : Completed 201 Created in 156ms (Views: 5.2ms | ActiveRecord: 125.3ms)
+ [2024-10-25 13:55:40] ERROR -- : Redis::CannotConnectError: Error connecting to Redis on localhost:6379
+ [2024-10-25 13:55:40] ERROR -- : lib/cache_store.rb:15:in `fetch'
+ [2024-10-25 13:55:41] WARN -- : Slow query detected: SELECT * FROM products WHERE category = 'electronics' (2345.6ms)
+ [2024-10-25 13:55:42] INFO -- : Started DELETE "/api/sessions/abc123" for 10.0.0.1 at 2024-10-25 13:55:42 +0800
+ [2024-10-25 13:55:42] INFO -- : Processing by SessionsController#destroy as JSON
+ [2024-10-25 13:55:42] INFO -- : Completed 204 No Content in 23ms
+ [2024-10-25 13:55:43] FATAL -- : SystemExit: Database connection pool exhausted
+ [2024-10-25 13:55:43] FATAL -- : config/database.yml pool size may be too small
+ [2024-10-25 13:55:44] ERROR -- : NoMethodError: undefined method `email' for nil:NilClass
+ [2024-10-25 13:55:44] ERROR -- : app/mailers/user_mailer.rb:10:in `welcome_email'
+ [2024-10-25 13:55:45] INFO -- : Started PATCH "/api/settings" for 172.16.0.1 at 2024-10-25 13:55:45 +0800
+ [2024-10-25 13:55:45] INFO -- : Processing by SettingsController#update as JSON
+ [2024-10-25 13:55:45] DEBUG -- : Current user: #<User id: 42, email: "user@example.com">
+ [2024-10-25 13:55:45] INFO -- : Completed 200 OK in 78ms (Views: 3.4ms | ActiveRecord: 45.2ms)
+ [2024-10-25 13:55:46] WARN -- : ActionController::ParameterMissing: param is missing or the value is empty: user
+ [2024-10-25 13:55:47] INFO -- : Cache hit: products/index/page_1
+ [2024-10-25 13:55:48] DEBUG -- : SQL (1.2ms) SELECT COUNT(*) FROM orders WHERE created_at > '2024-10-24'
+ [2024-10-25 13:55:49] ERROR -- : Errno::ECONNREFUSED: Connection refused - connect(2) for "api.external.com" port 443
+ [2024-10-25 13:55:50] INFO -- : Background job completed: SendEmailJob (queue: mailers)