fractor 0.1.4 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. checksums.yaml +4 -4
  2. data/.rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-main-ci-rubocop-yml +552 -0
  3. data/.rubocop.yml +14 -8
  4. data/.rubocop_todo.yml +284 -43
  5. data/README.adoc +111 -950
  6. data/docs/.lycheeignore +16 -0
  7. data/docs/Gemfile +24 -0
  8. data/docs/README.md +157 -0
  9. data/docs/_config.yml +151 -0
  10. data/docs/_features/error-handling.adoc +1192 -0
  11. data/docs/_features/index.adoc +80 -0
  12. data/docs/_features/monitoring.adoc +589 -0
  13. data/docs/_features/signal-handling.adoc +202 -0
  14. data/docs/_features/workflows.adoc +1235 -0
  15. data/docs/_guides/continuous-mode.adoc +736 -0
  16. data/docs/_guides/cookbook.adoc +1133 -0
  17. data/docs/_guides/index.adoc +55 -0
  18. data/docs/_guides/pipeline-mode.adoc +730 -0
  19. data/docs/_guides/troubleshooting.adoc +358 -0
  20. data/docs/_pages/architecture.adoc +1390 -0
  21. data/docs/_pages/core-concepts.adoc +1392 -0
  22. data/docs/_pages/design-principles.adoc +862 -0
  23. data/docs/_pages/getting-started.adoc +290 -0
  24. data/docs/_pages/installation.adoc +143 -0
  25. data/docs/_reference/api.adoc +1080 -0
  26. data/docs/_reference/error-reporting.adoc +670 -0
  27. data/docs/_reference/examples.adoc +181 -0
  28. data/docs/_reference/index.adoc +96 -0
  29. data/docs/_reference/troubleshooting.adoc +862 -0
  30. data/docs/_tutorials/complex-workflows.adoc +1022 -0
  31. data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
  32. data/docs/_tutorials/first-application.adoc +384 -0
  33. data/docs/_tutorials/index.adoc +48 -0
  34. data/docs/_tutorials/long-running-services.adoc +931 -0
  35. data/docs/assets/images/favicon-16.png +0 -0
  36. data/docs/assets/images/favicon-32.png +0 -0
  37. data/docs/assets/images/favicon-48.png +0 -0
  38. data/docs/assets/images/favicon.ico +0 -0
  39. data/docs/assets/images/favicon.png +0 -0
  40. data/docs/assets/images/favicon.svg +45 -0
  41. data/docs/assets/images/fractor-icon.svg +49 -0
  42. data/docs/assets/images/fractor-logo.svg +61 -0
  43. data/docs/index.adoc +131 -0
  44. data/docs/lychee.toml +39 -0
  45. data/examples/api_aggregator/README.adoc +627 -0
  46. data/examples/api_aggregator/api_aggregator.rb +376 -0
  47. data/examples/auto_detection/README.adoc +407 -29
  48. data/examples/auto_detection/auto_detection.rb +9 -9
  49. data/examples/continuous_chat_common/message_protocol.rb +53 -0
  50. data/examples/continuous_chat_fractor/README.adoc +217 -0
  51. data/examples/continuous_chat_fractor/chat_client.rb +303 -0
  52. data/examples/continuous_chat_fractor/chat_common.rb +83 -0
  53. data/examples/continuous_chat_fractor/chat_server.rb +167 -0
  54. data/examples/continuous_chat_fractor/simulate.rb +345 -0
  55. data/examples/continuous_chat_server/README.adoc +135 -0
  56. data/examples/continuous_chat_server/chat_client.rb +303 -0
  57. data/examples/continuous_chat_server/chat_server.rb +359 -0
  58. data/examples/continuous_chat_server/simulate.rb +343 -0
  59. data/examples/error_reporting.rb +207 -0
  60. data/examples/file_processor/README.adoc +170 -0
  61. data/examples/file_processor/file_processor.rb +615 -0
  62. data/examples/file_processor/sample_files/invalid.csv +1 -0
  63. data/examples/file_processor/sample_files/orders.xml +24 -0
  64. data/examples/file_processor/sample_files/products.json +23 -0
  65. data/examples/file_processor/sample_files/users.csv +6 -0
  66. data/examples/hierarchical_hasher/README.adoc +629 -41
  67. data/examples/hierarchical_hasher/hierarchical_hasher.rb +12 -8
  68. data/examples/image_processor/README.adoc +610 -0
  69. data/examples/image_processor/image_processor.rb +349 -0
  70. data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
  71. data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
  72. data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
  73. data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
  74. data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
  75. data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
  76. data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
  77. data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
  78. data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
  79. data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
  80. data/examples/image_processor/test_images/sample_1.png +1 -0
  81. data/examples/image_processor/test_images/sample_10.png +1 -0
  82. data/examples/image_processor/test_images/sample_2.png +1 -0
  83. data/examples/image_processor/test_images/sample_3.png +1 -0
  84. data/examples/image_processor/test_images/sample_4.png +1 -0
  85. data/examples/image_processor/test_images/sample_5.png +1 -0
  86. data/examples/image_processor/test_images/sample_6.png +1 -0
  87. data/examples/image_processor/test_images/sample_7.png +1 -0
  88. data/examples/image_processor/test_images/sample_8.png +1 -0
  89. data/examples/image_processor/test_images/sample_9.png +1 -0
  90. data/examples/log_analyzer/README.adoc +662 -0
  91. data/examples/log_analyzer/log_analyzer.rb +579 -0
  92. data/examples/log_analyzer/sample_logs/apache.log +20 -0
  93. data/examples/log_analyzer/sample_logs/json.log +15 -0
  94. data/examples/log_analyzer/sample_logs/nginx.log +15 -0
  95. data/examples/log_analyzer/sample_logs/rails.log +29 -0
  96. data/examples/multi_work_type/README.adoc +576 -26
  97. data/examples/multi_work_type/multi_work_type.rb +30 -29
  98. data/examples/performance_monitoring.rb +120 -0
  99. data/examples/pipeline_processing/README.adoc +740 -26
  100. data/examples/pipeline_processing/pipeline_processing.rb +16 -16
  101. data/examples/priority_work_example.rb +155 -0
  102. data/examples/producer_subscriber/README.adoc +889 -46
  103. data/examples/producer_subscriber/producer_subscriber.rb +20 -16
  104. data/examples/scatter_gather/README.adoc +829 -27
  105. data/examples/scatter_gather/scatter_gather.rb +29 -28
  106. data/examples/simple/README.adoc +347 -0
  107. data/examples/simple/sample.rb +5 -5
  108. data/examples/specialized_workers/README.adoc +622 -26
  109. data/examples/specialized_workers/specialized_workers.rb +88 -45
  110. data/examples/stream_processor/README.adoc +206 -0
  111. data/examples/stream_processor/stream_processor.rb +284 -0
  112. data/examples/web_scraper/README.adoc +625 -0
  113. data/examples/web_scraper/web_scraper.rb +285 -0
  114. data/examples/workflow/README.adoc +406 -0
  115. data/examples/workflow/circuit_breaker/README.adoc +360 -0
  116. data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
  117. data/examples/workflow/conditional/README.adoc +483 -0
  118. data/examples/workflow/conditional/conditional_workflow.rb +215 -0
  119. data/examples/workflow/dead_letter_queue/README.adoc +374 -0
  120. data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
  121. data/examples/workflow/fan_out/README.adoc +381 -0
  122. data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
  123. data/examples/workflow/retry/README.adoc +248 -0
  124. data/examples/workflow/retry/retry_workflow.rb +195 -0
  125. data/examples/workflow/simple_linear/README.adoc +267 -0
  126. data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
  127. data/examples/workflow/simplified/README.adoc +329 -0
  128. data/examples/workflow/simplified/simplified_workflow.rb +222 -0
  129. data/exe/fractor +10 -0
  130. data/lib/fractor/cli.rb +288 -0
  131. data/lib/fractor/configuration.rb +307 -0
  132. data/lib/fractor/continuous_server.rb +183 -0
  133. data/lib/fractor/error_formatter.rb +72 -0
  134. data/lib/fractor/error_report_generator.rb +152 -0
  135. data/lib/fractor/error_reporter.rb +244 -0
  136. data/lib/fractor/error_statistics.rb +147 -0
  137. data/lib/fractor/execution_tracer.rb +162 -0
  138. data/lib/fractor/logger.rb +230 -0
  139. data/lib/fractor/main_loop_handler.rb +406 -0
  140. data/lib/fractor/main_loop_handler3.rb +135 -0
  141. data/lib/fractor/main_loop_handler4.rb +299 -0
  142. data/lib/fractor/performance_metrics_collector.rb +181 -0
  143. data/lib/fractor/performance_monitor.rb +215 -0
  144. data/lib/fractor/performance_report_generator.rb +202 -0
  145. data/lib/fractor/priority_work.rb +93 -0
  146. data/lib/fractor/priority_work_queue.rb +189 -0
  147. data/lib/fractor/result_aggregator.rb +33 -1
  148. data/lib/fractor/shutdown_handler.rb +168 -0
  149. data/lib/fractor/signal_handler.rb +80 -0
  150. data/lib/fractor/supervisor.rb +430 -144
  151. data/lib/fractor/supervisor_logger.rb +88 -0
  152. data/lib/fractor/version.rb +1 -1
  153. data/lib/fractor/work.rb +12 -0
  154. data/lib/fractor/work_distribution_manager.rb +151 -0
  155. data/lib/fractor/work_queue.rb +88 -0
  156. data/lib/fractor/work_result.rb +181 -9
  157. data/lib/fractor/worker.rb +75 -1
  158. data/lib/fractor/workflow/builder.rb +210 -0
  159. data/lib/fractor/workflow/chain_builder.rb +169 -0
  160. data/lib/fractor/workflow/circuit_breaker.rb +183 -0
  161. data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
  162. data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
  163. data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
  164. data/lib/fractor/workflow/execution_hooks.rb +39 -0
  165. data/lib/fractor/workflow/execution_strategy.rb +225 -0
  166. data/lib/fractor/workflow/execution_trace.rb +134 -0
  167. data/lib/fractor/workflow/helpers.rb +191 -0
  168. data/lib/fractor/workflow/job.rb +290 -0
  169. data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
  170. data/lib/fractor/workflow/logger.rb +110 -0
  171. data/lib/fractor/workflow/pre_execution_context.rb +193 -0
  172. data/lib/fractor/workflow/retry_config.rb +156 -0
  173. data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
  174. data/lib/fractor/workflow/retry_strategy.rb +93 -0
  175. data/lib/fractor/workflow/structured_logger.rb +30 -0
  176. data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
  177. data/lib/fractor/workflow/visualizer.rb +211 -0
  178. data/lib/fractor/workflow/workflow_context.rb +132 -0
  179. data/lib/fractor/workflow/workflow_executor.rb +669 -0
  180. data/lib/fractor/workflow/workflow_result.rb +55 -0
  181. data/lib/fractor/workflow/workflow_validator.rb +295 -0
  182. data/lib/fractor/workflow.rb +333 -0
  183. data/lib/fractor/wrapped_ractor.rb +66 -91
  184. data/lib/fractor/wrapped_ractor3.rb +161 -0
  185. data/lib/fractor/wrapped_ractor4.rb +242 -0
  186. data/lib/fractor.rb +93 -3
  187. metadata +192 -6
  188. data/tests/sample.rb.bak +0 -309
  189. data/tests/sample_working.rb.bak +0 -209
@@ -0,0 +1,615 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require_relative "../../lib/fractor"
5
+ require "csv"
6
+ require "json"
7
+ require "fileutils"
8
+ require "digest"
9
+
10
+ # File processing work item
11
+ class FileWork < Fractor::Work
12
+ def initialize(file_path, output_dir, options = {})
13
+ super({
14
+ file_path: file_path,
15
+ output_dir: output_dir,
16
+ validate: options.fetch(:validate, true),
17
+ transform: options.fetch(:transform, true),
18
+ # Pre-parsed data (for CSV/JSON - must be parsed OUTSIDE ractors)
19
+ pre_parsed_data: options[:pre_parsed_data]
20
+ })
21
+ end
22
+
23
+ def file_path
24
+ input[:file_path]
25
+ end
26
+
27
+ def output_dir
28
+ input[:output_dir]
29
+ end
30
+
31
+ def validate?
32
+ input[:validate]
33
+ end
34
+
35
+ def transform?
36
+ input[:transform]
37
+ end
38
+
39
+ def pre_parsed_data
40
+ input[:pre_parsed_data]
41
+ end
42
+
43
+ def to_s
44
+ "FileWork(#{File.basename(file_path)})"
45
+ end
46
+ end
47
+
48
+ # Worker for processing files
49
# Worker that processes a single file inside a Ractor.
#
# Pipeline: detect format -> obtain data (pre-parsed or parsed here) ->
# optional validation -> optional transformation -> write a JSON summary
# into the work item's output directory.
class FileProcessorWorker < Fractor::Worker
  # Process one FileWork item.
  #
  # Returns a Hash with status: "success" (keys: file, format, records,
  # output, checksum) or status: "error" (keys: file, error, error_class).
  # Returns nil for anything that is not a FileWork.
  def process(work)
    return nil unless work.is_a?(FileWork)

    file_path = work.file_path

    raise "File not found: #{file_path}" unless File.exist?(file_path)

    format = detect_format(file_path)

    # CSV/JSON must be parsed OUTSIDE Ractors (their parsers are not
    # Ractor-safe) and arrive via pre_parsed_data; only XML is parsed
    # in-worker. pre_parsed_data is nil or parsed data, never false.
    data = work.pre_parsed_data || parse_file(file_path, format)

    validate_data(data, format) if work.validate?
    data = transform_data(data, format) if work.transform?

    output_path = write_output(data, file_path, work.output_dir, format)

    {
      file: File.basename(file_path),
      format: format,
      records: data.is_a?(Array) ? data.size : 1,
      output: output_path,
      checksum: calculate_checksum(file_path),
      status: "success"
    }
  rescue StandardError => e
    {
      file: File.basename(work.file_path),
      status: "error",
      error: e.message,
      error_class: e.class.name
    }
  end

  private

  # Map a file extension to a format symbol; raises for unknown types.
  def detect_format(file_path)
    ext = File.extname(file_path).downcase
    case ext
    when ".csv" then :csv
    when ".json" then :json
    when ".xml" then :xml
    else raise "Unsupported file format: #{ext}"
    end
  end

  def parse_file(file_path, format)
    case format
    when :csv then parse_csv(file_path)
    when :json then parse_json(file_path)
    when :xml then parse_xml(file_path)
    end
  end

  # Parse a CSV file into an array of plain hashes.
  # NOTE(review): normally unused inside workers — CSV is pre-parsed by
  # BatchFileProcessor before work distribution.
  def parse_csv(file_path)
    content = File.read(file_path)
    csv_table = CSV.parse(content, headers: true)

    # Convert to array immediately (CSV::Table is not Ractor-safe)
    result = []
    csv_table.each { |row| result << row.to_hash }
    result
  end

  def parse_json(file_path)
    content = File.read(file_path)
    JSON.parse(content)
  end

  # Minimal regex-based XML extraction of <record>...</record> elements;
  # avoids REXML, which has issues inside Ractors.
  def parse_xml(file_path)
    content = File.read(file_path)

    records = []
    content.scan(%r{<record>(.*?)</record>}m).each do |match|
      record_content = match[0]
      hash = {}

      # Back-reference \1 ensures the closing tag matches the opening one.
      record_content.scan(%r{<(\w+)>(.*?)</\1>}m).each do |tag, value|
        hash[tag] = value.strip
      end

      records << hash unless hash.empty?
    end
    records
  end

  # Raise if the parsed data is empty or structurally invalid.
  def validate_data(data, format)
    case format
    when :csv, :xml
      raise "No records found" if data.empty?

      data.each_with_index do |record, idx|
        raise "Record #{idx + 1} is not a hash" unless record.is_a?(Hash)
        raise "Record #{idx + 1} is empty" if record.empty?
      end
    when :json
      if data.is_a?(Array)
        raise "No records found" if data.empty?
      elsif !data.is_a?(Hash)
        raise "Invalid JSON structure"
      end
    end
  end

  def transform_data(data, format)
    case format
    when :csv, :xml
      data.map { |record| transform_record(record) }
    when :json
      if data.is_a?(Array)
        data.map { |record| transform_record(record) }
      else
        transform_record(data)
      end
    end
  end

  # Symbolize keys and coerce string values to native types.
  #
  # FIX: anchors use \A/\z (whole string). The previous ^/$ anchors match
  # per LINE in Ruby, so a multi-line value such as "abc\n123" was
  # mis-detected as numeric and silently converted to 0 via to_i.
  def transform_record(record)
    transformed = {}
    record.each do |key, value|
      sym_key = key.to_sym

      transformed[sym_key] = case value
                             when /\A\d+\z/
                               value.to_i
                             when /\A\d+\.\d+\z/
                               value.to_f
                             when /\A(true|false)\z/i
                               value.downcase == "true"
                             else
                               value
                             end
    end
    transformed
  end

  # Write the processed records as pretty JSON; returns the output path.
  # NOTE(review): Time#iso8601 is provided by the "time" stdlib library —
  # confirm it is loaded transitively, or add an explicit require.
  def write_output(data, original_path, output_dir, format)
    FileUtils.mkdir_p(output_dir)

    base_name = File.basename(original_path, ".*")
    output_path = File.join(output_dir, "#{base_name}_processed.json")

    output_data = {
      source_file: File.basename(original_path),
      format: format,
      records: data,
      processed_at: Time.now.iso8601,
      record_count: data.is_a?(Array) ? data.size : 1
    }

    File.write(output_path, JSON.pretty_generate(output_data))
    output_path
  end

  def calculate_checksum(file_path)
    Digest::SHA256.file(file_path).hexdigest
  end
end
237
+
238
+ # Batch file processor
239
# Batch file processor.
#
# Distributes file processing across a pool of Fractor worker Ractors,
# collecting successes into @results, failures into @errors, and moving
# unrecoverable inputs into a JSON-file-based dead letter queue (DLQ).
class BatchFileProcessor
  attr_reader :files, :results, :errors, :dlq_files

  def initialize(output_dir: "processed", dlq_dir: "dlq")
    @output_dir = output_dir
    @dlq_dir = dlq_dir
    @files = []
    @results = []
    @errors = []
    @dlq_files = []
  end

  # Queue a file for processing; silently ignores missing paths.
  def add_file(file_path)
    @files << file_path if File.exist?(file_path)
  end

  def add_files(file_paths)
    file_paths.each { |path| add_file(path) }
  end

  # Process all queued files with num_workers Ractors.
  # Returns { processed: [...], errors: [...], dlq: [...] }.
  def process_all(num_workers: 4, validate: true, transform: true)
    return { processed: [], errors: [], dlq: [] } if @files.empty?

    puts "Processing #{@files.size} files with #{num_workers} workers..."
    puts "Validation: #{validate ? 'enabled' : 'disabled'}"
    puts "Transformation: #{transform ? 'enabled' : 'disabled'}"
    puts

    @results = []
    @errors = []
    @dlq_files = []

    # Parse CSV/JSON files OUTSIDE Ractors (their parsers are not
    # Ractor-safe); XML is parsed inside the workers.
    #
    # FIX: a single unparsable file previously raised here and aborted
    # the entire batch. Pre-parse failures are now captured per file and
    # routed through the normal error/DLQ path instead.
    pre_parse_failures = []
    work_items = @files.filter_map do |file_path|
      format = detect_format_from_path(file_path)
      pre_parsed_data =
        case format
        when :csv then parse_csv_outside_ractor(file_path)
        when :json then parse_json_outside_ractor(file_path)
        end

      FileWork.new(file_path, @output_dir,
                   validate: validate,
                   transform: transform,
                   pre_parsed_data: pre_parsed_data)
    rescue StandardError => e
      pre_parse_failures << {
        file: File.basename(file_path),
        status: "error",
        error: e.message,
        error_class: e.class.name
      }
      nil
    end

    # Only spin up the supervisor when there is actual work left.
    unless work_items.empty?
      supervisor = Fractor::Supervisor.new(
        worker_pools: [
          { worker_class: FileProcessorWorker, num_workers: num_workers }
        ]
      )

      supervisor.add_work_items(work_items)
      supervisor.run

      results_obj = supervisor.results
      (results_obj.results + results_obj.errors).each do |work_result|
        result = work_result.respond_to?(:result) ? work_result.result : work_result
        record_result(result) if result.is_a?(Hash)
      end
    end

    pre_parse_failures.each { |failure| record_result(failure) }

    puts "\n=== Processing Complete ==="
    puts "Successful: #{@results.size}"
    puts "Errors: #{@errors.size}"
    puts "DLQ: #{@dlq_files.size}"
    puts

    {
      processed: @results,
      errors: @errors,
      dlq: @dlq_files
    }
  end

  # Read all DLQ entries from disk; returns [] when the DLQ is empty.
  def inspect_dlq
    return [] unless Dir.exist?(@dlq_dir)

    dlq_files = Dir.glob(File.join(@dlq_dir, "*.json"))

    dlq_files.map do |file_path|
      JSON.parse(File.read(file_path), symbolize_names: true)
    end
  end

  # Re-run a single DLQ entry; removes it from the DLQ on success.
  # NOTE: replaces @files with just the retried file (original behavior).
  def retry_dlq_file(dlq_file_name)
    dlq_path = File.join(@dlq_dir, dlq_file_name)

    unless File.exist?(dlq_path)
      puts "DLQ file not found: #{dlq_file_name}"
      return false
    end

    dlq_entry = JSON.parse(File.read(dlq_path), symbolize_names: true)
    original_file = dlq_entry[:original_file]

    unless File.exist?(original_file)
      puts "Original file not found: #{original_file}"
      return false
    end

    puts "Retrying #{File.basename(original_file)}..."

    @files = [original_file]
    result = process_all(num_workers: 1)

    if result[:processed].any?
      # Remove from DLQ if successful
      File.delete(dlq_path)
      puts "Successfully processed and removed from DLQ"
      true
    else
      puts "Retry failed, file remains in DLQ"
      false
    end
  end

  private

  # Tally one worker (or pre-parse) result into the success/error state,
  # printing a progress line and moving DLQ-worthy errors aside.
  def record_result(result)
    if result[:status] == "success"
      @results << result
      puts "[✓] #{result[:file]}: #{result[:records]} records processed"
    else
      @errors << result
      puts "[✗] #{result[:file]}: #{result[:error]}"

      # Move to DLQ if it's a validation or parsing error
      move_to_dlq(result) if should_move_to_dlq?(result)
    end
  end

  # Heuristic: only validation/parse failures belong in the DLQ.
  def should_move_to_dlq?(result)
    error_class = result[:error_class] || ""
    error_msg = result[:error] || ""

    error_class.include?("JSON::ParserError") ||
      error_class.include?("CSV::") ||
      error_msg.include?("No records found") ||
      error_msg.include?("empty") ||
      error_msg.include?("Invalid")
  end

  # Persist a DLQ entry (error details + checksum) next to the queue dir.
  def move_to_dlq(result)
    FileUtils.mkdir_p(@dlq_dir)

    original_file = @files.find { |f| File.basename(f) == result[:file] }
    return unless original_file

    dlq_entry = {
      file: result[:file],
      original_file: original_file,
      error: result[:error],
      error_class: result[:error_class],
      moved_at: Time.now.iso8601,
      checksum: calculate_file_checksum(original_file)
    }

    dlq_file = File.join(@dlq_dir, "#{File.basename(result[:file], '.*')}_dlq.json")
    File.write(dlq_file, JSON.pretty_generate(dlq_entry))

    @dlq_files << dlq_entry
  end

  def calculate_file_checksum(file_path)
    return nil unless File.exist?(file_path)

    Digest::SHA256.file(file_path).hexdigest
  end

  # Helper methods to parse files OUTSIDE ractors.
  # CSV and JSON must be parsed sequentially due to library limitations.

  def detect_format_from_path(file_path)
    ext = File.extname(file_path).downcase
    case ext
    when ".csv" then :csv
    when ".json" then :json
    when ".xml" then :xml
    else raise "Unsupported file format: #{ext}"
    end
  end

  def parse_csv_outside_ractor(file_path)
    content = File.read(file_path)
    csv_table = CSV.parse(content, headers: true)

    # Convert to array immediately (CSV::Table is not Ractor-safe)
    result = []
    csv_table.each { |row| result << row.to_hash }
    result
  rescue StandardError => e
    raise "Failed to parse CSV #{file_path}: #{e.message}"
  end

  # Parse JSON and normalize a top-level object to a one-element array.
  def parse_json_outside_ractor(file_path)
    content = File.read(file_path)
    data = JSON.parse(content)

    data.is_a?(Array) ? data : [data]
  rescue StandardError => e
    raise "Failed to parse JSON #{file_path}: #{e.message}"
  end
end
456
+
457
+ # Progress report generator
458
+ class ProcessingReport
459
+ def self.generate(results, output_file = nil)
460
+ report = build_report(results)
461
+
462
+ if output_file
463
+ File.write(output_file, report)
464
+ puts "Report saved to #{output_file}"
465
+ else
466
+ puts report
467
+ end
468
+
469
+ report
470
+ end
471
+
472
+ def self.build_report(results)
473
+ lines = []
474
+ lines << "=" * 80
475
+ lines << "BATCH FILE PROCESSING REPORT"
476
+ lines << "=" * 80
477
+ lines << ""
478
+
479
+ processed = results[:processed] || []
480
+ errors = results[:errors] || []
481
+ dlq = results[:dlq] || []
482
+
483
+ # Summary
484
+ lines << "SUMMARY"
485
+ lines << "-" * 80
486
+ lines << format("Total files: %d", processed.size + errors.size)
487
+ lines << format("Successful: %d", processed.size)
488
+ lines << format("Errors: %d", errors.size)
489
+ lines << format("Dead Letter Queue: %d", dlq.size)
490
+ lines << ""
491
+
492
+ # Successful files
493
+ if processed.any?
494
+ lines << "SUCCESSFUL PROCESSING (#{processed.size})"
495
+ lines << "-" * 80
496
+ processed.each do |result|
497
+ lines << format(" %s [%s]: %d records",
498
+ result[:file], result[:format], result[:records])
499
+ end
500
+ lines << ""
501
+ end
502
+
503
+ # Errors
504
+ if errors.any?
505
+ lines << "ERRORS (#{errors.size})"
506
+ lines << "-" * 80
507
+ errors.each do |result|
508
+ lines << format(" %s: %s", result[:file], result[:error])
509
+ end
510
+ lines << ""
511
+ end
512
+
513
+ # DLQ
514
+ if dlq.any?
515
+ lines << "DEAD LETTER QUEUE (#{dlq.size})"
516
+ lines << "-" * 80
517
+ dlq.each do |entry|
518
+ lines << format(" %s: %s", entry[:file], entry[:error])
519
+ end
520
+ lines << ""
521
+ end
522
+
523
+ lines << "=" * 80
524
+
525
+ lines.join("\n")
526
+ end
527
+ end
528
+
529
+ # Run example if executed directly
530
# CLI entry point: parse options, then either inspect/retry the DLQ or
# process the files named on the command line and emit a report.
if __FILE__ == $PROGRAM_NAME
  require "optparse"

  config = {
    workers: 4,
    validate: true,
    transform: true,
    output: nil,
    inspect_dlq: false,
    retry_dlq: nil
  }

  parser = OptionParser.new do |o|
    o.banner = "Usage: file_processor.rb [options] FILES..."

    o.on("-w", "--workers NUM", Integer, "Number of worker ractors (default: 4)") do |n|
      config[:workers] = n
    end
    o.on("--[no-]validate", "Enable/disable validation (default: true)") do |v|
      config[:validate] = v
    end
    o.on("--[no-]transform", "Enable/disable transformation (default: true)") do |t|
      config[:transform] = t
    end
    o.on("-o", "--output FILE", "Output report file") do |f|
      config[:output] = f
    end
    o.on("--inspect-dlq", "Inspect dead letter queue") do
      config[:inspect_dlq] = true
    end
    o.on("--retry-dlq FILE", "Retry a file from DLQ") do |f|
      config[:retry_dlq] = f
    end
    o.on("-h", "--help", "Show this message") do
      puts o
      exit
    end
  end
  parser.parse!

  processor = BatchFileProcessor.new

  # DLQ inspection mode: dump entries and exit.
  if config[:inspect_dlq]
    puts "=== Dead Letter Queue Inspection ==="
    puts
    entries = processor.inspect_dlq

    if entries.empty?
      puts "DLQ is empty"
    else
      entries.each do |entry|
        puts "File: #{entry[:file]}"
        puts " Error: #{entry[:error]}"
        puts " Moved at: #{entry[:moved_at]}"
        puts " Checksum: #{entry[:checksum]}"
        puts
      end
    end
    exit
  end

  # DLQ retry mode: reprocess one entry and exit.
  if config[:retry_dlq]
    processor.retry_dlq_file(config[:retry_dlq])
    exit
  end

  if ARGV.empty?
    puts "Error: No files specified"
    puts "Usage: file_processor.rb [options] FILES..."
    exit 1
  end

  processor.add_files(ARGV)
  batch = processor.process_all(
    num_workers: config[:workers],
    validate: config[:validate],
    transform: config[:transform]
  )

  ProcessingReport.generate(batch, config[:output])
end
@@ -0,0 +1 @@
1
+ id,name,email
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <records>
3
+ <record>
4
+ <id>1001</id>
5
+ <user_id>1</user_id>
6
+ <product_id>101</product_id>
7
+ <quantity>1</quantity>
8
+ <total>999.99</total>
9
+ </record>
10
+ <record>
11
+ <id>1002</id>
12
+ <user_id>2</user_id>
13
+ <product_id>102</product_id>
14
+ <quantity>2</quantity>
15
+ <total>59.98</total>
16
+ </record>
17
+ <record>
18
+ <id>1003</id>
19
+ <user_id>3</user_id>
20
+ <product_id>103</product_id>
21
+ <quantity>1</quantity>
22
+ <total>79.99</total>
23
+ </record>
24
+ </records>
@@ -0,0 +1,23 @@
1
+ [
2
+ {
3
+ "id": "101",
4
+ "name": "Laptop",
5
+ "price": "999.99",
6
+ "stock": "15",
7
+ "available": "true"
8
+ },
9
+ {
10
+ "id": "102",
11
+ "name": "Mouse",
12
+ "price": "29.99",
13
+ "stock": "50",
14
+ "available": "true"
15
+ },
16
+ {
17
+ "id": "103",
18
+ "name": "Keyboard",
19
+ "price": "79.99",
20
+ "stock": "30",
21
+ "available": "false"
22
+ }
23
+ ]
@@ -0,0 +1,6 @@
1
+ id,name,email,age,active
2
+ 1,Alice Johnson,alice@example.com,30,true
3
+ 2,Bob Smith,bob@example.com,25,true
4
+ 3,Carol Williams,carol@example.com,35,false
5
+ 4,David Brown,david@example.com,28,true
6
+ 5,Eve Davis,eve@example.com,32,true