fractor 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-main-ci-rubocop-yml +552 -0
- data/.rubocop.yml +14 -8
- data/.rubocop_todo.yml +284 -43
- data/README.adoc +111 -950
- data/docs/.lycheeignore +16 -0
- data/docs/Gemfile +24 -0
- data/docs/README.md +157 -0
- data/docs/_config.yml +151 -0
- data/docs/_features/error-handling.adoc +1192 -0
- data/docs/_features/index.adoc +80 -0
- data/docs/_features/monitoring.adoc +589 -0
- data/docs/_features/signal-handling.adoc +202 -0
- data/docs/_features/workflows.adoc +1235 -0
- data/docs/_guides/continuous-mode.adoc +736 -0
- data/docs/_guides/cookbook.adoc +1133 -0
- data/docs/_guides/index.adoc +55 -0
- data/docs/_guides/pipeline-mode.adoc +730 -0
- data/docs/_guides/troubleshooting.adoc +358 -0
- data/docs/_pages/architecture.adoc +1390 -0
- data/docs/_pages/core-concepts.adoc +1392 -0
- data/docs/_pages/design-principles.adoc +862 -0
- data/docs/_pages/getting-started.adoc +290 -0
- data/docs/_pages/installation.adoc +143 -0
- data/docs/_reference/api.adoc +1080 -0
- data/docs/_reference/error-reporting.adoc +670 -0
- data/docs/_reference/examples.adoc +181 -0
- data/docs/_reference/index.adoc +96 -0
- data/docs/_reference/troubleshooting.adoc +862 -0
- data/docs/_tutorials/complex-workflows.adoc +1022 -0
- data/docs/_tutorials/data-processing-pipeline.adoc +740 -0
- data/docs/_tutorials/first-application.adoc +384 -0
- data/docs/_tutorials/index.adoc +48 -0
- data/docs/_tutorials/long-running-services.adoc +931 -0
- data/docs/assets/images/favicon-16.png +0 -0
- data/docs/assets/images/favicon-32.png +0 -0
- data/docs/assets/images/favicon-48.png +0 -0
- data/docs/assets/images/favicon.ico +0 -0
- data/docs/assets/images/favicon.png +0 -0
- data/docs/assets/images/favicon.svg +45 -0
- data/docs/assets/images/fractor-icon.svg +49 -0
- data/docs/assets/images/fractor-logo.svg +61 -0
- data/docs/index.adoc +131 -0
- data/docs/lychee.toml +39 -0
- data/examples/api_aggregator/README.adoc +627 -0
- data/examples/api_aggregator/api_aggregator.rb +376 -0
- data/examples/auto_detection/README.adoc +407 -29
- data/examples/auto_detection/auto_detection.rb +9 -9
- data/examples/continuous_chat_common/message_protocol.rb +53 -0
- data/examples/continuous_chat_fractor/README.adoc +217 -0
- data/examples/continuous_chat_fractor/chat_client.rb +303 -0
- data/examples/continuous_chat_fractor/chat_common.rb +83 -0
- data/examples/continuous_chat_fractor/chat_server.rb +167 -0
- data/examples/continuous_chat_fractor/simulate.rb +345 -0
- data/examples/continuous_chat_server/README.adoc +135 -0
- data/examples/continuous_chat_server/chat_client.rb +303 -0
- data/examples/continuous_chat_server/chat_server.rb +359 -0
- data/examples/continuous_chat_server/simulate.rb +343 -0
- data/examples/error_reporting.rb +207 -0
- data/examples/file_processor/README.adoc +170 -0
- data/examples/file_processor/file_processor.rb +615 -0
- data/examples/file_processor/sample_files/invalid.csv +1 -0
- data/examples/file_processor/sample_files/orders.xml +24 -0
- data/examples/file_processor/sample_files/products.json +23 -0
- data/examples/file_processor/sample_files/users.csv +6 -0
- data/examples/hierarchical_hasher/README.adoc +629 -41
- data/examples/hierarchical_hasher/hierarchical_hasher.rb +12 -8
- data/examples/image_processor/README.adoc +610 -0
- data/examples/image_processor/image_processor.rb +349 -0
- data/examples/image_processor/processed_images/sample_10_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_1_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_2_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_3_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_4_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_5_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_6_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_7_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_8_processed.jpg.json +12 -0
- data/examples/image_processor/processed_images/sample_9_processed.jpg.json +12 -0
- data/examples/image_processor/test_images/sample_1.png +1 -0
- data/examples/image_processor/test_images/sample_10.png +1 -0
- data/examples/image_processor/test_images/sample_2.png +1 -0
- data/examples/image_processor/test_images/sample_3.png +1 -0
- data/examples/image_processor/test_images/sample_4.png +1 -0
- data/examples/image_processor/test_images/sample_5.png +1 -0
- data/examples/image_processor/test_images/sample_6.png +1 -0
- data/examples/image_processor/test_images/sample_7.png +1 -0
- data/examples/image_processor/test_images/sample_8.png +1 -0
- data/examples/image_processor/test_images/sample_9.png +1 -0
- data/examples/log_analyzer/README.adoc +662 -0
- data/examples/log_analyzer/log_analyzer.rb +579 -0
- data/examples/log_analyzer/sample_logs/apache.log +20 -0
- data/examples/log_analyzer/sample_logs/json.log +15 -0
- data/examples/log_analyzer/sample_logs/nginx.log +15 -0
- data/examples/log_analyzer/sample_logs/rails.log +29 -0
- data/examples/multi_work_type/README.adoc +576 -26
- data/examples/multi_work_type/multi_work_type.rb +30 -29
- data/examples/performance_monitoring.rb +120 -0
- data/examples/pipeline_processing/README.adoc +740 -26
- data/examples/pipeline_processing/pipeline_processing.rb +16 -16
- data/examples/priority_work_example.rb +155 -0
- data/examples/producer_subscriber/README.adoc +889 -46
- data/examples/producer_subscriber/producer_subscriber.rb +20 -16
- data/examples/scatter_gather/README.adoc +829 -27
- data/examples/scatter_gather/scatter_gather.rb +29 -28
- data/examples/simple/README.adoc +347 -0
- data/examples/simple/sample.rb +5 -5
- data/examples/specialized_workers/README.adoc +622 -26
- data/examples/specialized_workers/specialized_workers.rb +88 -45
- data/examples/stream_processor/README.adoc +206 -0
- data/examples/stream_processor/stream_processor.rb +284 -0
- data/examples/web_scraper/README.adoc +625 -0
- data/examples/web_scraper/web_scraper.rb +285 -0
- data/examples/workflow/README.adoc +406 -0
- data/examples/workflow/circuit_breaker/README.adoc +360 -0
- data/examples/workflow/circuit_breaker/circuit_breaker_workflow.rb +225 -0
- data/examples/workflow/conditional/README.adoc +483 -0
- data/examples/workflow/conditional/conditional_workflow.rb +215 -0
- data/examples/workflow/dead_letter_queue/README.adoc +374 -0
- data/examples/workflow/dead_letter_queue/dead_letter_queue_workflow.rb +217 -0
- data/examples/workflow/fan_out/README.adoc +381 -0
- data/examples/workflow/fan_out/fan_out_workflow.rb +202 -0
- data/examples/workflow/retry/README.adoc +248 -0
- data/examples/workflow/retry/retry_workflow.rb +195 -0
- data/examples/workflow/simple_linear/README.adoc +267 -0
- data/examples/workflow/simple_linear/simple_linear_workflow.rb +175 -0
- data/examples/workflow/simplified/README.adoc +329 -0
- data/examples/workflow/simplified/simplified_workflow.rb +222 -0
- data/exe/fractor +10 -0
- data/lib/fractor/cli.rb +288 -0
- data/lib/fractor/configuration.rb +307 -0
- data/lib/fractor/continuous_server.rb +183 -0
- data/lib/fractor/error_formatter.rb +72 -0
- data/lib/fractor/error_report_generator.rb +152 -0
- data/lib/fractor/error_reporter.rb +244 -0
- data/lib/fractor/error_statistics.rb +147 -0
- data/lib/fractor/execution_tracer.rb +162 -0
- data/lib/fractor/logger.rb +230 -0
- data/lib/fractor/main_loop_handler.rb +406 -0
- data/lib/fractor/main_loop_handler3.rb +135 -0
- data/lib/fractor/main_loop_handler4.rb +299 -0
- data/lib/fractor/performance_metrics_collector.rb +181 -0
- data/lib/fractor/performance_monitor.rb +215 -0
- data/lib/fractor/performance_report_generator.rb +202 -0
- data/lib/fractor/priority_work.rb +93 -0
- data/lib/fractor/priority_work_queue.rb +189 -0
- data/lib/fractor/result_aggregator.rb +33 -1
- data/lib/fractor/shutdown_handler.rb +168 -0
- data/lib/fractor/signal_handler.rb +80 -0
- data/lib/fractor/supervisor.rb +430 -144
- data/lib/fractor/supervisor_logger.rb +88 -0
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work.rb +12 -0
- data/lib/fractor/work_distribution_manager.rb +151 -0
- data/lib/fractor/work_queue.rb +88 -0
- data/lib/fractor/work_result.rb +181 -9
- data/lib/fractor/worker.rb +75 -1
- data/lib/fractor/workflow/builder.rb +210 -0
- data/lib/fractor/workflow/chain_builder.rb +169 -0
- data/lib/fractor/workflow/circuit_breaker.rb +183 -0
- data/lib/fractor/workflow/circuit_breaker_orchestrator.rb +208 -0
- data/lib/fractor/workflow/circuit_breaker_registry.rb +112 -0
- data/lib/fractor/workflow/dead_letter_queue.rb +334 -0
- data/lib/fractor/workflow/execution_hooks.rb +39 -0
- data/lib/fractor/workflow/execution_strategy.rb +225 -0
- data/lib/fractor/workflow/execution_trace.rb +134 -0
- data/lib/fractor/workflow/helpers.rb +191 -0
- data/lib/fractor/workflow/job.rb +290 -0
- data/lib/fractor/workflow/job_dependency_validator.rb +120 -0
- data/lib/fractor/workflow/logger.rb +110 -0
- data/lib/fractor/workflow/pre_execution_context.rb +193 -0
- data/lib/fractor/workflow/retry_config.rb +156 -0
- data/lib/fractor/workflow/retry_orchestrator.rb +184 -0
- data/lib/fractor/workflow/retry_strategy.rb +93 -0
- data/lib/fractor/workflow/structured_logger.rb +30 -0
- data/lib/fractor/workflow/type_compatibility_validator.rb +222 -0
- data/lib/fractor/workflow/visualizer.rb +211 -0
- data/lib/fractor/workflow/workflow_context.rb +132 -0
- data/lib/fractor/workflow/workflow_executor.rb +669 -0
- data/lib/fractor/workflow/workflow_result.rb +55 -0
- data/lib/fractor/workflow/workflow_validator.rb +295 -0
- data/lib/fractor/workflow.rb +333 -0
- data/lib/fractor/wrapped_ractor.rb +66 -91
- data/lib/fractor/wrapped_ractor3.rb +161 -0
- data/lib/fractor/wrapped_ractor4.rb +242 -0
- data/lib/fractor.rb +93 -3
- metadata +192 -6
- data/tests/sample.rb.bak +0 -309
- data/tests/sample_working.rb.bak +0 -209
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require_relative "../../lib/fractor"

require "csv"
require "digest"
require "fileutils"
require "json"
require "time" # Time#iso8601 (used for output metadata) lives in the "time" stdlib extension
|
|
9
|
+
|
|
10
|
+
# File processing work item
|
|
11
|
+
# Unit of work describing a single file to process.
#
# Wraps the file path, destination directory and processing flags into the
# input hash expected by Fractor::Work. CSV/JSON payloads may be parsed
# ahead of time (outside Ractors) and carried along as :pre_parsed_data.
class FileWork < Fractor::Work
  def initialize(file_path, output_dir, options = {})
    super({
      file_path: file_path,
      output_dir: output_dir,
      validate: options.fetch(:validate, true),
      transform: options.fetch(:transform, true),
      # Pre-parsed data (for CSV/JSON - must be parsed OUTSIDE ractors)
      pre_parsed_data: options[:pre_parsed_data]
    })
  end

  # Path of the source file to process.
  def file_path = input[:file_path]

  # Directory the processed JSON output is written to.
  def output_dir = input[:output_dir]

  # Whether parsed records should be validated.
  def validate? = input[:validate]

  # Whether parsed records should be transformed.
  def transform? = input[:transform]

  # Data parsed before distribution, or nil when parsing happens in-worker.
  def pre_parsed_data = input[:pre_parsed_data]

  def to_s = "FileWork(#{File.basename(file_path)})"
end
|
|
47
|
+
|
|
48
|
+
# Worker for processing files
|
|
49
|
+
# Worker that parses, validates, transforms and writes a single file.
class FileProcessorWorker < Fractor::Worker
  # Process one FileWork item.
  #
  # Returns a result hash with :status "success" (plus file/format/records/
  # output/checksum) or :status "error" (plus error/error_class). Items that
  # are not FileWork yield nil.
  def process(work)
    return nil unless work.is_a?(FileWork)

    path = work.file_path
    raise "File not found: #{path}" unless File.exist?(path)

    format = detect_format(path)

    # Use pre-parsed data if available (for CSV/JSON), otherwise parse
    # in-place. CSV and JSON must be parsed OUTSIDE ractors to avoid a
    # segfault; only XML is parsed inside the worker.
    data = work.pre_parsed_data || parse_file(path, format)

    validate_data(data, format) if work.validate?
    data = transform_data(data, format) if work.transform?

    output_path = write_output(data, path, work.output_dir, format)

    {
      file: File.basename(path),
      format: format,
      records: data.is_a?(Array) ? data.size : 1,
      output: output_path,
      checksum: calculate_checksum(path),
      status: "success"
    }
  rescue StandardError => e
    {
      file: File.basename(work.file_path),
      status: "error",
      error: e.message,
      error_class: e.class.name
    }
  end

  private

  # Map a file extension to a format symbol; raises on unknown extensions.
  def detect_format(file_path)
    ext = File.extname(file_path).downcase
    case ext
    when ".csv" then :csv
    when ".json" then :json
    when ".xml" then :xml
    else raise "Unsupported file format: #{ext}"
    end
  end

  # Dispatch parsing by format symbol.
  def parse_file(file_path, format)
    case format
    when :csv then parse_csv(file_path)
    when :json then parse_json(file_path)
    when :xml then parse_xml(file_path)
    end
  end

  # Parse CSV into an array of plain hashes immediately
  # (CSV::Table itself is not Ractor-safe).
  def parse_csv(file_path)
    table = CSV.parse(File.read(file_path), headers: true)
    table.map(&:to_hash)
  end

  def parse_json(file_path)
    JSON.parse(File.read(file_path))
  end

  # Minimal regex-based XML parsing (avoids REXML, which has Ractor issues).
  # Extracts <record>...</record> elements into hashes of tag => text.
  def parse_xml(file_path)
    content = File.read(file_path)
    content.scan(%r{<record>(.*?)</record>}m).filter_map do |(record_body)|
      fields = {}
      record_body.scan(%r{<(\w+)>(.*?)</\1>}m) { |tag, value| fields[tag] = value.strip }
      fields unless fields.empty?
    end
  end

  # Raise when parsed data does not satisfy basic structural expectations.
  def validate_data(data, format)
    case format
    when :csv, :xml
      raise "No records found" if data.empty?

      data.each_with_index do |record, idx|
        raise "Record #{idx + 1} is not a hash" unless record.is_a?(Hash)
        raise "Record #{idx + 1} is empty" if record.empty?
      end
    when :json
      if data.is_a?(Array)
        raise "No records found" if data.empty?
      elsif !data.is_a?(Hash)
        raise "Invalid JSON structure"
      end
    end
  end

  # Apply transform_record across the parsed structure.
  def transform_data(data, format)
    case format
    when :csv, :xml
      data.map { |record| transform_record(record) }
    when :json
      data.is_a?(Array) ? data.map { |record| transform_record(record) } : transform_record(data)
    end
  end

  # Symbolize keys and coerce integer-, float- and boolean-looking strings.
  def transform_record(record)
    record.each_with_object({}) do |(key, value), acc|
      acc[key.to_sym] =
        case value
        when /^\d+$/ then value.to_i
        when /^\d+\.\d+$/ then value.to_f
        when /^(true|false)$/i then value.downcase == "true"
        else value
        end
    end
  end

  # Serialize processed data plus metadata as pretty JSON; returns the path.
  def write_output(data, original_path, output_dir, format)
    FileUtils.mkdir_p(output_dir)

    base_name = File.basename(original_path, ".*")
    output_path = File.join(output_dir, "#{base_name}_processed.json")

    payload = {
      source_file: File.basename(original_path),
      format: format,
      records: data,
      processed_at: Time.now.iso8601,
      record_count: data.is_a?(Array) ? data.size : 1
    }

    File.write(output_path, JSON.pretty_generate(payload))
    output_path
  end

  # SHA-256 of the original file contents.
  def calculate_checksum(file_path)
    Digest::SHA256.file(file_path).hexdigest
  end
end
|
|
237
|
+
|
|
238
|
+
# Batch file processor
|
|
239
|
+
# Coordinates parallel processing of a batch of files via Fractor,
# collecting successes, errors, and a JSON-file-backed dead letter queue.
class BatchFileProcessor
  attr_reader :files, :results, :errors, :dlq_files

  # @param output_dir [String] directory for processed output JSON
  # @param dlq_dir [String] directory holding dead-letter-queue entries
  def initialize(output_dir: "processed", dlq_dir: "dlq")
    @output_dir = output_dir
    @dlq_dir = dlq_dir
    @files = []
    @results = []
    @errors = []
    @dlq_files = []
  end

  # Queue a single file; paths that do not exist are silently ignored.
  def add_file(file_path)
    @files << file_path if File.exist?(file_path)
  end

  # Queue many files at once (each checked for existence).
  def add_files(file_paths)
    file_paths.each { |path| add_file(path) }
  end

  # Process every queued file with a pool of FileProcessorWorker ractors.
  # Returns { processed:, errors:, dlq: } arrays of result hashes; also
  # resets and repopulates @results/@errors/@dlq_files as a side effect.
  def process_all(num_workers: 4, validate: true, transform: true)
    return { processed: [], errors: [], dlq: [] } if @files.empty?

    puts "Processing #{@files.size} files with #{num_workers} workers..."
    puts "Validation: #{validate ? 'enabled' : 'disabled'}"
    puts "Transformation: #{transform ? 'enabled' : 'disabled'}"
    puts

    # Parse CSV/JSON files OUTSIDE ractors to avoid segfault
    # XML can be parsed inside ractors (Ractor-safe)
    work_items = @files.map do |file_path|
      format = detect_format_from_path(file_path)
      pre_parsed_data = nil

      # Parse CSV and JSON outside ractors
      if format == :csv
        pre_parsed_data = parse_csv_outside_ractor(file_path)
      elsif format == :json
        pre_parsed_data = parse_json_outside_ractor(file_path)
      end

      FileWork.new(file_path, @output_dir,
                   validate: validate,
                   transform: transform,
                   pre_parsed_data: pre_parsed_data)
    end

    supervisor = Fractor::Supervisor.new(
      worker_pools: [
        { worker_class: FileProcessorWorker, num_workers: num_workers }
      ]
    )

    supervisor.add_work_items(work_items)
    supervisor.run

    results_obj = supervisor.results
    # Successes and failures are aggregated separately by the supervisor;
    # walk both so every file gets reported below.
    all_results = results_obj.results + results_obj.errors

    @results = []
    @errors = []
    @dlq_files = []

    all_results.each do |work_result|
      # The supervisor may yield WorkResult wrappers or plain hashes.
      result = work_result.respond_to?(:result) ? work_result.result : work_result

      next unless result.is_a?(Hash)

      if result[:status] == "success"
        @results << result
        puts "[✓] #{result[:file]}: #{result[:records]} records processed"
      else
        @errors << result
        puts "[✗] #{result[:file]}: #{result[:error]}"

        # Move to DLQ if it's a validation or parsing error
        if should_move_to_dlq?(result)
          move_to_dlq(result)
        end
      end
    end

    puts "\n=== Processing Complete ==="
    puts "Successful: #{@results.size}"
    puts "Errors: #{@errors.size}"
    puts "DLQ: #{@dlq_files.size}"
    puts

    {
      processed: @results,
      errors: @errors,
      dlq: @dlq_files
    }
  end

  # Read every DLQ entry (*.json under @dlq_dir) back as symbol-keyed hashes.
  def inspect_dlq
    return [] unless Dir.exist?(@dlq_dir)

    dlq_files = Dir.glob(File.join(@dlq_dir, "*.json"))

    dlq_files.map do |file_path|
      JSON.parse(File.read(file_path), symbolize_names: true)
    end
  end

  # Re-run the original file recorded in a DLQ entry; deletes the entry on
  # success. Returns true/false. NOTE(review): this replaces @files with just
  # the retried file, discarding any previously queued batch — confirm intended.
  def retry_dlq_file(dlq_file_name)
    dlq_path = File.join(@dlq_dir, dlq_file_name)

    unless File.exist?(dlq_path)
      puts "DLQ file not found: #{dlq_file_name}"
      return false
    end

    dlq_entry = JSON.parse(File.read(dlq_path), symbolize_names: true)
    original_file = dlq_entry[:original_file]

    unless File.exist?(original_file)
      puts "Original file not found: #{original_file}"
      return false
    end

    puts "Retrying #{File.basename(original_file)}..."

    @files = [original_file]
    result = process_all(num_workers: 1)

    if result[:processed].any?
      # Remove from DLQ if successful
      File.delete(dlq_path)
      puts "Successfully processed and removed from DLQ"
      true
    else
      puts "Retry failed, file remains in DLQ"
      false
    end
  end

  private

  # Heuristic: only data-quality failures (parse/validation errors, as matched
  # by class name or message substrings) belong in the DLQ.
  def should_move_to_dlq?(result)
    # Move to DLQ for validation errors or parse errors
    error_class = result[:error_class] || ""
    error_msg = result[:error] || ""

    error_class.include?("JSON::ParserError") ||
      error_class.include?("CSV::") ||
      error_msg.include?("No records found") ||
      error_msg.include?("empty") ||
      error_msg.include?("Invalid")
  end

  # Persist a failed result as a JSON DLQ entry with file metadata/checksum.
  def move_to_dlq(result)
    FileUtils.mkdir_p(@dlq_dir)

    # Recover the full path from the basename recorded in the result hash.
    original_file = @files.find { |f| File.basename(f) == result[:file] }
    return unless original_file

    dlq_entry = {
      file: result[:file],
      original_file: original_file,
      error: result[:error],
      error_class: result[:error_class],
      moved_at: Time.now.iso8601,
      checksum: calculate_file_checksum(original_file)
    }

    dlq_file = File.join(@dlq_dir, "#{File.basename(result[:file], '.*')}_dlq.json")
    File.write(dlq_file, JSON.pretty_generate(dlq_entry))

    @dlq_files << dlq_entry
  end

  # SHA-256 of the source file, or nil when the file has vanished.
  def calculate_file_checksum(file_path)
    return nil unless File.exist?(file_path)

    Digest::SHA256.file(file_path).hexdigest
  end

  # Helper methods to parse files OUTSIDE ractors
  # CSV and JSON must be parsed sequentially due to library limitations

  # Extension-based format detection (mirrors FileProcessorWorker#detect_format).
  def detect_format_from_path(file_path)
    ext = File.extname(file_path).downcase
    case ext
    when ".csv"
      :csv
    when ".json"
      :json
    when ".xml"
      :xml
    else
      raise "Unsupported file format: #{ext}"
    end
  end

  # Parse CSV ahead of distribution; returns an array of plain hashes.
  def parse_csv_outside_ractor(file_path)
    content = File.read(file_path)
    csv_table = CSV.parse(content, headers: true)

    # Convert to array immediately (CSV::Table is not Ractor-safe)
    result = []
    csv_table.each { |row| result << row.to_hash }
    result
  rescue StandardError => e
    raise "Failed to parse CSV #{file_path}: #{e.message}"
  end

  # Parse JSON ahead of distribution; always returns an Array.
  def parse_json_outside_ractor(file_path)
    content = File.read(file_path)
    data = JSON.parse(content)

    # Normalize to array format
    data.is_a?(Array) ? data : [data]
  rescue StandardError => e
    raise "Failed to parse JSON #{file_path}: #{e.message}"
  end
end
|
|
456
|
+
|
|
457
|
+
# Progress report generator
|
|
458
|
+
# Renders a plain-text summary of a batch processing run.
class ProcessingReport
  # Build the report; write it to output_file when given, otherwise print
  # it. Returns the report string either way.
  def self.generate(results, output_file = nil)
    report = build_report(results)

    if output_file
      File.write(output_file, report)
      puts "Report saved to #{output_file}"
    else
      puts report
    end

    report
  end

  # Assemble the report text from { processed:, errors:, dlq: } result arrays
  # (missing keys are treated as empty).
  def self.build_report(results)
    processed = results[:processed] || []
    errors = results[:errors] || []
    dlq = results[:dlq] || []

    rule = "=" * 80
    divider = "-" * 80
    lines = []

    lines << rule
    lines << "BATCH FILE PROCESSING REPORT"
    lines << rule
    lines << ""

    # Summary counts
    lines << "SUMMARY"
    lines << divider
    lines << format("Total files: %d", processed.size + errors.size)
    lines << format("Successful: %d", processed.size)
    lines << format("Errors: %d", errors.size)
    lines << format("Dead Letter Queue: %d", dlq.size)
    lines << ""

    # Per-file success details
    unless processed.empty?
      lines << "SUCCESSFUL PROCESSING (#{processed.size})"
      lines << divider
      processed.each do |result|
        lines << format(" %s [%s]: %d records",
                        result[:file], result[:format], result[:records])
      end
      lines << ""
    end

    # Per-file failure details
    unless errors.empty?
      lines << "ERRORS (#{errors.size})"
      lines << divider
      errors.each { |result| lines << format(" %s: %s", result[:file], result[:error]) }
      lines << ""
    end

    # Dead-letter-queue details
    unless dlq.empty?
      lines << "DEAD LETTER QUEUE (#{dlq.size})"
      lines << divider
      dlq.each { |entry| lines << format(" %s: %s", entry[:file], entry[:error]) }
      lines << ""
    end

    lines << rule

    lines.join("\n")
  end
end
|
|
528
|
+
|
|
529
|
+
# Run example if executed directly
|
|
530
|
+
# CLI entry point: parse options, then either inspect/retry the DLQ or
# process the files listed on the command line and emit a report.
if __FILE__ == $PROGRAM_NAME
  require "optparse"

  options = {
    workers: 4,
    validate: true,
    transform: true,
    output: nil,
    inspect_dlq: false,
    retry_dlq: nil
  }

  OptionParser.new do |opts|
    opts.banner = "Usage: file_processor.rb [options] FILES..."

    opts.on("-w", "--workers NUM", Integer,
            "Number of worker ractors (default: 4)") { |n| options[:workers] = n }
    opts.on("--[no-]validate",
            "Enable/disable validation (default: true)") { |v| options[:validate] = v }
    opts.on("--[no-]transform",
            "Enable/disable transformation (default: true)") { |t| options[:transform] = t }
    opts.on("-o", "--output FILE", "Output report file") { |f| options[:output] = f }
    opts.on("--inspect-dlq", "Inspect dead letter queue") { options[:inspect_dlq] = true }
    opts.on("--retry-dlq FILE", "Retry a file from DLQ") { |f| options[:retry_dlq] = f }
    opts.on("-h", "--help", "Show this message") do
      puts opts
      exit
    end
  end.parse!

  processor = BatchFileProcessor.new

  # Mode 1: dump the DLQ contents and exit.
  if options[:inspect_dlq]
    puts "=== Dead Letter Queue Inspection ==="
    puts
    entries = processor.inspect_dlq

    if entries.empty?
      puts "DLQ is empty"
    else
      entries.each do |entry|
        puts "File: #{entry[:file]}"
        puts " Error: #{entry[:error]}"
        puts " Moved at: #{entry[:moved_at]}"
        puts " Checksum: #{entry[:checksum]}"
        puts
      end
    end
    exit
  end

  # Mode 2: retry one DLQ entry and exit.
  if options[:retry_dlq]
    processor.retry_dlq_file(options[:retry_dlq])
    exit
  end

  # Mode 3: batch-process files named on the command line.
  if ARGV.empty?
    puts "Error: No files specified"
    puts "Usage: file_processor.rb [options] FILES..."
    exit 1
  end

  processor.add_files(ARGV)
  run_results = processor.process_all(
    num_workers: options[:workers],
    validate: options[:validate],
    transform: options[:transform]
  )

  ProcessingReport.generate(run_results, options[:output])
end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
id,name,email
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<records>
|
|
3
|
+
<record>
|
|
4
|
+
<id>1001</id>
|
|
5
|
+
<user_id>1</user_id>
|
|
6
|
+
<product_id>101</product_id>
|
|
7
|
+
<quantity>1</quantity>
|
|
8
|
+
<total>999.99</total>
|
|
9
|
+
</record>
|
|
10
|
+
<record>
|
|
11
|
+
<id>1002</id>
|
|
12
|
+
<user_id>2</user_id>
|
|
13
|
+
<product_id>102</product_id>
|
|
14
|
+
<quantity>2</quantity>
|
|
15
|
+
<total>59.98</total>
|
|
16
|
+
</record>
|
|
17
|
+
<record>
|
|
18
|
+
<id>1003</id>
|
|
19
|
+
<user_id>3</user_id>
|
|
20
|
+
<product_id>103</product_id>
|
|
21
|
+
<quantity>1</quantity>
|
|
22
|
+
<total>79.99</total>
|
|
23
|
+
</record>
|
|
24
|
+
</records>
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"id": "101",
|
|
4
|
+
"name": "Laptop",
|
|
5
|
+
"price": "999.99",
|
|
6
|
+
"stock": "15",
|
|
7
|
+
"available": "true"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"id": "102",
|
|
11
|
+
"name": "Mouse",
|
|
12
|
+
"price": "29.99",
|
|
13
|
+
"stock": "50",
|
|
14
|
+
"available": "true"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "103",
|
|
18
|
+
"name": "Keyboard",
|
|
19
|
+
"price": "79.99",
|
|
20
|
+
"stock": "30",
|
|
21
|
+
"available": "false"
|
|
22
|
+
}
|
|
23
|
+
]
|