simple_flow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.envrc +1 -0
  3. data/.github/workflows/deploy-github-pages.yml +52 -0
  4. data/.rubocop.yml +57 -0
  5. data/CHANGELOG.md +4 -0
  6. data/COMMITS.md +196 -0
  7. data/LICENSE +21 -0
  8. data/README.md +481 -0
  9. data/Rakefile +15 -0
  10. data/benchmarks/parallel_vs_sequential.rb +98 -0
  11. data/benchmarks/pipeline_overhead.rb +130 -0
  12. data/docs/api/middleware.md +468 -0
  13. data/docs/api/parallel-step.md +363 -0
  14. data/docs/api/pipeline.md +382 -0
  15. data/docs/api/result.md +375 -0
  16. data/docs/concurrent/best-practices.md +687 -0
  17. data/docs/concurrent/introduction.md +246 -0
  18. data/docs/concurrent/parallel-steps.md +418 -0
  19. data/docs/concurrent/performance.md +481 -0
  20. data/docs/core-concepts/flow-control.md +452 -0
  21. data/docs/core-concepts/middleware.md +389 -0
  22. data/docs/core-concepts/overview.md +219 -0
  23. data/docs/core-concepts/pipeline.md +315 -0
  24. data/docs/core-concepts/result.md +168 -0
  25. data/docs/core-concepts/steps.md +391 -0
  26. data/docs/development/benchmarking.md +443 -0
  27. data/docs/development/contributing.md +380 -0
  28. data/docs/development/dagwood-concepts.md +435 -0
  29. data/docs/development/testing.md +514 -0
  30. data/docs/getting-started/examples.md +197 -0
  31. data/docs/getting-started/installation.md +62 -0
  32. data/docs/getting-started/quick-start.md +218 -0
  33. data/docs/guides/choosing-concurrency-model.md +441 -0
  34. data/docs/guides/complex-workflows.md +440 -0
  35. data/docs/guides/data-fetching.md +478 -0
  36. data/docs/guides/error-handling.md +635 -0
  37. data/docs/guides/file-processing.md +505 -0
  38. data/docs/guides/validation-patterns.md +496 -0
  39. data/docs/index.md +169 -0
  40. data/examples/.gitignore +3 -0
  41. data/examples/01_basic_pipeline.rb +112 -0
  42. data/examples/02_error_handling.rb +178 -0
  43. data/examples/03_middleware.rb +186 -0
  44. data/examples/04_parallel_automatic.rb +221 -0
  45. data/examples/05_parallel_explicit.rb +279 -0
  46. data/examples/06_real_world_ecommerce.rb +288 -0
  47. data/examples/07_real_world_etl.rb +277 -0
  48. data/examples/08_graph_visualization.rb +246 -0
  49. data/examples/09_pipeline_visualization.rb +266 -0
  50. data/examples/10_concurrency_control.rb +235 -0
  51. data/examples/11_sequential_dependencies.rb +243 -0
  52. data/examples/12_none_constant.rb +161 -0
  53. data/examples/README.md +374 -0
  54. data/examples/regression_test/01_basic_pipeline.txt +38 -0
  55. data/examples/regression_test/02_error_handling.txt +92 -0
  56. data/examples/regression_test/03_middleware.txt +61 -0
  57. data/examples/regression_test/04_parallel_automatic.txt +86 -0
  58. data/examples/regression_test/05_parallel_explicit.txt +80 -0
  59. data/examples/regression_test/06_real_world_ecommerce.txt +53 -0
  60. data/examples/regression_test/07_real_world_etl.txt +58 -0
  61. data/examples/regression_test/08_graph_visualization.txt +429 -0
  62. data/examples/regression_test/09_pipeline_visualization.txt +305 -0
  63. data/examples/regression_test/10_concurrency_control.txt +96 -0
  64. data/examples/regression_test/11_sequential_dependencies.txt +86 -0
  65. data/examples/regression_test/12_none_constant.txt +64 -0
  66. data/examples/regression_test.rb +105 -0
  67. data/lib/simple_flow/dependency_graph.rb +120 -0
  68. data/lib/simple_flow/dependency_graph_visualizer.rb +326 -0
  69. data/lib/simple_flow/middleware.rb +36 -0
  70. data/lib/simple_flow/parallel_executor.rb +80 -0
  71. data/lib/simple_flow/pipeline.rb +405 -0
  72. data/lib/simple_flow/result.rb +88 -0
  73. data/lib/simple_flow/step_tracker.rb +58 -0
  74. data/lib/simple_flow/version.rb +5 -0
  75. data/lib/simple_flow.rb +41 -0
  76. data/mkdocs.yml +146 -0
  77. data/pipeline_graph.dot +51 -0
  78. data/pipeline_graph.html +60 -0
  79. data/pipeline_graph.mmd +19 -0
  80. metadata +127 -0
@@ -0,0 +1,505 @@
1
+ # File Processing Guide
2
+
3
+ This guide demonstrates how to process files efficiently using SimpleFlow, including reading, writing, transforming, and validating file content.
4
+
5
+ ## Reading Files
6
+
7
+ ### Basic File Reading
8
+
9
+ ```ruby
10
+ step :read_file, ->(result) {
11
+ begin
12
+ filepath = result.value
13
+ content = File.read(filepath)
14
+ result.with_context(:content, content).continue(filepath)
15
+ rescue Errno::ENOENT
16
+ result.halt.with_error(:file, "File not found: #{filepath}")
17
+ rescue Errno::EACCES
18
+ result.halt.with_error(:file, "Permission denied: #{filepath}")
19
+ end
20
+ }
21
+ ```
22
+
23
+ ### Reading JSON Files
24
+
25
+ ```ruby
26
+ step :read_json, ->(result) {
27
+ begin
28
+ content = File.read(result.value)
29
+ data = JSON.parse(content)
30
+ result.continue(data)
31
+ rescue JSON::ParserError => e
32
+ result.halt.with_error(:parse, "Invalid JSON: #{e.message}")
33
+ end
34
+ }
35
+ ```
36
+
37
+ ### Reading CSV Files
38
+
39
+ ```ruby
40
+ step :read_csv, ->(result) {
41
+ begin
42
+ rows = CSV.read(result.value, headers: true)
43
+ data = rows.map(&:to_h)
44
+ result.continue(data)
45
+ rescue CSV::MalformedCSVError => e
46
+ result.halt.with_error(:parse, "Malformed CSV: #{e.message}")
47
+ end
48
+ }
49
+ ```
50
+
51
+ ### Reading YAML Files
52
+
53
+ ```ruby
54
+ step :read_yaml, ->(result) {
55
+ begin
56
+ data = YAML.load_file(result.value)
57
+ result.continue(data)
58
+ rescue Psych::SyntaxError => e
59
+ result.halt.with_error(:parse, "Invalid YAML: #{e.message}")
60
+ end
61
+ }
62
+ ```
63
+
64
+ ## Writing Files
65
+
66
+ ### Writing Text Files
67
+
68
+ ```ruby
69
+ step :write_file, ->(result) {
70
+ begin
71
+ filepath = result.value[:path]
72
+ content = result.value[:content]
73
+
74
+ File.write(filepath, content)
75
+ result.with_context(:bytes_written, content.bytesize).continue(filepath)
76
+ rescue Errno::EACCES
77
+ result.halt.with_error(:file, "Permission denied: #{filepath}")
78
+ rescue Errno::ENOSPC
79
+ result.halt.with_error(:file, "No space left on device")
80
+ end
81
+ }
82
+ ```
83
+
84
+ ### Writing JSON Files
85
+
86
+ ```ruby
87
+ step :write_json, ->(result) {
88
+ filepath = result.value[:path]
89
+ data = result.value[:data]
90
+
91
+ json_content = JSON.pretty_generate(data)
92
+ File.write(filepath, json_content)
93
+
94
+ result.with_context(:path, filepath).continue(data)
95
+ }
96
+ ```
97
+
98
+ ### Writing CSV Files
99
+
100
+ ```ruby
101
+ step :write_csv, ->(result) {
102
+ filepath = result.value[:path]
103
+ rows = result.value[:rows]
104
+
105
+ CSV.open(filepath, 'w', write_headers: true, headers: rows.first.keys) do |csv|
106
+ rows.each { |row| csv << row.values }
107
+ end
108
+
109
+ result.with_context(:rows_written, rows.size).continue(filepath)
110
+ }
111
+ ```
112
+
113
+ ## Processing Large Files
114
+
115
+ ### Line-by-Line Processing
116
+
117
+ ```ruby
118
+ step :process_large_file, ->(result) {
119
+ filepath = result.value
120
+ processed = 0
121
+
122
+ File.foreach(filepath) do |line|
123
+ process_line(line.strip)
124
+ processed += 1
125
+ end
126
+
127
+ result.with_context(:lines_processed, processed).continue(filepath)
128
+ }
129
+ ```
130
+
131
+ ### Batch Processing
132
+
133
+ ```ruby
134
+ step :process_in_batches, ->(result) {
135
+ filepath = result.value
136
+ batch_size = 1000
137
+ batches_processed = 0
138
+
139
+ File.foreach(filepath).each_slice(batch_size) do |batch|
140
+ # Process batch
141
+ transformed = batch.map { |line| transform(line) }
142
+ save_batch(transformed)
143
+ batches_processed += 1
144
+ end
145
+
146
+ result.with_context(:batches_processed, batches_processed).continue(filepath)
147
+ }
148
+ ```
149
+
150
+ ### Streaming Large Files
151
+
152
+ ```ruby
153
+ step :stream_process, ->(result) {
154
+ input_path = result.value[:input]
155
+ output_path = result.value[:output]
156
+
157
+ File.open(output_path, 'w') do |output|
158
+ File.foreach(input_path) do |line|
159
+ transformed = transform_line(line)
160
+ output.write(transformed)
161
+ end
162
+ end
163
+
164
+ result.continue(output_path)
165
+ }
166
+ ```
167
+
168
+ ## Multi-File Processing
169
+
170
+ ### Processing Multiple Files in Parallel
171
+
172
+ ```ruby
173
+ pipeline = SimpleFlow::Pipeline.new do
174
+ step :process_config, ->(result) {
175
+ config = JSON.parse(File.read('config/app.json'))
176
+ result.with_context(:config, config).continue(result.value)
177
+ }, depends_on: []
178
+
179
+ step :process_users, ->(result) {
180
+ users = CSV.read('data/users.csv', headers: true).map(&:to_h)
181
+ result.with_context(:users, users).continue(result.value)
182
+ }, depends_on: []
183
+
184
+ step :process_logs, ->(result) {
185
+ logs = File.readlines('logs/app.log').map(&:strip)
186
+ result.with_context(:logs, logs).continue(result.value)
187
+ }, depends_on: []
188
+
189
+ step :combine_results, ->(result) {
190
+ summary = {
191
+ config: result.context[:config],
192
+ user_count: result.context[:users].size,
193
+ log_count: result.context[:logs].size
194
+ }
+ result.continue(summary)
195
+ }, depends_on: [:process_config, :process_users, :process_logs]
196
+ end
197
+
198
+ result = pipeline.call_parallel(SimpleFlow::Result.new(nil))
199
+ ```
200
+
201
+ ### Directory Processing
202
+
203
+ ```ruby
204
+ step :process_directory, ->(result) {
205
+ dir_path = result.value
206
+ processed_files = []
207
+
208
+ Dir.glob(File.join(dir_path, '*.json')).each do |filepath|
209
+ data = JSON.parse(File.read(filepath))
210
+ transformed = transform_data(data)
211
+ processed_files << { file: filepath, records: transformed.size }
212
+ end
213
+
214
+ result.with_context(:processed_files, processed_files).continue(dir_path)
215
+ }
216
+ ```
217
+
218
+ ## Data Transformation
219
+
220
+ ### CSV to JSON Conversion
221
+
222
+ ```ruby
223
+ pipeline = SimpleFlow::Pipeline.new do
224
+ step :read_csv, ->(result) {
225
+ rows = CSV.read(result.value, headers: true)
226
+ result.with_context(:source_path, result.value).continue(rows.map(&:to_h))
227
+ }
228
+
229
+ step :transform_data, ->(result) {
230
+ transformed = result.value.map do |row|
231
+ {
232
+ id: row['id'].to_i,
233
+ name: row['name'].strip,
234
+ email: row['email'].downcase,
235
+ active: row['active'] == 'true'
236
+ }
237
+ end
238
+ result.continue(transformed)
239
+ }
240
+
241
+ step :write_json, ->(result) {
242
+ output_path = result.context[:source_path].sub(/\.csv\z/, '') + '.json'
243
+ File.write(output_path, JSON.pretty_generate(result.value))
244
+ result.continue(output_path)
245
+ }
246
+ end
247
+ ```
248
+
249
+ ### File Format Conversion Pipeline
250
+
251
+ ```ruby
252
+ class FileConverter
253
+ def self.build(input_format:, output_format:)
254
+ SimpleFlow::Pipeline.new do
255
+ step :read_input, reader_for(input_format), depends_on: []
256
+ step :transform, ->(result) {
257
+ # Normalize to common format
258
+ result.continue(normalize_data(result.value))
259
+ }, depends_on: [:read_input]
260
+ step :write_output, writer_for(output_format), depends_on: [:transform]
261
+ end
262
+ end
263
+
264
+ def self.reader_for(format)
265
+ case format
266
+ when :json then ->(result) { result.continue(JSON.parse(File.read(result.value))) }
266
+ when :csv then ->(result) { result.continue(CSV.read(result.value, headers: true).map(&:to_h)) }
267
+ when :yaml then ->(result) { result.continue(YAML.load_file(result.value)) }
269
+ end
270
+ end
271
+
272
+ def self.writer_for(format)
273
+ case format
274
+ when :json then ->(result) { File.write(result.value[:output], JSON.pretty_generate(result.value[:data])); result.continue(result.value[:output]) }
274
+ when :csv then ->(result) { write_csv(result.value[:output], result.value[:data]); result.continue(result.value[:output]) }
275
+ when :yaml then ->(result) { File.write(result.value[:output], result.value[:data].to_yaml); result.continue(result.value[:output]) }
277
+ end
278
+ end
279
+ end
280
+ ```
281
+
282
+ ## File Validation
283
+
284
+ ### Validating File Existence
285
+
286
+ ```ruby
287
+ step :validate_file_exists, ->(result) {
288
+ filepath = result.value
289
+
290
+ unless File.exist?(filepath)
291
+ return result.halt.with_error(:file, "File does not exist: #{filepath}")
292
+ end
293
+
294
+ unless File.readable?(filepath)
295
+ return result.halt.with_error(:file, "File is not readable: #{filepath}")
296
+ end
297
+
298
+ result.continue(filepath)
299
+ }
300
+ ```
301
+
302
+ ### Validating File Format
303
+
304
+ ```ruby
305
+ step :validate_json_format, ->(result) {
306
+ begin
307
+ content = File.read(result.value)
308
+ JSON.parse(content) # Just validate, don't use result yet
309
+ result.continue(result.value)
310
+ rescue JSON::ParserError => e
311
+ result.halt.with_error(:format, "Invalid JSON file: #{e.message}")
312
+ end
313
+ }
314
+ ```
315
+
316
+ ### Validating File Size
317
+
318
+ ```ruby
319
+ step :validate_file_size, ->(result) {
320
+ filepath = result.value
321
+ max_size = 10 * 1024 * 1024 # 10 MB
322
+
323
+ file_size = File.size(filepath)
324
+
325
+ if file_size > max_size
326
+ result.halt.with_error(:size, "File too large: #{file_size} bytes (max #{max_size})")
327
+ else
328
+ result.with_context(:file_size, file_size).continue(filepath)
329
+ end
330
+ }
331
+ ```
332
+
333
+ ## Complete File Processing Example
334
+
335
+ ```ruby
336
+ class CSVProcessor
337
+ def self.build
338
+ SimpleFlow::Pipeline.new do
339
+ # Validate file
340
+ step :validate_exists, ->(result) {
341
+ filepath = result.value
342
+ unless File.exist?(filepath)
343
+ return result.halt.with_error(:file, "File not found")
344
+ end
345
+ result.continue(filepath)
346
+ }, depends_on: []
347
+
348
+ step :validate_size, ->(result) {
349
+ size = File.size(result.value)
350
+ max_size = 50 * 1024 * 1024 # 50 MB
351
+
352
+ if size > max_size
353
+ return result.halt.with_error(:size, "File too large")
354
+ end
355
+
356
+ result.with_context(:file_size, size).continue(result.value)
357
+ }, depends_on: [:validate_exists]
358
+
359
+ # Read and parse
360
+ step :read_csv, ->(result) {
361
+ rows = CSV.read(result.value, headers: true)
362
+ result.continue(rows.map(&:to_h))
363
+ }, depends_on: [:validate_size]
364
+
365
+ # Validate data
366
+ step :validate_headers, ->(result) {
367
+ required = ['id', 'name', 'email']
368
+ actual = result.value.first.keys
369
+
370
+ missing = required - actual
371
+ if missing.any?
372
+ return result.halt.with_error(:headers, "Missing columns: #{missing.join(', ')}")
373
+ end
374
+
375
+ result.continue(result.value)
376
+ }, depends_on: [:read_csv]
377
+
378
+ # Transform data
379
+ step :clean_data, ->(result) {
380
+ cleaned = result.value.map do |row|
381
+ {
382
+ id: row['id'].to_i,
383
+ name: row['name'].strip.capitalize,
384
+ email: row['email'].downcase.strip
385
+ }
386
+ end
387
+ result.continue(cleaned)
388
+ }, depends_on: [:validate_headers]
389
+
390
+ step :filter_invalid, ->(result) {
391
+ valid = result.value.select do |row|
392
+ row[:email] =~ /\A[\w+\-.]+@[a-z\d\-]+(\.[a-z\d\-]+)*\.[a-z]+\z/i
393
+ end
394
+
395
+ invalid_count = result.value.size - valid.size
396
+ if invalid_count > 0
397
+ result = result.with_context(:invalid_count, invalid_count)
398
+ end
399
+
400
+ result.continue(valid)
401
+ }, depends_on: [:clean_data]
402
+
403
+ # Save results
404
+ step :write_output, ->(result) {
405
+ output = 'output/cleaned.json'
406
+ File.write(output, JSON.pretty_generate(result.value))
407
+
408
+ result
409
+ .with_context(:output_file, output)
410
+ .with_context(:records_written, result.value.size)
411
+ .continue(output)
412
+ }, depends_on: [:filter_invalid]
413
+ end
414
+ end
415
+ end
416
+
417
+ # Usage
418
+ result = CSVProcessor.build.call(
419
+ SimpleFlow::Result.new('data/users.csv')
420
+ )
421
+
422
+ if result.continue?
423
+ puts "Processed successfully:"
424
+ puts " File size: #{result.context[:file_size]} bytes"
425
+ puts " Records written: #{result.context[:records_written]}"
426
+ puts " Invalid records skipped: #{result.context[:invalid_count] || 0}"
427
+ puts " Output: #{result.context[:output_file]}"
428
+ else
429
+ puts "Processing failed:"
430
+ result.errors.each do |category, messages|
431
+ puts " #{category}: #{messages.join(', ')}"
432
+ end
433
+ end
434
+ ```
435
+
436
+ ## Binary File Processing
437
+
438
+ ### Reading Binary Files
439
+
440
+ ```ruby
441
+ step :read_binary, ->(result) {
442
+ filepath = result.value
443
+ content = File.binread(filepath)
444
+
445
+ result
446
+ .with_context(:file_size, content.bytesize)
447
+ .with_context(:encoding, content.encoding.name)
448
+ .continue(content)
449
+ }
450
+ ```
451
+
452
+ ### Processing Images
453
+
454
+ ```ruby
455
+ require 'mini_magick'
456
+
457
+ step :process_image, ->(result) {
458
+ filepath = result.value
459
+
460
+ image = MiniMagick::Image.open(filepath)
461
+
462
+ # Resize if too large
463
+ if image.width > 1920 || image.height > 1080
464
+ image.resize '1920x1080'
465
+ end
466
+
467
+ # Generate and save thumbnail
468
+ thumbnail = image.clone
469
+ thumbnail.resize '200x200'
470
+ thumb_path = filepath.sub(/\.jpg\z/, '_thumb.jpg')
+ thumbnail.write(thumb_path)
471
+
+ result
472
+ .with_context(:original_size, [image.width, image.height])
473
+ .with_context(:thumbnail_path, thumb_path)
474
+ .continue(filepath)
475
+ }
476
+ ```
477
+
478
+ ## Temporary Files
479
+
480
+ ### Using Temporary Files
481
+
482
+ ```ruby
483
+ step :use_temp_file, ->(result) {
484
+ require 'tempfile'
485
+
486
+ Tempfile.create(['process', '.json']) do |temp|
487
+ # Write intermediate data
488
+ temp.write(JSON.generate(result.value))
489
+ temp.rewind
490
+
491
+ # Process temp file
492
+ processed = process_file(temp.path)
493
+
494
+ # Temp file automatically deleted when block exits
495
+ result.continue(processed)
496
+ end
497
+ }
498
+ ```
499
+
500
+ ## Related Documentation
501
+
502
+ - [Data Fetching](data-fetching.md) - Fetching data from various sources
503
+ - [Error Handling](error-handling.md) - Error handling strategies
504
+ - [Complex Workflows](complex-workflows.md) - Building complete pipelines
505
+ - [Performance Guide](../concurrent/performance.md) - File processing optimization