csvops 0.4.0.alpha → 0.5.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/README.md +15 -9
  3. data/docs/architecture.md +148 -18
  4. data/docs/release-v0.5.0-alpha.md +89 -0
  5. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
  6. data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
  7. data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
  8. data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
  9. data/lib/csvtool/cli.rb +6 -6
  10. data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
  11. data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
  12. data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
  13. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
  14. data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
  15. data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
  16. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
  17. data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
  18. data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
  19. data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
  20. data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
  21. data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
  22. data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
  23. data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
  24. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
  25. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
  26. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
  27. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
  28. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
  29. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
  30. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
  31. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
  32. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
  33. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
  34. data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
  35. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
  36. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
  37. data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
  38. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
  39. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
  40. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
  41. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
  42. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
  43. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
  44. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
  45. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
  46. data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
  47. data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
  48. data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
  49. data/lib/csvtool/version.rb +1 -1
  50. data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
  51. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
  52. data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
  53. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
  54. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
  55. data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
  56. data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
  57. data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
  58. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
  59. data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
  60. data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
  61. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
  62. data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
  63. data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
  64. data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
  65. data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
  66. data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
  67. data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
  68. data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
  69. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
  70. data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
  71. data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
  72. data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
  73. data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
  74. data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
  75. data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
  76. data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
  77. data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
  78. data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
  79. data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
  80. data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
  81. data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
  82. data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
  83. data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
  84. metadata +60 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c5a0c00272c2d10751b234384ac50ee8caa90681860906419ccdec7a6e3c110
4
- data.tar.gz: 849d377bec9acd507c0fd37a75e823bb9458295e12a31a5000b9ba599084092d
3
+ metadata.gz: b96fb7e03fa0629d3412a97d3abff5414492ac46ad08ede2c872e2176fcbfc62
4
+ data.tar.gz: 856b7735a472b5810d5f19dff6371a565a7fcc538ce5b6eba52260fff0028760
5
5
  SHA512:
6
- metadata.gz: ba96ce18b4e6d2fd8eb018f406c17e7b810010a788a6be1acb51a714b87dad614d822edb97f780e2c745e257bbc68c89266427876fcc4b3fee57fadb29232630
7
- data.tar.gz: 378c4a47b96cf210b28f689d9ef0aa1056c95777d3128d4044d8462cf802eeb53ca148062f4244182105e4b760bbf7dcf48d19705f69f18dfb78e3fc2e935413
6
+ metadata.gz: 5f643d331c6b54cb5feb0fe5db4ff7f8f7bc5c28461f74e3bfca5cf93d25703b84f497e72377302874b2b6302ef0fb542995c72d2d21798e3a998f6d5b294704
7
+ data.tar.gz: 0e254fa75780ce0605054c24b28301d8786535a0f2bbff7adfb45a75f09e60e5315e950648208fa5772d08cdd6abce95ea382838f568947af05ceaa77ba1888f
data/README.md CHANGED
@@ -45,14 +45,20 @@ Select `1` for column extraction, `2` for row-range extraction, `3` for row rand
45
45
 
46
46
  ### 3. Follow prompts
47
47
 
48
- Prompt flow:
48
+ Each menu action runs through a dedicated CLI workflow (`interface/cli/workflows/*`) that handles prompts/output and delegates execution to an interface-agnostic application use case.
49
49
 
50
- - CSV file path
51
- - Separator (`comma`, `tab`, `semicolon`, `pipe`, or `custom`)
52
- - Optional header filter + column selection
53
- - Skip blanks (`Y/n`, default `Y`)
54
- - Preview + confirmation
55
- - Output destination (`console` or `file`)
50
+ Workflow internals are split into small composable parts:
51
+
52
+ - `workflows/builders/*` for session construction
53
+ - `workflows/support/*` for shared mapping/dispatch utilities
54
+ - `workflows/presenters/*` for output formatting and summaries
55
+
56
+ Prompt flow by action:
57
+
58
+ - `Extract column`: file path, separator, optional header filter + column select, skip blanks, preview/confirm, output destination.
59
+ - `Extract rows (range)`: file path, separator, start row, end row, output destination.
60
+ - `Randomize rows`: file path, separator, headers present, optional seed, output destination.
61
+ - `Dedupe using another CSV`: source/reference files, separators, header modes, key selectors, match options, output destination.
56
62
 
57
63
  ### 4. Example interaction (console output)
58
64
 
@@ -176,7 +182,7 @@ bundle exec rake test
176
182
 
177
183
  ## Alpha release
178
184
 
179
- Current prerelease version: `0.4.0.alpha`
185
+ Current prerelease version: `0.5.0.alpha`
180
186
 
181
187
  Install prerelease from RubyGems:
182
188
 
@@ -186,7 +192,7 @@ gem install csvops --pre
186
192
 
187
193
  Release runbook:
188
194
 
189
- - `docs/release-v0.4.0-alpha.md`
195
+ - `docs/release-v0.5.0-alpha.md`
190
196
 
191
197
 
192
198
  ## Architecture
data/docs/architecture.md CHANGED
@@ -8,6 +8,103 @@ The codebase follows a DDD-lite layered structure:
8
8
  - `interface/cli/`: menu, prompts, workflows, and user-facing error presentation.
9
9
  - `Csvtool::CLI`: entrypoint wiring from command args to interface/application flow.
10
10
 
11
+ ## Workflow boundary (standardized)
12
+
13
+ For all interactive domains (`Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`), the boundary is:
14
+
15
+ - `interface/cli/workflows/*`: owns prompts, stdout rendering, and user-facing error presentation.
16
+ - `interface/cli/workflows/builders/*`: builds domain sessions/aggregates from prompt results.
17
+ - `interface/cli/workflows/support/*`: shared workflow utilities (error routing, output destination mapping).
18
+ - `interface/cli/workflows/presenters/*`: workflow-level output/summary rendering.
19
+ - `interface/cli/workflows/steps/*`: optional step-pipeline units for complex workflow orchestration.
20
+ - `application/use_cases/*`: interface-agnostic orchestration with request/result style contracts.
21
+ - `domain/*`: invariants and domain policies.
22
+ - `infrastructure/*`: CSV mechanics and output adapters.
23
+
24
+ Write-boundary rule:
25
+ - Use cases coordinate write paths but do not perform direct file writes.
26
+ - Direct write APIs (`CSV.open`, writable `File.open`, `File.write`, `IO.write`) are infrastructure-only.
27
+ - File output behavior is implemented in `infrastructure/output/*` writer adapters.
28
+
29
+ Current usage:
30
+
31
+ - `RunExtractionWorkflow` uses `WorkflowStepPipeline` + `Steps::Extraction::*`.
32
+ - `RunRowExtractionWorkflow` uses `WorkflowStepPipeline` + `Steps::RowExtraction::*`.
33
+ - `RunRowRandomizationWorkflow` uses `WorkflowStepPipeline` + `Steps::RowRandomization::*`.
34
+ - `RunCrossCsvDedupeWorkflow` uses `WorkflowStepPipeline` + `Steps::CrossCsvDedupe::*`.
35
+
36
+ ## Adding New Concepts
37
+
38
+ Use this checklist when introducing a new capability (for example: a new transformation function, validator, comparer, or exporter).
39
+
40
+ ### 1) Classify the concept first
41
+
42
+ - `Workflow concept`: interactive flow and prompt sequence.
43
+ - `Domain concept`: business rule/invariant and core vocabulary.
44
+ - `Application concept`: use-case orchestration and request/result contract.
45
+ - `Infrastructure concept`: file/CSV mechanics, streaming, persistence, or external IO.
46
+
47
+ If it does not clearly fit one layer, split it until each part has one responsibility.
48
+
49
+ ### 2) Add the feature vertically (thin slice)
50
+
51
+ Implement in this order:
52
+
53
+ 1. `interface/cli/workflows/*`: new workflow entry or new branch in an existing workflow.
54
+ 2. `interface/cli/prompts/*`: prompts for user inputs.
55
+ 3. `interface/cli/workflows/builders/*`: build domain session/request objects.
56
+ 4. `application/use_cases/*`: interface-agnostic use case with `Result` success/failure.
57
+ 5. `domain/*`: new entities/value objects/aggregate changes for invariants.
58
+ 6. `infrastructure/*`: adapters needed by the use case.
59
+ 7. `interface/cli/workflows/presenters/*`: output and summaries.
60
+
61
+ Keep each step testable on its own before moving to the next.
62
+
63
+ ### 3) Function type patterns
64
+
65
+ For a new function type, prefer one of these patterns:
66
+
67
+ - `Transform` (changes output rows/values):
68
+ - Domain: transformation options/value objects.
69
+ - Application: orchestrate transform over streamed rows.
70
+ - Infrastructure: stream reader/writer implementation.
71
+ - `Validate` (checks and reports findings):
72
+ - Domain: validation policy and finding model.
73
+ - Application: run checks and return findings in result data.
74
+ - Presenter: format findings and summary.
75
+ - `Compare` (source vs reference logic):
76
+ - Domain: mapping/selectors/match options.
77
+ - Application: compare strategy and stats.
78
+ - Infrastructure: dual-source readers and selector helpers.
79
+ - `Export` (destination-focused):
80
+ - Domain: output destination value object.
81
+ - Application: orchestrate write path only.
82
+ - Infrastructure: writer adapter.
83
+
84
+ ### 4) Required boundaries and rules
85
+
86
+ - Workflows do not contain business rules.
87
+ - Use cases do not prompt or print.
88
+ - Domain does not depend on interface or infrastructure.
89
+ - Infrastructure does not own workflow decisions.
90
+ - Shared workflow helpers belong under `workflows/support/*`.
91
+ - Reusable construction logic belongs under `workflows/builders/*`.
92
+ - Rendering/summary formatting belongs under `workflows/presenters/*`.
93
+
94
+ ### 5) Minimum tests for each new concept
95
+
96
+ - Prompt tests for each new prompt class.
97
+ - Builder tests for each new builder class.
98
+ - Use-case tests for request/result behavior.
99
+ - Workflow behavior tests for prompt + output integration.
100
+ - One end-to-end CLI test for the happy path.
101
+
102
+ ### 6) Naming and structure guidance
103
+
104
+ - Prefer domain-first names (`RowRange`, `ColumnSelection`, `MatchOptions`) over technical names.
105
+ - Use `Run<Concept>` for use cases and `Run<Concept>Workflow` for workflows.
106
+ - Keep one file per class and mirror structure under `test/csvtool/...`.
107
+
11
108
  ## Domain model
12
109
 
13
110
  Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, and `Cross-CSV Dedupe`.
@@ -37,7 +134,7 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
37
134
  - `ExtractionValue`
38
135
  - Shared `OutputDestination` (`console` or `file(path)`)
39
136
  - Application service:
40
- - `Application::UseCases::RunExtraction` orchestrates one extraction request.
137
+ - `Application::UseCases::RunExtraction` is interface-agnostic and exposes request/result operations.
41
138
  - Infrastructure adapters:
42
139
  - `Infrastructure::CSV::HeaderReader`
43
140
  - `Infrastructure::CSV::ValueStreamer`
@@ -45,6 +142,12 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
45
142
  - `Infrastructure::Output::CsvFileWriter`
46
143
  - Interface adapters:
47
144
  - `Interface::CLI::MenuLoop`
145
+ - `Interface::CLI::Workflows::RunExtractionWorkflow`
146
+ - `Interface::CLI::Workflows::Builders::ColumnSessionBuilder`
147
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
148
+ - `Interface::CLI::Workflows::Steps::Extraction::*`
149
+ - `Interface::CLI::Workflows::Presenters::ColumnExtractionPresenter`
150
+ - `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
48
151
  - `Interface::CLI::Prompts::*`
49
152
  - `Interface::CLI::Errors::Presenter`
50
153
 
@@ -52,6 +155,7 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
52
155
  classDiagram
53
156
  direction LR
54
157
  class MenuLoop
158
+ class RunExtractionWorkflow
55
159
  class Prompts
56
160
  class Errors
57
161
  class RunExtraction
@@ -64,12 +168,12 @@ classDiagram
64
168
  class OutputDestination
65
169
  class HeaderReader
66
170
  class ValueStreamer
67
- class ConsoleWriter
68
171
  class CsvFileWriter
69
172
 
70
- MenuLoop --> RunExtraction : invokes
71
- Prompts --> RunExtraction : provides input
72
- RunExtraction --> Errors : reports failures
173
+ MenuLoop --> RunExtractionWorkflow : invokes
174
+ RunExtractionWorkflow --> Prompts : uses
175
+ RunExtractionWorkflow --> Errors : reports failures
176
+ RunExtractionWorkflow --> RunExtraction : calls
73
177
  RunExtraction --> ColumnSession : orchestrates
74
178
  ColumnSession o-- CsvSource
75
179
  ColumnSession o-- ColumnSelection
@@ -79,7 +183,6 @@ classDiagram
79
183
  ColumnSession o-- OutputDestination
80
184
  RunExtraction --> HeaderReader
81
185
  RunExtraction --> ValueStreamer
82
- RunExtraction --> ConsoleWriter
83
186
  RunExtraction --> CsvFileWriter
84
187
  ```
85
188
 
@@ -96,14 +199,19 @@ Core DDD structure:
96
199
  - `RowRange` (`start_row`, `end_row`) plus row-range validation errors
97
200
  - Shared `OutputDestination` (`console` or `file(path)`)
98
201
  - Application service:
99
- - `Application::UseCases::RunRowExtraction` orchestrates row-range extraction.
202
+ - `Application::UseCases::RunRowExtraction` is interface-agnostic and exposes request/result operations.
100
203
  - Infrastructure adapters:
101
204
  - `Infrastructure::CSV::HeaderReader`
102
205
  - `Infrastructure::CSV::RowStreamer`
103
- - `Infrastructure::Output::CsvRowConsoleWriter`
104
206
  - `Infrastructure::Output::CsvRowFileWriter`
105
207
  - Interface adapters:
106
208
  - `Interface::CLI::MenuLoop`
209
+ - `Interface::CLI::Workflows::RunRowExtractionWorkflow`
210
+ - `Interface::CLI::Workflows::Builders::RowExtractionSessionBuilder`
211
+ - `Interface::CLI::Workflows::Presenters::RowExtractionPresenter`
212
+ - `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
213
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
214
+ - `Interface::CLI::Workflows::Steps::RowExtraction::*`
107
215
  - `Interface::CLI::Prompts::*`
108
216
  - `Interface::CLI::Errors::Presenter`
109
217
 
@@ -111,6 +219,7 @@ Core DDD structure:
111
219
  classDiagram
112
220
  direction LR
113
221
  class MenuLoop
222
+ class RunRowExtractionWorkflow
114
223
  class Prompts
115
224
  class Errors
116
225
  class RunRowExtraction
@@ -120,19 +229,17 @@ classDiagram
120
229
  class OutputDestination
121
230
  class HeaderReader
122
231
  class RowStreamer
123
- class CsvRowConsoleWriter
124
232
  class CsvRowFileWriter
125
-
126
- MenuLoop --> RunRowExtraction : invokes
127
- Prompts --> RunRowExtraction : provides input
128
- RunRowExtraction --> Errors : reports failures
233
+ MenuLoop --> RunRowExtractionWorkflow : invokes
234
+ RunRowExtractionWorkflow --> Prompts : uses
235
+ RunRowExtractionWorkflow --> Errors : reports failures
236
+ RunRowExtractionWorkflow --> RunRowExtraction : calls
129
237
  RunRowExtraction --> RowSession : orchestrates
130
238
  RowSession o-- RowSource
131
239
  RowSession o-- RowRange
132
240
  RowSession o-- OutputDestination
133
241
  RunRowExtraction --> HeaderReader
134
242
  RunRowExtraction --> RowStreamer
135
- RunRowExtraction --> CsvRowConsoleWriter
136
243
  RunRowExtraction --> CsvRowFileWriter
137
244
  ```
138
245
 
@@ -148,12 +255,19 @@ Core DDD structure:
148
255
  - `RandomizationOptions` (optional deterministic `seed`)
149
256
  - Shared `OutputDestination` (`console` or `file(path)`)
150
257
  - Application service:
151
- - `Application::UseCases::RunRowRandomization` orchestrates row randomization.
258
+ - `Application::UseCases::RunRowRandomization` is interface-agnostic and exposes request/result operations.
152
259
  - Infrastructure adapters:
153
260
  - `Infrastructure::CSV::HeaderReader`
154
261
  - `Infrastructure::CSV::RowRandomizer` (external chunked `RAND + sort` + merge)
262
+ - `Infrastructure::Output::CsvRandomizedRowFileWriter`
155
263
  - Interface adapters:
156
264
  - `Interface::CLI::MenuLoop`
265
+ - `Interface::CLI::Workflows::RunRowRandomizationWorkflow`
266
+ - `Interface::CLI::Workflows::Builders::RowRandomizationSessionBuilder`
267
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
268
+ - `Interface::CLI::Workflows::Steps::RowRandomization::*`
269
+ - `Interface::CLI::Workflows::Presenters::RowRandomizationPresenter`
270
+ - `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
157
271
  - `Interface::CLI::Prompts::*`
158
272
  - `Interface::CLI::Errors::Presenter`
159
273
 
@@ -161,6 +275,7 @@ Core DDD structure:
161
275
  classDiagram
162
276
  direction LR
163
277
  class MenuLoop
278
+ class RunRowRandomizationWorkflow
164
279
  class Prompts
165
280
  class Errors
166
281
  class RunRowRandomization
@@ -170,16 +285,19 @@ classDiagram
170
285
  class OutputDestination
171
286
  class HeaderReader
172
287
  class RowRandomizer
288
+ class CsvRandomizedRowFileWriter
173
289
 
174
- MenuLoop --> RunRowRandomization : invokes
175
- Prompts --> RunRowRandomization : provides input
176
- RunRowRandomization --> Errors : reports failures
290
+ MenuLoop --> RunRowRandomizationWorkflow : invokes
291
+ RunRowRandomizationWorkflow --> Prompts : uses
292
+ RunRowRandomizationWorkflow --> Errors : reports failures
293
+ RunRowRandomizationWorkflow --> RunRowRandomization : calls
177
294
  RunRowRandomization --> RandomizationSession : orchestrates
178
295
  RandomizationSession o-- RandomizationSource
179
296
  RandomizationSession o-- RandomizationOptions
180
297
  RandomizationSession o-- OutputDestination
181
298
  RunRowRandomization --> HeaderReader
182
299
  RunRowRandomization --> RowRandomizer
300
+ RunRowRandomization --> CsvRandomizedRowFileWriter
183
301
  ```
184
302
 
185
303
  ### Cross-CSV Dedupe
@@ -201,9 +319,15 @@ Core DDD structure:
201
319
  - `Infrastructure::CSV::HeaderReader`
202
320
  - `Infrastructure::CSV::SelectorValidator`
203
321
  - `Infrastructure::CSV::CrossCsvDeduper` (streams source rows while checking membership against reference key set)
322
+ - `Infrastructure::Output::CsvCrossCsvDedupeFileWriter`
204
323
  - Interface adapters:
205
324
  - `Interface::CLI::MenuLoop`
206
325
  - `Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow`
326
+ - `Interface::CLI::Workflows::Builders::CrossCsvDedupeSessionBuilder`
327
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
328
+ - `Interface::CLI::Workflows::Steps::CrossCsvDedupe::*`
329
+ - `Interface::CLI::Workflows::Presenters::CrossCsvDedupePresenter`
330
+ - `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
207
331
  - `Interface::CLI::Prompts::*`
208
332
  - `Interface::CLI::Errors::Presenter`
209
333
 
@@ -224,6 +348,7 @@ classDiagram
224
348
  class HeaderReader
225
349
  class SelectorValidator
226
350
  class CrossCsvDeduper
351
+ class CsvCrossCsvDedupeFileWriter
227
352
 
228
353
  MenuLoop --> RunCrossCsvDedupeWorkflow : invokes
229
354
  Prompts --> RunCrossCsvDedupeWorkflow : provides input
@@ -238,6 +363,7 @@ classDiagram
238
363
  RunCrossCsvDedupe --> HeaderReader
239
364
  RunCrossCsvDedupe --> SelectorValidator
240
365
  RunCrossCsvDedupe --> CrossCsvDeduper
366
+ RunCrossCsvDedupe --> CsvCrossCsvDedupeFileWriter
241
367
  ```
242
368
 
243
369
  ## Project layout
@@ -258,6 +384,10 @@ lib/csvtool/infrastructure/csv/*
258
384
  lib/csvtool/infrastructure/output/*
259
385
  lib/csvtool/interface/cli/menu_loop.rb
260
386
  lib/csvtool/interface/cli/workflows/*
387
+ lib/csvtool/interface/cli/workflows/builders/*
388
+ lib/csvtool/interface/cli/workflows/support/*
389
+ lib/csvtool/interface/cli/workflows/presenters/*
390
+ lib/csvtool/interface/cli/workflows/steps/*
261
391
  lib/csvtool/interface/cli/prompts/*
262
392
  lib/csvtool/interface/cli/errors/presenter.rb
263
393
  test/csvtool/cli_test.rb # end-to-end workflow tests
@@ -0,0 +1,89 @@
1
+ # Release Checklist: v0.5.0-alpha
2
+
3
+ ## 1. Verify environment
4
+
5
+ ```bash
6
+ ruby -v
7
+ bundle -v
8
+ ```
9
+
10
+ Expected:
11
+ - Ruby `3.3.x`
12
+
13
+ ## 2. Install dependencies
14
+
15
+ ```bash
16
+ bundle install
17
+ ```
18
+
19
+ ## 3. Run quality checks
20
+
21
+ ```bash
22
+ bundle exec rake test
23
+ ```
24
+
25
+ ## 4. Smoke test CLI commands
26
+
27
+ ```bash
28
+ bundle exec csvtool menu
29
+ bundle exec csvtool column test/fixtures/sample_people.csv name
30
+ ```
31
+
32
+ ## 5. Smoke test workflows
33
+
34
+ ### Row extraction workflow
35
+
36
+ Use menu option `2` (`Extract rows (range)`) and verify:
37
+ - headered CSV rows print correctly in console mode
38
+ - out-of-bounds row range shows friendly message
39
+ - file output mode writes expected CSV rows
40
+
41
+ ### Row randomization workflow
42
+
43
+ Use menu option `3` (`Randomize rows`) and verify:
44
+ - seeded mode is reproducible
45
+ - headered and headerless modes both work
46
+ - file output path writes valid randomized CSV
47
+
48
+ ### Cross-CSV dedupe workflow
49
+
50
+ Use menu option `4` (`Dedupe using another CSV`) and verify:
51
+ - expected retained rows for headered source/reference files
52
+ - separator/header-mode combinations still work
53
+ - file output mode writes expected deduped CSV
54
+
55
+ ## 6. Build and validate gem package
56
+
57
+ ```bash
58
+ gem build csvops.gemspec
59
+ gem install ./csvops-0.5.0.alpha.gem
60
+ csvtool menu
61
+ ```
62
+
63
+ ## 7. Commit release prep
64
+
65
+ ```bash
66
+ git add -A
67
+ git commit -m "chore(release): prepare v0.5.0-alpha"
68
+ ```
69
+
70
+ ## 8. Tag release
71
+
72
+ ```bash
73
+ git tag -a v0.5.0-alpha -m "v0.5.0-alpha"
74
+ git push origin main --tags
75
+ ```
76
+
77
+ ## 9. Publish gem (optional for alpha)
78
+
79
+ ```bash
80
+ gem push csvops-0.5.0.alpha.gem
81
+ ```
82
+
83
+ ## 10. Create GitHub release
84
+
85
+ Create release `v0.5.0-alpha` with:
86
+ - Use-case file-write boundary cleanup across all workflows
87
+ - New infrastructure file-writer adapters for row randomization and cross-CSV dedupe
88
+ - Final architecture boundary audit, with a guard test against direct write APIs in use cases
89
+ - Updated architecture diagrams to reflect current writer adapter dependencies
@@ -4,6 +4,7 @@ require "csv"
4
4
  require "csvtool/infrastructure/csv/header_reader"
5
5
  require "csvtool/infrastructure/csv/cross_csv_deduper"
6
6
  require "csvtool/infrastructure/csv/selector_validator"
7
+ require "csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer"
7
8
 
8
9
  module Csvtool
9
10
  module Application
@@ -18,11 +19,15 @@ module Csvtool
18
19
  def initialize(
19
20
  header_reader: Infrastructure::CSV::HeaderReader.new,
20
21
  deduper: Infrastructure::CSV::CrossCsvDeduper.new,
21
- selector_validator: Infrastructure::CSV::SelectorValidator.new(header_reader: header_reader)
22
+ selector_validator: Infrastructure::CSV::SelectorValidator.new(header_reader: header_reader),
23
+ csv_cross_csv_dedupe_file_writer: nil
22
24
  )
23
25
  @header_reader = header_reader
24
26
  @deduper = deduper
25
27
  @selector_validator = selector_validator
28
+ @csv_cross_csv_dedupe_file_writer = csv_cross_csv_dedupe_file_writer || Infrastructure::Output::CsvCrossCsvDedupeFileWriter.new(
29
+ deduper: @deduper
30
+ )
26
31
  end
27
32
 
28
33
  def call(session:, on_header: nil, on_row: nil)
@@ -46,26 +51,24 @@ module Csvtool
46
51
  end
47
52
  rescue CSV::MalformedCSVError
48
53
  failure(:could_not_parse_csv)
49
- rescue Errno::EACCES
50
- failure(:cannot_read_file, path: current_read_path || session.source.path)
54
+ rescue Errno::EACCES, Errno::ENOENT => e
55
+ if session.output_destination.file?
56
+ failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
57
+ else
58
+ failure(:cannot_read_file, path: current_read_path || session.source.path)
59
+ end
51
60
  end
52
61
 
53
62
  private
54
63
 
55
64
  def write_file(session:, source_headers:)
56
- stats = nil
57
- ::CSV.open(
58
- session.output_destination.path,
59
- "w",
60
- write_headers: !source_headers.nil?,
65
+ stats = @csv_cross_csv_dedupe_file_writer.call(
66
+ path: session.output_destination.path,
61
67
  headers: source_headers,
62
- col_sep: session.source.separator
63
- ) do |csv|
64
- stats = @deduper.each_retained(**dedupe_options(session)) { |fields| csv << fields }
65
- end
68
+ col_sep: session.source.separator,
69
+ dedupe_options: dedupe_options(session)
70
+ )
66
71
  success(stats: stats, output_path: session.output_destination.path)
67
- rescue Errno::EACCES, Errno::ENOENT => e
68
- failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
69
72
  end
70
73
 
71
74
  def dedupe_options(session)