csvops 0.4.0.alpha → 0.6.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/README.md +60 -12
  3. data/docs/architecture.md +208 -21
  4. data/docs/release-v0.5.0-alpha.md +89 -0
  5. data/docs/release-v0.6.0-alpha.md +84 -0
  6. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
  7. data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
  8. data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
  9. data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
  10. data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
  11. data/lib/csvtool/cli.rb +11 -7
  12. data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
  13. data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
  14. data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
  15. data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
  16. data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
  17. data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
  18. data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
  19. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
  20. data/lib/csvtool/interface/cli/errors/presenter.rb +4 -0
  21. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  22. data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
  23. data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
  24. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
  25. data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
  26. data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
  27. data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
  28. data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
  29. data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
  30. data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
  31. data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
  32. data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
  33. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
  34. data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
  35. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
  36. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
  37. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
  38. data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
  39. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
  40. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
  41. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
  42. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
  43. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
  44. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
  45. data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
  46. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
  47. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
  48. data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
  49. data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
  50. data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
  51. data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
  52. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
  53. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
  54. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
  55. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
  56. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
  57. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
  58. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
  59. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
  60. data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
  61. data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
  62. data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
  63. data/lib/csvtool/version.rb +1 -1
  64. data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
  65. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
  66. data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
  67. data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
  68. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
  69. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
  70. data/test/csvtool/cli_test.rb +175 -21
  71. data/test/csvtool/cli_unit_test.rb +4 -4
  72. data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
  73. data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
  74. data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
  75. data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
  76. data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
  77. data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
  78. data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
  79. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
  80. data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
  81. data/test/csvtool/interface/cli/menu_loop_test.rb +59 -16
  82. data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
  83. data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
  84. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
  85. data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
  86. data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
  87. data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
  88. data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
  89. data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
  90. data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
  91. data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
  92. data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
  93. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
  94. data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
  95. data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
  96. data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
  97. data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
  98. data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
  99. data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
  100. data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
  101. data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
  102. data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
  103. data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
  104. data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
  105. data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
  106. data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
  107. data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
  108. data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
  109. data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
  110. data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
  111. data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
  112. data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
  113. data/test/fixtures/parity_duplicates_left.csv +4 -0
  114. data/test/fixtures/parity_duplicates_right.csv +3 -0
  115. data/test/fixtures/parity_people_header_mismatch.csv +4 -0
  116. data/test/fixtures/parity_people_many_reordered.csv +13 -0
  117. data/test/fixtures/parity_people_mismatch.csv +4 -0
  118. data/test/fixtures/parity_people_reordered.csv +4 -0
  119. data/test/fixtures/parity_people_reordered.tsv +4 -0
  120. metadata +90 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c5a0c00272c2d10751b234384ac50ee8caa90681860906419ccdec7a6e3c110
4
- data.tar.gz: 849d377bec9acd507c0fd37a75e823bb9458295e12a31a5000b9ba599084092d
3
+ metadata.gz: f7db22cb84c1d08c58b473368f9ad37575a217d6293539309277ed2b032a2852
4
+ data.tar.gz: 124bebc822fefa5d1f71286701959876260c82164067c36ff94b712a0b4cc1b3
5
5
  SHA512:
6
- metadata.gz: ba96ce18b4e6d2fd8eb018f406c17e7b810010a788a6be1acb51a714b87dad614d822edb97f780e2c745e257bbc68c89266427876fcc4b3fee57fadb29232630
7
- data.tar.gz: 378c4a47b96cf210b28f689d9ef0aa1056c95777d3128d4044d8462cf802eeb53ca148062f4244182105e4b760bbf7dcf48d19705f69f18dfb78e3fc2e935413
6
+ metadata.gz: a8b8dbcfb66073f46f0ecc625267081fbe730e69ef9295f5d2303af6b831a9d71ef564f78f5b44212eb33c4ad7a5fdb78b54fa98e21dd58669e9494a5d3325fb
7
+ data.tar.gz: 05cbcaa2ca3116ad463413e53600d32a53df0941ceb8873ed22c2ef2d4cfe1afc8f90e44c7ff4400212ebbd5083a2ceb6a983281291436e9478d5087cc98b9ad
data/README.md CHANGED
@@ -37,22 +37,30 @@ CSV Tool Menu
37
37
  2. Extract rows (range)
38
38
  3. Randomize rows
39
39
  4. Dedupe using another CSV
40
- 5. Exit
40
+ 5. Validate parity
41
+ 6. Exit
41
42
  >
42
43
  ```
43
44
 
44
- Select `1` for column extraction, `2` for row-range extraction, `3` for row randomization, or `4` for cross-CSV dedupe.
45
+ Select `1` for column extraction, `2` for row-range extraction, `3` for row randomization, `4` for cross-CSV dedupe, or `5` for parity validation.
45
46
 
46
47
  ### 3. Follow prompts
47
48
 
48
- Prompt flow:
49
+ Each menu action runs through a dedicated CLI workflow (`interface/cli/workflows/*`) that handles prompts/output and delegates execution to an interface-agnostic application use case.
49
50
 
50
- - CSV file path
51
- - Separator (`comma`, `tab`, `semicolon`, `pipe`, or `custom`)
52
- - Optional header filter + column selection
53
- - Skip blanks (`Y/n`, default `Y`)
54
- - Preview + confirmation
55
- - Output destination (`console` or `file`)
51
+ Workflow internals are split into small composable parts:
52
+
53
+ - `workflows/builders/*` for session construction
54
+ - `workflows/support/*` for shared mapping/dispatch utilities
55
+ - `workflows/presenters/*` for output formatting and summaries
56
+
57
+ Prompt flow by action:
58
+
59
+ - `Extract column`: file path, separator, optional header filter + column select, skip blanks, preview/confirm, output destination.
60
+ - `Extract rows (range)`: file path, separator, start row, end row, output destination.
61
+ - `Randomize rows`: file path, separator, headers present, optional seed, output destination.
62
+ - `Dedupe using another CSV`: source/reference files, separators, header modes, key selectors, match options, output destination.
63
+ - `Validate parity`: left/right files, separator, header mode, parity summary, mismatch samples.
56
64
 
57
65
  ### 4. Example interaction (console output)
58
66
 
@@ -123,7 +131,8 @@ Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
123
131
  2. Extract rows (range)
124
132
  3. Randomize rows
125
133
  4. Dedupe using another CSV
126
- 5. Exit
134
+ 5. Validate parity
135
+ 6. Exit
127
136
  +> 4
128
137
  CSV file path: /tmp/source.csv
129
138
  Source CSV separator:
@@ -160,6 +169,45 @@ Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
160
169
  -Summary: source_rows=5 removed_rows=3 kept_rows=2
161
170
  ```
162
171
 
172
+ ### 8. Parity interaction example
173
+
174
+ Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
175
+
176
+ ```diff
177
+ CSV Tool Menu
178
+ 1. Extract column
179
+ 2. Extract rows (range)
180
+ 3. Randomize rows
181
+ 4. Dedupe using another CSV
182
+ 5. Validate parity
183
+ 6. Exit
184
+ +> 5
185
+ Left CSV file path: /tmp/left.csv
186
+ Right CSV file path: /tmp/right.csv
187
+ Choose separator:
188
+ 1. comma (,)
189
+ 2. tab (\t)
190
+ 3. semicolon (;)
191
+ 4. pipe (|)
192
+ 5. custom
193
+ +Separator choice [1]: 1
194
+ Headers present? [Y/n]:
195
+ -MISMATCH
196
+ -Summary: left_rows=10 right_rows=10 left_only=2 right_only=2
197
+ -Left-only examples:
198
+ - 4,Dina (count +1)
199
+ -Right-only examples:
200
+ - 4,Dina-Updated (count +1)
201
+ ```
202
+
203
+ ### 9. Parity large-file behavior
204
+
205
+ - Parity uses a streaming count-delta strategy:
206
+ - Stream left rows and increment row-key counts.
207
+ - Stream right rows and decrement row-key counts.
208
+ - Exact duplicate semantics are preserved by count deltas per normalized row value.
209
+ - Memory scales with the number of distinct row keys in the parity map, not the total input row count.
210
+
163
211
  ## Testing
164
212
 
165
213
  Run tests:
@@ -176,7 +224,7 @@ bundle exec rake test
176
224
 
177
225
  ## Alpha release
178
226
 
179
- Current prerelease version: `0.4.0.alpha`
227
+ Current prerelease version: `0.6.0.alpha`
180
228
 
181
229
  Install prerelease from RubyGems:
182
230
 
@@ -186,7 +234,7 @@ gem install csvops --pre
186
234
 
187
235
  Release runbook:
188
236
 
189
- - `docs/release-v0.4.0-alpha.md`
237
+ - `docs/release-v0.6.0-alpha.md`
190
238
 
191
239
 
192
240
  ## Architecture
data/docs/architecture.md CHANGED
@@ -3,14 +3,112 @@
3
3
  The codebase follows a DDD-lite layered structure:
4
4
 
5
5
  - `domain/`: core domain models and invariants (`ColumnSession`, `RowSession`, `RandomizationSession`, and `CrossCsvDedupeSession` aggregates + supporting entities/value objects).
6
- - `application/`: use-case orchestration (`RunExtraction`, `RunRowExtraction`, `RunRowRandomization`, `RunCrossCsvDedupe`).
7
- - `infrastructure/`: CSV reading/streaming and output adapters (console/file), plus cross-CSV dedupe adapter.
6
+ - `application/`: use-case orchestration (`RunExtraction`, `RunRowExtraction`, `RunRowRandomization`, `RunCrossCsvDedupe`, `RunCsvParity`).
7
+ - `infrastructure/`: CSV reading/streaming/comparison and output adapters (console/file).
8
8
  - `interface/cli/`: menu, prompts, workflows, and user-facing error presentation.
9
9
  - `Csvtool::CLI`: entrypoint wiring from command args to interface/application flow.
10
10
 
11
+ ## Workflow boundary (standardized)
12
+
13
+ For all interactive domains (`Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`, `CSV Parity`), the boundary is:
14
+
15
+ - `interface/cli/workflows/*`: owns prompts, stdout rendering, and user-facing error presentation.
16
+ - `interface/cli/workflows/builders/*`: builds domain sessions/aggregates from prompt results.
17
+ - `interface/cli/workflows/support/*`: shared workflow utilities (error routing, output destination mapping).
18
+ - `interface/cli/workflows/presenters/*`: workflow-level output/summary rendering.
19
+ - `interface/cli/workflows/steps/*`: optional step-pipeline units for complex workflow orchestration.
20
+ - `application/use_cases/*`: interface-agnostic orchestration with request/result style contracts.
21
+ - `domain/*`: invariants and domain policies.
22
+ - `infrastructure/*`: CSV mechanics and output adapters.
23
+
24
+ Write-boundary rule:
25
+ - Use cases coordinate write paths but do not perform direct file writes.
26
+ - Direct write APIs (`CSV.open`, writable `File.open`, `File.write`, `IO.write`) are infrastructure-only.
27
+ - File output behavior is implemented in `infrastructure/output/*` writer adapters.
28
+
29
+ Current usage:
30
+
31
+ - `RunExtractionWorkflow` uses `WorkflowStepPipeline` + `Steps::Extraction::*`.
32
+ - `RunRowExtractionWorkflow` uses `WorkflowStepPipeline` + `Steps::RowExtraction::*`.
33
+ - `RunRowRandomizationWorkflow` uses `WorkflowStepPipeline` + `Steps::RowRandomization::*`.
34
+ - `RunCrossCsvDedupeWorkflow` uses `WorkflowStepPipeline` + `Steps::CrossCsvDedupe::*`.
35
+ - `RunCsvParityWorkflow` uses `WorkflowStepPipeline` + `Steps::Parity::*`.
36
+
37
+ ## Adding New Concepts
38
+
39
+ Use this checklist when introducing a new capability (for example: a new transformation function, validator, comparer, or exporter).
40
+
41
+ ### 1) Classify the concept first
42
+
43
+ - `Workflow concept`: interactive flow and prompt sequence.
44
+ - `Domain concept`: business rule/invariant and core vocabulary.
45
+ - `Application concept`: use-case orchestration and request/result contract.
46
+ - `Infrastructure concept`: file/CSV mechanics, streaming, persistence, or external IO.
47
+
48
+ If it does not clearly fit one layer, split it until each part has one responsibility.
49
+
50
+ ### 2) Add the feature vertically (thin slice)
51
+
52
+ Implement in this order:
53
+
54
+ 1. `interface/cli/workflows/*`: new workflow entry or new branch in an existing workflow.
55
+ 2. `interface/cli/prompts/*`: prompts for user inputs.
56
+ 3. `interface/cli/workflows/builders/*`: build domain session/request objects.
57
+ 4. `application/use_cases/*`: interface-agnostic use case with `Result` success/failure.
58
+ 5. `domain/*`: new entities/value objects/aggregate changes for invariants.
59
+ 6. `infrastructure/*`: adapters needed by the use case.
60
+ 7. `interface/cli/workflows/presenters/*`: output and summaries.
61
+
62
+ Keep each step testable on its own before moving to the next.
63
+
64
+ ### 3) Function type patterns
65
+
66
+ For a new function type, prefer one of these patterns:
67
+
68
+ - `Transform` (changes output rows/values):
69
+ - Domain: transformation options/value objects.
70
+ - Application: orchestrate transform over streamed rows.
71
+ - Infrastructure: stream reader/writer implementation.
72
+ - `Validate` (checks and reports findings):
73
+ - Domain: validation policy and finding model.
74
+ - Application: run checks and return findings in result data.
75
+ - Presenter: format findings and summary.
76
+ - `Compare` (source vs reference logic):
77
+ - Domain: mapping/selectors/match options.
78
+ - Application: compare strategy and stats.
79
+ - Infrastructure: dual-source readers and selector helpers.
80
+ - `Export` (destination-focused):
81
+ - Domain: output destination value object.
82
+ - Application: orchestrate write path only.
83
+ - Infrastructure: writer adapter.
84
+
85
+ ### 4) Required boundaries and rules
86
+
87
+ - Workflows do not contain business rules.
88
+ - Use cases do not prompt or print.
89
+ - Domain does not depend on interface or infrastructure.
90
+ - Infrastructure does not own workflow decisions.
91
+ - Shared workflow helpers belong under `workflows/support/*`.
92
+ - Reusable construction logic belongs under `workflows/builders/*`.
93
+ - Rendering/summary formatting belongs under `workflows/presenters/*`.
94
+
95
+ ### 5) Minimum tests for each new concept
96
+
97
+ - Prompt tests for each new prompt class.
98
+ - Builder tests for each new builder class.
99
+ - Use-case tests for request/result behavior.
100
+ - Workflow behavior tests for prompt + output integration.
101
+ - One end-to-end CLI test for the happy path.
102
+
103
+ ### 6) Naming and structure guidance
104
+
105
+ - Prefer domain-first names (`RowRange`, `ColumnSelection`, `MatchOptions`) over technical names.
106
+ - Use `Run<Concept>` for use cases and `Run<Concept>Workflow` for workflows.
107
+ - Keep one file per class and mirror structure under `test/csvtool/...`.
108
+
11
109
  ## Domain model
12
110
 
13
- Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, and `Cross-CSV Dedupe`.
111
+ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`, and `CSV Parity`.
14
112
 
15
113
  ### Cross-CSV Dedupe (Large-file behavior)
16
114
 
@@ -37,7 +135,7 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
37
135
  - `ExtractionValue`
38
136
  - Shared `OutputDestination` (`console` or `file(path)`)
39
137
  - Application service:
40
- - `Application::UseCases::RunExtraction` orchestrates one extraction request.
138
+ - `Application::UseCases::RunExtraction` is interface-agnostic and exposes request/result operations.
41
139
  - Infrastructure adapters:
42
140
  - `Infrastructure::CSV::HeaderReader`
43
141
  - `Infrastructure::CSV::ValueStreamer`
@@ -45,6 +143,12 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
45
143
  - `Infrastructure::Output::CsvFileWriter`
46
144
  - Interface adapters:
47
145
  - `Interface::CLI::MenuLoop`
146
+ - `Interface::CLI::Workflows::RunExtractionWorkflow`
147
+ - `Interface::CLI::Workflows::Builders::ColumnSessionBuilder`
148
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
149
+ - `Interface::CLI::Workflows::Steps::Extraction::*`
150
+ - `Interface::CLI::Workflows::Presenters::ColumnExtractionPresenter`
151
+ - `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
48
152
  - `Interface::CLI::Prompts::*`
49
153
  - `Interface::CLI::Errors::Presenter`
50
154
 
@@ -52,6 +156,7 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
52
156
  classDiagram
53
157
  direction LR
54
158
  class MenuLoop
159
+ class RunExtractionWorkflow
55
160
  class Prompts
56
161
  class Errors
57
162
  class RunExtraction
@@ -64,12 +169,12 @@ classDiagram
64
169
  class OutputDestination
65
170
  class HeaderReader
66
171
  class ValueStreamer
67
- class ConsoleWriter
68
172
  class CsvFileWriter
69
173
 
70
- MenuLoop --> RunExtraction : invokes
71
- Prompts --> RunExtraction : provides input
72
- RunExtraction --> Errors : reports failures
174
+ MenuLoop --> RunExtractionWorkflow : invokes
175
+ RunExtractionWorkflow --> Prompts : uses
176
+ RunExtractionWorkflow --> Errors : reports failures
177
+ RunExtractionWorkflow --> RunExtraction : calls
73
178
  RunExtraction --> ColumnSession : orchestrates
74
179
  ColumnSession o-- CsvSource
75
180
  ColumnSession o-- ColumnSelection
@@ -79,7 +184,6 @@ classDiagram
79
184
  ColumnSession o-- OutputDestination
80
185
  RunExtraction --> HeaderReader
81
186
  RunExtraction --> ValueStreamer
82
- RunExtraction --> ConsoleWriter
83
187
  RunExtraction --> CsvFileWriter
84
188
  ```
85
189
 
@@ -96,14 +200,19 @@ Core DDD structure:
96
200
  - `RowRange` (`start_row`, `end_row`) plus row-range validation errors
97
201
  - Shared `OutputDestination` (`console` or `file(path)`)
98
202
  - Application service:
99
- - `Application::UseCases::RunRowExtraction` orchestrates row-range extraction.
203
+ - `Application::UseCases::RunRowExtraction` is interface-agnostic and exposes request/result operations.
100
204
  - Infrastructure adapters:
101
205
  - `Infrastructure::CSV::HeaderReader`
102
206
  - `Infrastructure::CSV::RowStreamer`
103
- - `Infrastructure::Output::CsvRowConsoleWriter`
104
207
  - `Infrastructure::Output::CsvRowFileWriter`
105
208
  - Interface adapters:
106
209
  - `Interface::CLI::MenuLoop`
210
+ - `Interface::CLI::Workflows::RunRowExtractionWorkflow`
211
+ - `Interface::CLI::Workflows::Builders::RowExtractionSessionBuilder`
212
+ - `Interface::CLI::Workflows::Presenters::RowExtractionPresenter`
213
+ - `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
214
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
215
+ - `Interface::CLI::Workflows::Steps::RowExtraction::*`
107
216
  - `Interface::CLI::Prompts::*`
108
217
  - `Interface::CLI::Errors::Presenter`
109
218
 
@@ -111,6 +220,7 @@ Core DDD structure:
111
220
  classDiagram
112
221
  direction LR
113
222
  class MenuLoop
223
+ class RunRowExtractionWorkflow
114
224
  class Prompts
115
225
  class Errors
116
226
  class RunRowExtraction
@@ -120,19 +230,17 @@ classDiagram
120
230
  class OutputDestination
121
231
  class HeaderReader
122
232
  class RowStreamer
123
- class CsvRowConsoleWriter
124
233
  class CsvRowFileWriter
125
-
126
- MenuLoop --> RunRowExtraction : invokes
127
- Prompts --> RunRowExtraction : provides input
128
- RunRowExtraction --> Errors : reports failures
234
+ MenuLoop --> RunRowExtractionWorkflow : invokes
235
+ RunRowExtractionWorkflow --> Prompts : uses
236
+ RunRowExtractionWorkflow --> Errors : reports failures
237
+ RunRowExtractionWorkflow --> RunRowExtraction : calls
129
238
  RunRowExtraction --> RowSession : orchestrates
130
239
  RowSession o-- RowSource
131
240
  RowSession o-- RowRange
132
241
  RowSession o-- OutputDestination
133
242
  RunRowExtraction --> HeaderReader
134
243
  RunRowExtraction --> RowStreamer
135
- RunRowExtraction --> CsvRowConsoleWriter
136
244
  RunRowExtraction --> CsvRowFileWriter
137
245
  ```
138
246
 
@@ -148,12 +256,19 @@ Core DDD structure:
148
256
  - `RandomizationOptions` (optional deterministic `seed`)
149
257
  - Shared `OutputDestination` (`console` or `file(path)`)
150
258
  - Application service:
151
- - `Application::UseCases::RunRowRandomization` orchestrates row randomization.
259
+ - `Application::UseCases::RunRowRandomization` is interface-agnostic and exposes request/result operations.
152
260
  - Infrastructure adapters:
153
261
  - `Infrastructure::CSV::HeaderReader`
154
262
  - `Infrastructure::CSV::RowRandomizer` (external chunked `RAND + sort` + merge)
263
+ - `Infrastructure::Output::CsvRandomizedRowFileWriter`
155
264
  - Interface adapters:
156
265
  - `Interface::CLI::MenuLoop`
266
+ - `Interface::CLI::Workflows::RunRowRandomizationWorkflow`
267
+ - `Interface::CLI::Workflows::Builders::RowRandomizationSessionBuilder`
268
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
269
+ - `Interface::CLI::Workflows::Steps::RowRandomization::*`
270
+ - `Interface::CLI::Workflows::Presenters::RowRandomizationPresenter`
271
+ - `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
157
272
  - `Interface::CLI::Prompts::*`
158
273
  - `Interface::CLI::Errors::Presenter`
159
274
 
@@ -161,6 +276,7 @@ Core DDD structure:
161
276
  classDiagram
162
277
  direction LR
163
278
  class MenuLoop
279
+ class RunRowRandomizationWorkflow
164
280
  class Prompts
165
281
  class Errors
166
282
  class RunRowRandomization
@@ -170,16 +286,19 @@ classDiagram
170
286
  class OutputDestination
171
287
  class HeaderReader
172
288
  class RowRandomizer
289
+ class CsvRandomizedRowFileWriter
173
290
 
174
- MenuLoop --> RunRowRandomization : invokes
175
- Prompts --> RunRowRandomization : provides input
176
- RunRowRandomization --> Errors : reports failures
291
+ MenuLoop --> RunRowRandomizationWorkflow : invokes
292
+ RunRowRandomizationWorkflow --> Prompts : uses
293
+ RunRowRandomizationWorkflow --> Errors : reports failures
294
+ RunRowRandomizationWorkflow --> RunRowRandomization : calls
177
295
  RunRowRandomization --> RandomizationSession : orchestrates
178
296
  RandomizationSession o-- RandomizationSource
179
297
  RandomizationSession o-- RandomizationOptions
180
298
  RandomizationSession o-- OutputDestination
181
299
  RunRowRandomization --> HeaderReader
182
300
  RunRowRandomization --> RowRandomizer
301
+ RunRowRandomization --> CsvRandomizedRowFileWriter
183
302
  ```
184
303
 
185
304
  ### Cross-CSV Dedupe
@@ -201,9 +320,15 @@ Core DDD structure:
201
320
  - `Infrastructure::CSV::HeaderReader`
202
321
  - `Infrastructure::CSV::SelectorValidator`
203
322
  - `Infrastructure::CSV::CrossCsvDeduper` (streams source rows while checking membership against reference key set)
323
+ - `Infrastructure::Output::CsvCrossCsvDedupeFileWriter`
204
324
  - Interface adapters:
205
325
  - `Interface::CLI::MenuLoop`
206
326
  - `Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow`
327
+ - `Interface::CLI::Workflows::Builders::CrossCsvDedupeSessionBuilder`
328
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
329
+ - `Interface::CLI::Workflows::Steps::CrossCsvDedupe::*`
330
+ - `Interface::CLI::Workflows::Presenters::CrossCsvDedupePresenter`
331
+ - `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
207
332
  - `Interface::CLI::Prompts::*`
208
333
  - `Interface::CLI::Errors::Presenter`
209
334
 
@@ -224,6 +349,7 @@ classDiagram
224
349
  class HeaderReader
225
350
  class SelectorValidator
226
351
  class CrossCsvDeduper
352
+ class CsvCrossCsvDedupeFileWriter
227
353
 
228
354
  MenuLoop --> RunCrossCsvDedupeWorkflow : invokes
229
355
  Prompts --> RunCrossCsvDedupeWorkflow : provides input
@@ -238,6 +364,61 @@ classDiagram
238
364
  RunCrossCsvDedupe --> HeaderReader
239
365
  RunCrossCsvDedupe --> SelectorValidator
240
366
  RunCrossCsvDedupe --> CrossCsvDeduper
367
+ RunCrossCsvDedupe --> CsvCrossCsvDedupeFileWriter
368
+ ```
369
+
370
+ ### CSV Parity
371
+
372
+ Core DDD structure:
373
+
374
+ - Aggregate root: `ParitySession`
375
+ - Captures one parity check request.
376
+ - Holds left/right source paths and parity options.
377
+ - Entities:
378
+ - `SourcePair` (left and right file paths)
379
+ - Value objects:
380
+ - `ParityOptions` (separator + header mode)
381
+ - Application service:
382
+ - `Application::UseCases::RunCsvParity` orchestrates parity validation and returns request/result style payloads.
383
+ - Infrastructure adapters:
384
+ - `Infrastructure::CSV::HeaderReader`
385
+ - `Infrastructure::CSV::CsvParityComparator` (streaming count-delta strategy with duplicate-aware semantics)
386
+ - Interface adapters:
387
+ - `Interface::CLI::MenuLoop`
388
+ - `Interface::CLI::Workflows::RunCsvParityWorkflow`
389
+ - `Interface::CLI::Workflows::Builders::CsvParitySessionBuilder`
390
+ - `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
391
+ - `Interface::CLI::Workflows::Steps::Parity::*`
392
+ - `Interface::CLI::Workflows::Presenters::CsvParityPresenter`
393
+ - `Interface::CLI::Workflows::Support::ResultErrorHandler`
394
+ - `Interface::CLI::Prompts::*`
395
+ - `Interface::CLI::Errors::Presenter`
396
+
397
+ ```mermaid
398
+ classDiagram
399
+ direction LR
400
+ class MenuLoop
401
+ class RunCsvParityWorkflow
402
+ class Prompts
403
+ class Errors
404
+ class RunCsvParity
405
+ class ParitySession
406
+ class SourcePair
407
+ class ParityOptions
408
+ class HeaderReader
409
+ class CsvParityComparator
410
+ class CsvParityPresenter
411
+
412
+ MenuLoop --> RunCsvParityWorkflow : invokes
413
+ RunCsvParityWorkflow --> Prompts : uses
414
+ RunCsvParityWorkflow --> Errors : reports failures
415
+ RunCsvParityWorkflow --> CsvParityPresenter : renders
416
+ RunCsvParityWorkflow --> RunCsvParity : calls
417
+ RunCsvParity --> ParitySession : orchestrates
418
+ ParitySession o-- SourcePair
419
+ ParitySession o-- ParityOptions
420
+ RunCsvParity --> HeaderReader
421
+ RunCsvParity --> CsvParityComparator
241
422
  ```
242
423
 
243
424
  ## Project layout
@@ -249,15 +430,21 @@ lib/csvtool/domain/column_session/*
249
430
  lib/csvtool/domain/row_session/*
250
431
  lib/csvtool/domain/row_randomization_session/*
251
432
  lib/csvtool/domain/cross_csv_dedupe_session/*
433
+ lib/csvtool/domain/csv_parity_session/*
252
434
  lib/csvtool/domain/shared/output_destination.rb
253
435
  lib/csvtool/application/use_cases/run_extraction.rb
254
436
  lib/csvtool/application/use_cases/run_row_extraction.rb
255
437
  lib/csvtool/application/use_cases/run_row_randomization.rb
256
438
  lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb
439
+ lib/csvtool/application/use_cases/run_csv_parity.rb
257
440
  lib/csvtool/infrastructure/csv/*
258
441
  lib/csvtool/infrastructure/output/*
259
442
  lib/csvtool/interface/cli/menu_loop.rb
260
443
  lib/csvtool/interface/cli/workflows/*
444
+ lib/csvtool/interface/cli/workflows/builders/*
445
+ lib/csvtool/interface/cli/workflows/support/*
446
+ lib/csvtool/interface/cli/workflows/presenters/*
447
+ lib/csvtool/interface/cli/workflows/steps/*
261
448
  lib/csvtool/interface/cli/prompts/*
262
449
  lib/csvtool/interface/cli/errors/presenter.rb
263
450
  test/csvtool/cli_test.rb # end-to-end workflow tests
@@ -0,0 +1,89 @@
1
+ # Release Checklist: v0.5.0-alpha
2
+
3
+ ## 1. Verify environment
4
+
5
+ ```bash
6
+ ruby -v
7
+ bundle -v
8
+ ```
9
+
10
+ Expected:
11
+ - Ruby `3.3.x`
12
+
13
+ ## 2. Install dependencies
14
+
15
+ ```bash
16
+ bundle install
17
+ ```
18
+
19
+ ## 3. Run quality checks
20
+
21
+ ```bash
22
+ bundle exec rake test
23
+ ```
24
+
25
+ ## 4. Smoke test CLI commands
26
+
27
+ ```bash
28
+ bundle exec csvtool menu
29
+ bundle exec csvtool column test/fixtures/sample_people.csv name
30
+ ```
31
+
32
+ ## 5. Smoke test workflows
33
+
34
+ ### Row extraction workflow
35
+
36
+ Use menu option `2` (`Extract rows (range)`) and verify:
37
+ - headered CSV rows print correctly in console mode
38
+ - out-of-bounds row range shows a friendly message
39
+ - file output mode writes expected CSV rows
40
+
41
+ ### Row randomization workflow
42
+
43
+ Use menu option `3` (`Randomize rows`) and verify:
44
+ - seeded mode is reproducible
45
+ - headered and headerless modes both work
46
+ - file output path writes valid randomized CSV
47
+
48
+ ### Cross-CSV dedupe workflow
49
+
50
+ Use menu option `4` (`Dedupe using another CSV`) and verify:
51
+ - expected retained rows for headered source/reference files
52
+ - separator/header-mode combinations still work
53
+ - file output mode writes expected deduped CSV
54
+
55
+ ## 6. Build and validate gem package
56
+
57
+ ```bash
58
+ gem build csvops.gemspec
59
+ gem install ./csvops-0.5.0.alpha.gem
60
+ csvtool menu
61
+ ```
62
+
63
+ ## 7. Commit release prep
64
+
65
+ ```bash
66
+ git add -A
67
+ git commit -m "chore(release): prepare v0.5.0-alpha"
68
+ ```
69
+
70
+ ## 8. Tag release
71
+
72
+ ```bash
73
+ git tag -a v0.5.0-alpha -m "v0.5.0-alpha"
74
+ git push origin main --tags
75
+ ```
76
+
77
+ ## 9. Publish gem (optional for alpha)
78
+
79
+ ```bash
80
+ gem push csvops-0.5.0.alpha.gem
81
+ ```
82
+
83
+ ## 10. Create GitHub release
84
+
85
+ Create release `v0.5.0-alpha` with:
86
+ - Use-case file-write boundary cleanup across all workflows
87
+ - New infrastructure file-writer adapters for row randomization and cross-CSV dedupe
88
+ - Final architecture boundary audit with guard test for direct write APIs in use cases
89
+ - Updated architecture diagrams to reflect current writer adapter dependencies
@@ -0,0 +1,84 @@
1
+ # Release Checklist: v0.6.0-alpha
2
+
3
+ ## 1. Verify environment
4
+
5
+ ```bash
6
+ ruby -v
7
+ bundle -v
8
+ ```
9
+
10
+ Expected:
11
+ - Ruby `3.3.x`
12
+
13
+ ## 2. Install dependencies
14
+
15
+ ```bash
16
+ bundle install
17
+ ```
18
+
19
+ ## 3. Run quality checks
20
+
21
+ ```bash
22
+ bundle exec rake test
23
+ ```
24
+
25
+ ## 4. Smoke test CLI commands
26
+
27
+ ```bash
28
+ bundle exec csvtool menu
29
+ bundle exec csvtool column test/fixtures/sample_people.csv name
30
+ ```
31
+
32
+ ## 5. Smoke test workflows
33
+
34
+ ### CSV parity workflow
35
+
36
+ Use menu option `5` (`Validate parity`) and verify:
37
+ - matching files with reordered rows return parity success
38
+ - mismatched files return a friendly mismatch summary with sample deltas
39
+ - separator and header-mode selections are respected
40
+
41
+ ### Existing workflows regression pass
42
+
43
+ Run quick checks for menu options `1-4` and confirm:
44
+ - column extraction still works
45
+ - row-range extraction still works
46
+ - row randomization still works
47
+ - cross-CSV dedupe still works
48
+
49
+ ## 6. Build and validate gem package
50
+
51
+ ```bash
52
+ gem build csvops.gemspec
53
+ gem install ./csvops-0.6.0.alpha.gem
54
+ csvtool menu
55
+ ```
56
+
57
+ ## 7. Commit release prep
58
+
59
+ ```bash
60
+ git add -A
61
+ git commit -m "chore(release): prepare v0.6.0-alpha"
62
+ ```
63
+
64
+ ## 8. Tag release
65
+
66
+ ```bash
67
+ git tag -a v0.6.0-alpha -m "v0.6.0-alpha"
68
+ git push origin main --tags
69
+ ```
70
+
71
+ ## 9. Publish gem
72
+
73
+ ```bash
74
+ gem push csvops-0.6.0.alpha.gem
75
+ ```
76
+
77
+ ## 10. Create GitHub release
78
+
79
+ Create release `v0.6.0-alpha` with:
80
+ - Dedicated CSV parity validation workflow
81
+ - Header/separator parity options
82
+ - Friendly parity mismatch reporting
83
+ - Streaming count-delta parity comparator
84
+ - Parity architecture convergence (session model, workflow steps, presenter, docs)