csvops 0.3.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -149
- data/docs/architecture.md +396 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +130 -16
- data/test/csvtool/cli_unit_test.rb +16 -3
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- metadata +93 -8
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b96fb7e03fa0629d3412a97d3abff5414492ac46ad08ede2c872e2176fcbfc62
|
|
4
|
+
data.tar.gz: 856b7735a472b5810d5f19dff6371a565a7fcc538ce5b6eba52260fff0028760
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5f643d331c6b54cb5feb0fe5db4ff7f8f7bc5c28461f74e3bfca5cf93d25703b84f497e72377302874b2b6302ef0fb542995c72d2d21798e3a998f6d5b294704
|
|
7
|
+
data.tar.gz: 0e254fa75780ce0605054c24b28301d8786535a0f2bbff7adfb45a75f09e60e5315e950648208fa5772d08cdd6abce95ea382838f568947af05ceaa77ba1888f
|
data/README.md
CHANGED
|
@@ -35,22 +35,30 @@ bundle exec csvtool menu
|
|
|
35
35
|
CSV Tool Menu
|
|
36
36
|
1. Extract column
|
|
37
37
|
2. Extract rows (range)
|
|
38
|
-
3. Exit
|
|
38
|
+
3. Randomize rows
|
|
39
|
+
4. Dedupe using another CSV
|
|
40
|
+
5. Exit
|
|
39
41
|
>
|
|
40
42
|
```
|
|
41
43
|
|
|
42
|
-
Select `1`
|
|
44
|
+
Select `1` for column extraction, `2` for row-range extraction, `3` for row randomization, or `4` for cross-CSV dedupe.
|
|
43
45
|
|
|
44
46
|
### 3. Follow prompts
|
|
45
47
|
|
|
46
|
-
|
|
48
|
+
Each menu action runs through a dedicated CLI workflow (`interface/cli/workflows/*`) that handles prompts/output and delegates execution to an interface-agnostic application use case.
|
|
47
49
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
-
|
|
51
|
-
-
|
|
52
|
-
-
|
|
53
|
-
|
|
50
|
+
Workflow internals are split into small composable parts:
|
|
51
|
+
|
|
52
|
+
- `workflows/builders/*` for session construction
|
|
53
|
+
- `workflows/support/*` for shared mapping/dispatch utilities
|
|
54
|
+
- `workflows/presenters/*` for output formatting and summaries
|
|
55
|
+
|
|
56
|
+
Prompt flow by action:
|
|
57
|
+
|
|
58
|
+
- `Extract column`: file path, separator, optional header filter + column select, skip blanks, preview/confirm, output destination.
|
|
59
|
+
- `Extract rows (range)`: file path, separator, start row, end row, output destination.
|
|
60
|
+
- `Randomize rows`: file path, separator, headers present, optional seed, output destination.
|
|
61
|
+
- `Dedupe using another CSV`: source/reference files, separators, header modes, key selectors, match options, output destination.
|
|
54
62
|
|
|
55
63
|
### 4. Example interaction (console output)
|
|
56
64
|
|
|
@@ -111,6 +119,53 @@ With Bundler:
|
|
|
111
119
|
bundle exec csvtool column /path/to/file.csv column_name
|
|
112
120
|
```
|
|
113
121
|
|
|
122
|
+
### 7. Dedupe interaction example
|
|
123
|
+
|
|
124
|
+
Legend: ` ` = prompt/menu, `+` = user input, `-` = tool output
|
|
125
|
+
|
|
126
|
+
```diff
|
|
127
|
+
CSV Tool Menu
|
|
128
|
+
1. Extract column
|
|
129
|
+
2. Extract rows (range)
|
|
130
|
+
3. Randomize rows
|
|
131
|
+
4. Dedupe using another CSV
|
|
132
|
+
5. Exit
|
|
133
|
+
+> 4
|
|
134
|
+
CSV file path: /tmp/source.csv
|
|
135
|
+
Source CSV separator:
|
|
136
|
+
Choose separator:
|
|
137
|
+
1. comma (,)
|
|
138
|
+
2. tab (\t)
|
|
139
|
+
3. semicolon (;)
|
|
140
|
+
4. pipe (|)
|
|
141
|
+
5. custom
|
|
142
|
+
+Separator choice [1]: 1
|
|
143
|
+
Source headers present? [Y/n]:
|
|
144
|
+
Reference CSV file path: /tmp/reference.csv
|
|
145
|
+
Reference CSV separator:
|
|
146
|
+
Choose separator:
|
|
147
|
+
1. comma (,)
|
|
148
|
+
2. tab (\t)
|
|
149
|
+
3. semicolon (;)
|
|
150
|
+
4. pipe (|)
|
|
151
|
+
5. custom
|
|
152
|
+
+Separator choice [1]: 1
|
|
153
|
+
Reference headers present? [Y/n]:
|
|
154
|
+
Source key column name: customer_id
|
|
155
|
+
Reference key column name: external_id
|
|
156
|
+
Trim whitespace before matching? [Y/n]:
|
|
157
|
+
Case-insensitive matching? [y/N]:
|
|
158
|
+
Output destination:
|
|
159
|
+
1. console
|
|
160
|
+
2. file
|
|
161
|
+
+Output destination [1]: 1
|
|
162
|
+
-
|
|
163
|
+
-customer_id,name
|
|
164
|
+
-1,Alice
|
|
165
|
+
-3,Cara
|
|
166
|
+
-Summary: source_rows=5 removed_rows=3 kept_rows=2
|
|
167
|
+
```
|
|
168
|
+
|
|
114
169
|
## Testing
|
|
115
170
|
|
|
116
171
|
Run tests:
|
|
@@ -127,7 +182,7 @@ bundle exec rake test
|
|
|
127
182
|
|
|
128
183
|
## Alpha release
|
|
129
184
|
|
|
130
|
-
Current prerelease version: `0.3.0.alpha`
|
|
185
|
+
Current prerelease version: `0.5.0.alpha`
|
|
131
186
|
|
|
132
187
|
Install prerelease from RubyGems:
|
|
133
188
|
|
|
@@ -137,146 +192,11 @@ gem install csvops --pre
|
|
|
137
192
|
|
|
138
193
|
Release runbook:
|
|
139
194
|
|
|
140
|
-
- `docs/release-v0.3.0-alpha.md`
|
|
195
|
+
- `docs/release-v0.5.0-alpha.md`
|
|
141
196
|
|
|
142
|
-
## Architecture
|
|
143
|
-
|
|
144
|
-
The codebase follows a DDD-lite layered structure:
|
|
145
|
-
|
|
146
|
-
- `domain/`: core domain models and invariants (`ColumnSession`, `RowSession`, and `RandomizationSession` aggregates + supporting entities/value objects).
|
|
147
|
-
- `application/`: use-case orchestration (`RunExtraction`, `RunRowExtraction`, `RunRowRandomization`).
|
|
148
|
-
- `infrastructure/`: CSV reading/streaming and output adapters (console/file).
|
|
149
|
-
- `interface/cli/`: menu, prompts, and user-facing error presentation.
|
|
150
|
-
- `Csvtool::CLI`: entrypoint wiring from command args to interface/application flow.
|
|
151
|
-
|
|
152
|
-
## Domain model
|
|
153
|
-
|
|
154
|
-
Bounded contexts: `Column Extraction`, `Row Extraction`, and `Row Randomization`.
|
|
155
|
-
|
|
156
|
-
### Column Extraction
|
|
157
|
-
|
|
158
|
-
- Aggregate root: `ColumnSession`
|
|
159
|
-
- Controls extraction state transitions (`start`, `with_preview`, `confirm!`, `with_output_destination`).
|
|
160
|
-
- Enforces session-level invariants.
|
|
161
|
-
- Entities:
|
|
162
|
-
- `CsvSource` (file path + `Separator`)
|
|
163
|
-
- `ColumnSelection` (chosen header)
|
|
164
|
-
- Value objects:
|
|
165
|
-
- `Separator`
|
|
166
|
-
- `ExtractionOptions` (`skip_blanks`, `preview_limit`)
|
|
167
|
-
- `Preview` (list of `ExtractionValue`)
|
|
168
|
-
- `ExtractionValue`
|
|
169
|
-
- `OutputDestination` (`console` or `file(path)`)
|
|
170
|
-
- Application service:
|
|
171
|
-
- `Application::UseCases::RunExtraction` orchestrates one extraction request.
|
|
172
|
-
- Infrastructure adapters:
|
|
173
|
-
- `Infrastructure::CSV::HeaderReader`
|
|
174
|
-
- `Infrastructure::CSV::ValueStreamer`
|
|
175
|
-
- `Infrastructure::Output::ConsoleWriter`
|
|
176
|
-
- `Infrastructure::Output::CsvFileWriter`
|
|
177
|
-
- Interface adapters:
|
|
178
|
-
- `Interface::CLI::MenuLoop`
|
|
179
|
-
- `Interface::CLI::Prompts::*`
|
|
180
|
-
- `Interface::CLI::Errors::Presenter`
|
|
181
|
-
|
|
182
|
-
```mermaid
|
|
183
|
-
flowchart LR
|
|
184
|
-
UI["Interface CLI\n(Menu + Prompts + Errors)"] --> APP["Application Use Case\nRunExtraction"]
|
|
185
|
-
APP --> AGG["Domain Aggregate\nColumnSession"]
|
|
186
|
-
|
|
187
|
-
AGG --> E1["Entity\nCsvSource"]
|
|
188
|
-
AGG --> E2["Entity\nColumnSelection"]
|
|
189
|
-
AGG --> V1["Value Objects\nSeparator / ExtractionOptions / Preview / OutputDestination / ExtractionValue"]
|
|
190
|
-
|
|
191
|
-
APP --> INFCSV["Infrastructure CSV\nHeaderReader + ValueStreamer"]
|
|
192
|
-
APP --> INFOUT["Infrastructure Output\nConsoleWriter + CsvFileWriter"]
|
|
193
|
-
```
|
|
194
|
-
|
|
195
|
-
### Row Extraction
|
|
196
|
-
|
|
197
|
-
Core DDD structure:
|
|
198
|
-
|
|
199
|
-
- Aggregate root: `RowSession`
|
|
200
|
-
- Captures one row-range extraction request.
|
|
201
|
-
- Holds selected source, requested range, and output destination.
|
|
202
|
-
- Entity:
|
|
203
|
-
- `RowSource` (file path + separator)
|
|
204
|
-
- Value objects:
|
|
205
|
-
- `RowRange` (`start_row`, `end_row`) plus row-range validation errors
|
|
206
|
-
- `RowOutputDestination` (`console` or `file(path)`)
|
|
207
|
-
- Application service:
|
|
208
|
-
- `Application::UseCases::RunRowExtraction` orchestrates row-range extraction.
|
|
209
|
-
- Infrastructure adapters:
|
|
210
|
-
- `Infrastructure::CSV::HeaderReader`
|
|
211
|
-
- `Infrastructure::CSV::RowStreamer`
|
|
212
|
-
- `Infrastructure::Output::CsvRowConsoleWriter`
|
|
213
|
-
- `Infrastructure::Output::CsvRowFileWriter`
|
|
214
|
-
- Interface adapters:
|
|
215
|
-
- `Interface::CLI::MenuLoop`
|
|
216
|
-
- `Interface::CLI::Prompts::*`
|
|
217
|
-
- `Interface::CLI::Errors::Presenter`
|
|
218
|
-
|
|
219
|
-
```mermaid
|
|
220
|
-
flowchart LR
|
|
221
|
-
UI2["Interface CLI\n(Menu + Prompts + Errors)"] --> APP2["Application Use Case\nRunRowExtraction"]
|
|
222
|
-
APP2 --> AGG2["Domain Aggregate\nRowSession"]
|
|
223
|
-
|
|
224
|
-
AGG2 --> E3["Entity\nRowSource"]
|
|
225
|
-
AGG2 --> V2["Value Objects\nRowRange / RowOutputDestination"]
|
|
226
|
-
|
|
227
|
-
APP2 --> INFCSV2["Infrastructure CSV\nHeaderReader + RowStreamer"]
|
|
228
|
-
APP2 --> INFOUT2["Infrastructure Output\nCsvRowConsoleWriter + CsvRowFileWriter"]
|
|
229
|
-
```
|
|
230
197
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
Core DDD structure:
|
|
234
|
-
|
|
235
|
-
- Aggregate root: `RandomizationSession`
|
|
236
|
-
- Captures one randomization request from source + options + output destination.
|
|
237
|
-
- Entity:
|
|
238
|
-
- `RandomizationSource` (file path + separator + header mode)
|
|
239
|
-
- Value objects:
|
|
240
|
-
- `RandomizationOptions` (optional deterministic `seed`)
|
|
241
|
-
- `RandomizationOutputDestination` (`console` or `file(path)`)
|
|
242
|
-
- Application service:
|
|
243
|
-
- `Application::UseCases::RunRowRandomization` orchestrates row randomization.
|
|
244
|
-
- Infrastructure adapters:
|
|
245
|
-
- `Infrastructure::CSV::HeaderReader`
|
|
246
|
-
- `Infrastructure::CSV::RowRandomizer` (external chunked `RAND + sort` + merge)
|
|
247
|
-
- Interface adapters:
|
|
248
|
-
- `Interface::CLI::MenuLoop`
|
|
249
|
-
- `Interface::CLI::Prompts::*`
|
|
250
|
-
- `Interface::CLI::Errors::Presenter`
|
|
251
|
-
|
|
252
|
-
```mermaid
|
|
253
|
-
flowchart LR
|
|
254
|
-
UI3["Interface CLI\n(Menu + Prompts + Errors)"] --> APP3["Application Use Case\nRunRowRandomization"]
|
|
255
|
-
APP3 --> AGG3["Domain Aggregate\nRandomizationSession"]
|
|
256
|
-
|
|
257
|
-
AGG3 --> E4["Entity\nRandomizationSource"]
|
|
258
|
-
AGG3 --> V3["Value Objects\nRandomizationOptions / RandomizationOutputDestination"]
|
|
259
|
-
|
|
260
|
-
APP3 --> INFCSV3["Infrastructure CSV\nHeaderReader + RowRandomizer"]
|
|
261
|
-
```
|
|
198
|
+
## Architecture
|
|
262
199
|
|
|
263
|
-
|
|
200
|
+
Full architecture and domain documentation lives in:
|
|
264
201
|
|
|
265
|
-
|
|
266
|
-
bin/tool # CLI entrypoint
|
|
267
|
-
lib/csvtool/cli.rb
|
|
268
|
-
lib/csvtool/domain/column_session/*
|
|
269
|
-
lib/csvtool/domain/row_session/*
|
|
270
|
-
lib/csvtool/domain/row_randomization_session/*
|
|
271
|
-
lib/csvtool/application/use_cases/run_extraction.rb
|
|
272
|
-
lib/csvtool/application/use_cases/run_row_extraction.rb
|
|
273
|
-
lib/csvtool/application/use_cases/run_row_randomization.rb
|
|
274
|
-
lib/csvtool/infrastructure/csv/*
|
|
275
|
-
lib/csvtool/infrastructure/output/*
|
|
276
|
-
lib/csvtool/interface/cli/menu_loop.rb
|
|
277
|
-
lib/csvtool/interface/cli/prompts/*
|
|
278
|
-
lib/csvtool/interface/cli/errors/presenter.rb
|
|
279
|
-
test/csvtool/cli_test.rb # end-to-end workflow tests
|
|
280
|
-
test/csvtool/**/*_test.rb # focused unit tests by component folder
|
|
281
|
-
test/test_helper.rb
|
|
282
|
-
```
|
|
202
|
+
- [`docs/architecture.md`](docs/architecture.md)
|
|
data/docs/architecture.md
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
The codebase follows a DDD-lite layered structure:
|
|
4
|
+
|
|
5
|
+
- `domain/`: core domain models and invariants (`ColumnSession`, `RowSession`, `RandomizationSession`, and `CrossCsvDedupeSession` aggregates + supporting entities/value objects).
|
|
6
|
+
- `application/`: use-case orchestration (`RunExtraction`, `RunRowExtraction`, `RunRowRandomization`, `RunCrossCsvDedupe`).
|
|
7
|
+
- `infrastructure/`: CSV reading/streaming and output adapters (console/file), plus cross-CSV dedupe adapter.
|
|
8
|
+
- `interface/cli/`: menu, prompts, workflows, and user-facing error presentation.
|
|
9
|
+
- `Csvtool::CLI`: entrypoint wiring from command args to interface/application flow.
|
|
10
|
+
|
|
11
|
+
## Workflow boundary (standardized)
|
|
12
|
+
|
|
13
|
+
For all interactive domains (`Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`), the boundary is:
|
|
14
|
+
|
|
15
|
+
- `interface/cli/workflows/*`: owns prompts, stdout rendering, and user-facing error presentation.
|
|
16
|
+
- `interface/cli/workflows/builders/*`: builds domain sessions/aggregates from prompt results.
|
|
17
|
+
- `interface/cli/workflows/support/*`: shared workflow utilities (error routing, output destination mapping).
|
|
18
|
+
- `interface/cli/workflows/presenters/*`: workflow-level output/summary rendering.
|
|
19
|
+
- `interface/cli/workflows/steps/*`: optional step-pipeline units for complex workflow orchestration.
|
|
20
|
+
- `application/use_cases/*`: interface-agnostic orchestration with request/result style contracts.
|
|
21
|
+
- `domain/*`: invariants and domain policies.
|
|
22
|
+
- `infrastructure/*`: CSV mechanics and output adapters.
|
|
23
|
+
|
|
24
|
+
Write-boundary rule:
|
|
25
|
+
- Use cases coordinate write paths but do not perform direct file writes.
|
|
26
|
+
- Direct write APIs (`CSV.open`, writable `File.open`, `File.write`, `IO.write`) are infrastructure-only.
|
|
27
|
+
- File output behavior is implemented in `infrastructure/output/*` writer adapters.
|
|
28
|
+
|
|
29
|
+
Current usage:
|
|
30
|
+
|
|
31
|
+
- `RunExtractionWorkflow` uses `WorkflowStepPipeline` + `Steps::Extraction::*`.
|
|
32
|
+
- `RunRowExtractionWorkflow` uses `WorkflowStepPipeline` + `Steps::RowExtraction::*`.
|
|
33
|
+
- `RunRowRandomizationWorkflow` uses `WorkflowStepPipeline` + `Steps::RowRandomization::*`.
|
|
34
|
+
- `RunCrossCsvDedupeWorkflow` uses `WorkflowStepPipeline` + `Steps::CrossCsvDedupe::*`.
|
|
35
|
+
|
|
36
|
+
## Adding New Concepts
|
|
37
|
+
|
|
38
|
+
Use this checklist when introducing a new capability (for example: a new transformation function, validator, comparer, or exporter).
|
|
39
|
+
|
|
40
|
+
### 1) Classify the concept first
|
|
41
|
+
|
|
42
|
+
- `Workflow concept`: interactive flow and prompt sequence.
|
|
43
|
+
- `Domain concept`: business rule/invariant and core vocabulary.
|
|
44
|
+
- `Application concept`: use-case orchestration and request/result contract.
|
|
45
|
+
- `Infrastructure concept`: file/CSV mechanics, streaming, persistence, or external IO.
|
|
46
|
+
|
|
47
|
+
If it does not clearly fit one layer, split it until each part has one responsibility.
|
|
48
|
+
|
|
49
|
+
### 2) Add the feature vertically (thin slice)
|
|
50
|
+
|
|
51
|
+
Implement in this order:
|
|
52
|
+
|
|
53
|
+
1. `interface/cli/workflows/*`: new workflow entry or new branch in an existing workflow.
|
|
54
|
+
2. `interface/cli/prompts/*`: prompts for user inputs.
|
|
55
|
+
3. `interface/cli/workflows/builders/*`: build domain session/request objects.
|
|
56
|
+
4. `application/use_cases/*`: interface-agnostic use case with `Result` success/failure.
|
|
57
|
+
5. `domain/*`: new entities/value objects/aggregate changes for invariants.
|
|
58
|
+
6. `infrastructure/*`: adapters needed by the use case.
|
|
59
|
+
7. `interface/cli/workflows/presenters/*`: output and summaries.
|
|
60
|
+
|
|
61
|
+
Keep each step testable on its own before moving to the next.
|
|
62
|
+
|
|
63
|
+
### 3) Function type patterns
|
|
64
|
+
|
|
65
|
+
For a new function type, prefer one of these patterns:
|
|
66
|
+
|
|
67
|
+
- `Transform` (changes output rows/values):
|
|
68
|
+
- Domain: transformation options/value objects.
|
|
69
|
+
- Application: orchestrate transform over streamed rows.
|
|
70
|
+
- Infrastructure: stream reader/writer implementation.
|
|
71
|
+
- `Validate` (checks and reports findings):
|
|
72
|
+
- Domain: validation policy and finding model.
|
|
73
|
+
- Application: run checks and return findings in result data.
|
|
74
|
+
- Presenter: format findings and summary.
|
|
75
|
+
- `Compare` (source vs reference logic):
|
|
76
|
+
- Domain: mapping/selectors/match options.
|
|
77
|
+
- Application: compare strategy and stats.
|
|
78
|
+
- Infrastructure: dual-source readers and selector helpers.
|
|
79
|
+
- `Export` (destination-focused):
|
|
80
|
+
- Domain: output destination value object.
|
|
81
|
+
- Application: orchestrate write path only.
|
|
82
|
+
- Infrastructure: writer adapter.
|
|
83
|
+
|
|
84
|
+
### 4) Required boundaries and rules
|
|
85
|
+
|
|
86
|
+
- Workflows do not contain business rules.
|
|
87
|
+
- Use cases do not prompt or print.
|
|
88
|
+
- Domain does not depend on interface or infrastructure.
|
|
89
|
+
- Infrastructure does not own workflow decisions.
|
|
90
|
+
- Shared workflow helpers belong under `workflows/support/*`.
|
|
91
|
+
- Reusable construction logic belongs under `workflows/builders/*`.
|
|
92
|
+
- Rendering/summary formatting belongs under `workflows/presenters/*`.
|
|
93
|
+
|
|
94
|
+
### 5) Minimum tests for each new concept
|
|
95
|
+
|
|
96
|
+
- Prompt tests for each new prompt class.
|
|
97
|
+
- Builder tests for each new builder class.
|
|
98
|
+
- Use-case tests for request/result behavior.
|
|
99
|
+
- Workflow behavior tests for prompt + output integration.
|
|
100
|
+
- One end-to-end CLI test for the happy path.
|
|
101
|
+
|
|
102
|
+
### 6) Naming and structure guidance
|
|
103
|
+
|
|
104
|
+
- Prefer domain-first names (`RowRange`, `ColumnSelection`, `MatchOptions`) over technical names.
|
|
105
|
+
- Use `Run<Concept>` for use cases and `Run<Concept>Workflow` for workflows.
|
|
106
|
+
- Keep one file per class and mirror structure under `test/csvtool/...`.
|
|
107
|
+
|
|
108
|
+
## Domain model
|
|
109
|
+
|
|
110
|
+
Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, and `Cross-CSV Dedupe`.
|
|
111
|
+
|
|
112
|
+
### Cross-CSV Dedupe (Large-file behavior)
|
|
113
|
+
|
|
114
|
+
- Workflow: remove rows from a source CSV when source key matches a key from a reference CSV.
|
|
115
|
+
- Scaling strategy:
|
|
116
|
+
- Reference CSV keys are loaded into a `Set` for fast membership checks.
|
|
117
|
+
- Source CSV rows are streamed directly to the selected output destination (console or file).
|
|
118
|
+
- Memory tradeoff:
|
|
119
|
+
- Memory is dominated by the number of unique keys in the reference CSV.
|
|
120
|
+
- Source-row memory stays bounded because retained rows are not accumulated in memory before writing.
|
|
121
|
+
|
|
122
|
+
### Column Extraction
|
|
123
|
+
|
|
124
|
+
- Aggregate root: `ColumnSession`
|
|
125
|
+
- Controls extraction state transitions (`start`, `with_preview`, `confirm!`, `with_output_destination`).
|
|
126
|
+
- Enforces session-level invariants.
|
|
127
|
+
- Entities:
|
|
128
|
+
- `CsvSource` (file path + `Separator`)
|
|
129
|
+
- `ColumnSelection` (chosen header)
|
|
130
|
+
- Value objects:
|
|
131
|
+
- `Separator`
|
|
132
|
+
- `ExtractionOptions` (`skip_blanks`, `preview_limit`)
|
|
133
|
+
- `Preview` (list of `ExtractionValue`)
|
|
134
|
+
- `ExtractionValue`
|
|
135
|
+
- Shared `OutputDestination` (`console` or `file(path)`)
|
|
136
|
+
- Application service:
|
|
137
|
+
- `Application::UseCases::RunExtraction` is interface-agnostic and exposes request/result operations.
|
|
138
|
+
- Infrastructure adapters:
|
|
139
|
+
- `Infrastructure::CSV::HeaderReader`
|
|
140
|
+
- `Infrastructure::CSV::ValueStreamer`
|
|
141
|
+
- `Infrastructure::Output::ConsoleWriter`
|
|
142
|
+
- `Infrastructure::Output::CsvFileWriter`
|
|
143
|
+
- Interface adapters:
|
|
144
|
+
- `Interface::CLI::MenuLoop`
|
|
145
|
+
- `Interface::CLI::Workflows::RunExtractionWorkflow`
|
|
146
|
+
- `Interface::CLI::Workflows::Builders::ColumnSessionBuilder`
|
|
147
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
148
|
+
- `Interface::CLI::Workflows::Steps::Extraction::*`
|
|
149
|
+
- `Interface::CLI::Workflows::Presenters::ColumnExtractionPresenter`
|
|
150
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
151
|
+
- `Interface::CLI::Prompts::*`
|
|
152
|
+
- `Interface::CLI::Errors::Presenter`
|
|
153
|
+
|
|
154
|
+
```mermaid
|
|
155
|
+
classDiagram
|
|
156
|
+
direction LR
|
|
157
|
+
class MenuLoop
|
|
158
|
+
class RunExtractionWorkflow
|
|
159
|
+
class Prompts
|
|
160
|
+
class Errors
|
|
161
|
+
class RunExtraction
|
|
162
|
+
class ColumnSession
|
|
163
|
+
class CsvSource
|
|
164
|
+
class ColumnSelection
|
|
165
|
+
class ExtractionOptions
|
|
166
|
+
class Preview
|
|
167
|
+
class ExtractionValue
|
|
168
|
+
class OutputDestination
|
|
169
|
+
class HeaderReader
|
|
170
|
+
class ValueStreamer
|
|
171
|
+
class CsvFileWriter
|
|
172
|
+
|
|
173
|
+
MenuLoop --> RunExtractionWorkflow : invokes
|
|
174
|
+
RunExtractionWorkflow --> Prompts : uses
|
|
175
|
+
RunExtractionWorkflow --> Errors : reports failures
|
|
176
|
+
RunExtractionWorkflow --> RunExtraction : calls
|
|
177
|
+
RunExtraction --> ColumnSession : orchestrates
|
|
178
|
+
ColumnSession o-- CsvSource
|
|
179
|
+
ColumnSession o-- ColumnSelection
|
|
180
|
+
ColumnSession o-- ExtractionOptions
|
|
181
|
+
ColumnSession o-- Preview
|
|
182
|
+
Preview o-- ExtractionValue
|
|
183
|
+
ColumnSession o-- OutputDestination
|
|
184
|
+
RunExtraction --> HeaderReader
|
|
185
|
+
RunExtraction --> ValueStreamer
|
|
186
|
+
RunExtraction --> CsvFileWriter
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Row Extraction
|
|
190
|
+
|
|
191
|
+
Core DDD structure:
|
|
192
|
+
|
|
193
|
+
- Aggregate root: `RowSession`
|
|
194
|
+
- Captures one row-range extraction request.
|
|
195
|
+
- Holds selected source, requested range, and output destination.
|
|
196
|
+
- Entity:
|
|
197
|
+
- `RowSource` (file path + separator)
|
|
198
|
+
- Value objects:
|
|
199
|
+
- `RowRange` (`start_row`, `end_row`) plus row-range validation errors
|
|
200
|
+
- Shared `OutputDestination` (`console` or `file(path)`)
|
|
201
|
+
- Application service:
|
|
202
|
+
- `Application::UseCases::RunRowExtraction` is interface-agnostic and exposes request/result operations.
|
|
203
|
+
- Infrastructure adapters:
|
|
204
|
+
- `Infrastructure::CSV::HeaderReader`
|
|
205
|
+
- `Infrastructure::CSV::RowStreamer`
|
|
206
|
+
- `Infrastructure::Output::CsvRowFileWriter`
|
|
207
|
+
- Interface adapters:
|
|
208
|
+
- `Interface::CLI::MenuLoop`
|
|
209
|
+
- `Interface::CLI::Workflows::RunRowExtractionWorkflow`
|
|
210
|
+
- `Interface::CLI::Workflows::Builders::RowExtractionSessionBuilder`
|
|
211
|
+
- `Interface::CLI::Workflows::Presenters::RowExtractionPresenter`
|
|
212
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
213
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
214
|
+
- `Interface::CLI::Workflows::Steps::RowExtraction::*`
|
|
215
|
+
- `Interface::CLI::Prompts::*`
|
|
216
|
+
- `Interface::CLI::Errors::Presenter`
|
|
217
|
+
|
|
218
|
+
```mermaid
|
|
219
|
+
classDiagram
|
|
220
|
+
direction LR
|
|
221
|
+
class MenuLoop
|
|
222
|
+
class RunRowExtractionWorkflow
|
|
223
|
+
class Prompts
|
|
224
|
+
class Errors
|
|
225
|
+
class RunRowExtraction
|
|
226
|
+
class RowSession
|
|
227
|
+
class RowSource
|
|
228
|
+
class RowRange
|
|
229
|
+
class OutputDestination
|
|
230
|
+
class HeaderReader
|
|
231
|
+
class RowStreamer
|
|
232
|
+
class CsvRowFileWriter
|
|
233
|
+
MenuLoop --> RunRowExtractionWorkflow : invokes
|
|
234
|
+
RunRowExtractionWorkflow --> Prompts : uses
|
|
235
|
+
RunRowExtractionWorkflow --> Errors : reports failures
|
|
236
|
+
RunRowExtractionWorkflow --> RunRowExtraction : calls
|
|
237
|
+
RunRowExtraction --> RowSession : orchestrates
|
|
238
|
+
RowSession o-- RowSource
|
|
239
|
+
RowSession o-- RowRange
|
|
240
|
+
RowSession o-- OutputDestination
|
|
241
|
+
RunRowExtraction --> HeaderReader
|
|
242
|
+
RunRowExtraction --> RowStreamer
|
|
243
|
+
RunRowExtraction --> CsvRowFileWriter
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### Row Randomization
|
|
247
|
+
|
|
248
|
+
Core DDD structure:
|
|
249
|
+
|
|
250
|
+
- Aggregate root: `RandomizationSession`
|
|
251
|
+
- Captures one randomization request from source + options + output destination.
|
|
252
|
+
- Entity:
|
|
253
|
+
- `RandomizationSource` (file path + separator + header mode)
|
|
254
|
+
- Value objects:
|
|
255
|
+
- `RandomizationOptions` (optional deterministic `seed`)
|
|
256
|
+
- Shared `OutputDestination` (`console` or `file(path)`)
|
|
257
|
+
- Application service:
|
|
258
|
+
- `Application::UseCases::RunRowRandomization` is interface-agnostic and exposes request/result operations.
|
|
259
|
+
- Infrastructure adapters:
|
|
260
|
+
- `Infrastructure::CSV::HeaderReader`
|
|
261
|
+
- `Infrastructure::CSV::RowRandomizer` (external chunked `RAND + sort` + merge)
|
|
262
|
+
- `Infrastructure::Output::CsvRandomizedRowFileWriter`
|
|
263
|
+
- Interface adapters:
|
|
264
|
+
- `Interface::CLI::MenuLoop`
|
|
265
|
+
- `Interface::CLI::Workflows::RunRowRandomizationWorkflow`
|
|
266
|
+
- `Interface::CLI::Workflows::Builders::RowRandomizationSessionBuilder`
|
|
267
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
268
|
+
- `Interface::CLI::Workflows::Steps::RowRandomization::*`
|
|
269
|
+
- `Interface::CLI::Workflows::Presenters::RowRandomizationPresenter`
|
|
270
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
271
|
+
- `Interface::CLI::Prompts::*`
|
|
272
|
+
- `Interface::CLI::Errors::Presenter`
|
|
273
|
+
|
|
274
|
+
```mermaid
|
|
275
|
+
classDiagram
|
|
276
|
+
direction LR
|
|
277
|
+
class MenuLoop
|
|
278
|
+
class RunRowRandomizationWorkflow
|
|
279
|
+
class Prompts
|
|
280
|
+
class Errors
|
|
281
|
+
class RunRowRandomization
|
|
282
|
+
class RandomizationSession
|
|
283
|
+
class RandomizationSource
|
|
284
|
+
class RandomizationOptions
|
|
285
|
+
class OutputDestination
|
|
286
|
+
class HeaderReader
|
|
287
|
+
class RowRandomizer
|
|
288
|
+
class CsvRandomizedRowFileWriter
|
|
289
|
+
|
|
290
|
+
MenuLoop --> RunRowRandomizationWorkflow : invokes
|
|
291
|
+
RunRowRandomizationWorkflow --> Prompts : uses
|
|
292
|
+
RunRowRandomizationWorkflow --> Errors : reports failures
|
|
293
|
+
RunRowRandomizationWorkflow --> RunRowRandomization : calls
|
|
294
|
+
RunRowRandomization --> RandomizationSession : orchestrates
|
|
295
|
+
RandomizationSession o-- RandomizationSource
|
|
296
|
+
RandomizationSession o-- RandomizationOptions
|
|
297
|
+
RandomizationSession o-- OutputDestination
|
|
298
|
+
RunRowRandomization --> HeaderReader
|
|
299
|
+
RunRowRandomization --> RowRandomizer
|
|
300
|
+
RunRowRandomization --> CsvRandomizedRowFileWriter
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
### Cross-CSV Dedupe
|
|
304
|
+
|
|
305
|
+
Core DDD structure:
|
|
306
|
+
|
|
307
|
+
- Aggregate root: `CrossCsvDedupeSession`
|
|
308
|
+
- Captures one dedupe request with source/reference profiles, key mapping, match options, and output destination.
|
|
309
|
+
- Entities:
|
|
310
|
+
- `CsvProfile` (path + separator + header mode) for source and reference CSVs.
|
|
311
|
+
- `KeyMapping` (source selector + reference selector).
|
|
312
|
+
- Value objects:
|
|
313
|
+
- `ColumnSelector` (header name or 1-based index mode)
|
|
314
|
+
- `MatchOptions` (`trim_whitespace`, `case_insensitive`, plus normalization behavior)
|
|
315
|
+
- Shared `OutputDestination` (`console` or `file(path)`)
|
|
316
|
+
- Application service:
|
|
317
|
+
- `Application::UseCases::RunCrossCsvDedupe` orchestrates the dedupe workflow.
|
|
318
|
+
- Infrastructure adapters:
|
|
319
|
+
- `Infrastructure::CSV::HeaderReader`
|
|
320
|
+
- `Infrastructure::CSV::SelectorValidator`
|
|
321
|
+
- `Infrastructure::CSV::CrossCsvDeduper` (streams source rows while checking membership against reference key set)
|
|
322
|
+
- `Infrastructure::Output::CsvCrossCsvDedupeFileWriter`
|
|
323
|
+
- Interface adapters:
|
|
324
|
+
- `Interface::CLI::MenuLoop`
|
|
325
|
+
- `Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow`
|
|
326
|
+
- `Interface::CLI::Workflows::Builders::CrossCsvDedupeSessionBuilder`
|
|
327
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
328
|
+
- `Interface::CLI::Workflows::Steps::CrossCsvDedupe::*`
|
|
329
|
+
- `Interface::CLI::Workflows::Presenters::CrossCsvDedupePresenter`
|
|
330
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
331
|
+
- `Interface::CLI::Prompts::*`
|
|
332
|
+
- `Interface::CLI::Errors::Presenter`
|
|
333
|
+
|
|
334
|
+
```mermaid
|
|
335
|
+
classDiagram
|
|
336
|
+
direction LR
|
|
337
|
+
class MenuLoop
|
|
338
|
+
class RunCrossCsvDedupeWorkflow
|
|
339
|
+
class Prompts
|
|
340
|
+
class Errors
|
|
341
|
+
class RunCrossCsvDedupe
|
|
342
|
+
class CrossCsvDedupeSession
|
|
343
|
+
class CsvProfile
|
|
344
|
+
class KeyMapping
|
|
345
|
+
class ColumnSelector
|
|
346
|
+
class MatchOptions
|
|
347
|
+
class OutputDestination
|
|
348
|
+
class HeaderReader
|
|
349
|
+
class SelectorValidator
|
|
350
|
+
class CrossCsvDeduper
|
|
351
|
+
class CsvCrossCsvDedupeFileWriter
|
|
352
|
+
|
|
353
|
+
MenuLoop --> RunCrossCsvDedupeWorkflow : invokes
|
|
354
|
+
RunCrossCsvDedupeWorkflow --> Prompts : uses
|
|
355
|
+
RunCrossCsvDedupeWorkflow --> Errors : reports failures
|
|
356
|
+
RunCrossCsvDedupeWorkflow --> RunCrossCsvDedupe : calls
|
|
357
|
+
RunCrossCsvDedupe --> CrossCsvDedupeSession : orchestrates
|
|
358
|
+
CrossCsvDedupeSession o-- CsvProfile
|
|
359
|
+
CrossCsvDedupeSession o-- KeyMapping
|
|
360
|
+
KeyMapping o-- ColumnSelector
|
|
361
|
+
CrossCsvDedupeSession o-- MatchOptions
|
|
362
|
+
CrossCsvDedupeSession o-- OutputDestination
|
|
363
|
+
RunCrossCsvDedupe --> HeaderReader
|
|
364
|
+
RunCrossCsvDedupe --> SelectorValidator
|
|
365
|
+
RunCrossCsvDedupe --> CrossCsvDeduper
|
|
366
|
+
RunCrossCsvDedupe --> CsvCrossCsvDedupeFileWriter
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
## Project layout
|
|
370
|
+
|
|
371
|
+
```text
|
|
372
|
+
bin/tool # CLI entrypoint
|
|
373
|
+
lib/csvtool/cli.rb
|
|
374
|
+
lib/csvtool/domain/column_session/*
|
|
375
|
+
lib/csvtool/domain/row_session/*
|
|
376
|
+
lib/csvtool/domain/row_randomization_session/*
|
|
377
|
+
lib/csvtool/domain/cross_csv_dedupe_session/*
|
|
378
|
+
lib/csvtool/domain/shared/output_destination.rb
|
|
379
|
+
lib/csvtool/application/use_cases/run_extraction.rb
|
|
380
|
+
lib/csvtool/application/use_cases/run_row_extraction.rb
|
|
381
|
+
lib/csvtool/application/use_cases/run_row_randomization.rb
|
|
382
|
+
lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb
|
|
383
|
+
lib/csvtool/infrastructure/csv/*
|
|
384
|
+
lib/csvtool/infrastructure/output/*
|
|
385
|
+
lib/csvtool/interface/cli/menu_loop.rb
|
|
386
|
+
lib/csvtool/interface/cli/workflows/*
|
|
387
|
+
lib/csvtool/interface/cli/workflows/builders/*
|
|
388
|
+
lib/csvtool/interface/cli/workflows/support/*
|
|
389
|
+
lib/csvtool/interface/cli/workflows/presenters/*
|
|
390
|
+
lib/csvtool/interface/cli/workflows/steps/*
|
|
391
|
+
lib/csvtool/interface/cli/prompts/*
|
|
392
|
+
lib/csvtool/interface/cli/errors/presenter.rb
|
|
393
|
+
test/csvtool/cli_test.rb # end-to-end workflow tests
|
|
394
|
+
test/csvtool/**/*_test.rb # focused unit tests by component folder
|
|
395
|
+
test/test_helper.rb
|
|
396
|
+
```
|