csvops 0.4.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -9
- data/docs/architecture.md +148 -18
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +6 -6
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- metadata +60 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b96fb7e03fa0629d3412a97d3abff5414492ac46ad08ede2c872e2176fcbfc62
|
|
4
|
+
data.tar.gz: 856b7735a472b5810d5f19dff6371a565a7fcc538ce5b6eba52260fff0028760
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5f643d331c6b54cb5feb0fe5db4ff7f8f7bc5c28461f74e3bfca5cf93d25703b84f497e72377302874b2b6302ef0fb542995c72d2d21798e3a998f6d5b294704
|
|
7
|
+
data.tar.gz: 0e254fa75780ce0605054c24b28301d8786535a0f2bbff7adfb45a75f09e60e5315e950648208fa5772d08cdd6abce95ea382838f568947af05ceaa77ba1888f
|
data/README.md
CHANGED
|
@@ -45,14 +45,20 @@ Select `1` for column extraction, `2` for row-range extraction, `3` for row rand
|
|
|
45
45
|
|
|
46
46
|
### 3. Follow prompts
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
Each menu action runs through a dedicated CLI workflow (`interface/cli/workflows/*`) that handles prompts/output and delegates execution to an interface-agnostic application use case.
|
|
49
49
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
-
|
|
53
|
-
-
|
|
54
|
-
-
|
|
55
|
-
|
|
50
|
+
Workflow internals are split into small composable parts:
|
|
51
|
+
|
|
52
|
+
- `workflows/builders/*` for session construction
|
|
53
|
+
- `workflows/support/*` for shared mapping/dispatch utilities
|
|
54
|
+
- `workflows/presenters/*` for output formatting and summaries
|
|
55
|
+
|
|
56
|
+
Prompt flow by action:
|
|
57
|
+
|
|
58
|
+
- `Extract column`: file path, separator, optional header filter + column select, skip blanks, preview/confirm, output destination.
|
|
59
|
+
- `Extract rows (range)`: file path, separator, start row, end row, output destination.
|
|
60
|
+
- `Randomize rows`: file path, separator, headers present, optional seed, output destination.
|
|
61
|
+
- `Dedupe using another CSV`: source/reference files, separators, header modes, key selectors, match options, output destination.
|
|
56
62
|
|
|
57
63
|
### 4. Example interaction (console output)
|
|
58
64
|
|
|
@@ -176,7 +182,7 @@ bundle exec rake test
|
|
|
176
182
|
|
|
177
183
|
## Alpha release
|
|
178
184
|
|
|
179
|
-
Current prerelease version: `0.4.0.alpha`
|
|
185
|
+
Current prerelease version: `0.5.0.alpha`
|
|
180
186
|
|
|
181
187
|
Install prerelease from RubyGems:
|
|
182
188
|
|
|
@@ -186,7 +192,7 @@ gem install csvops --pre
|
|
|
186
192
|
|
|
187
193
|
Release runbook:
|
|
188
194
|
|
|
189
|
-
- `docs/release-v0.4.0-alpha.md`
|
|
195
|
+
- `docs/release-v0.5.0-alpha.md`
|
|
190
196
|
|
|
191
197
|
|
|
192
198
|
## Architecture
|
data/docs/architecture.md
CHANGED
|
@@ -8,6 +8,103 @@ The codebase follows a DDD-lite layered structure:
|
|
|
8
8
|
- `interface/cli/`: menu, prompts, workflows, and user-facing error presentation.
|
|
9
9
|
- `Csvtool::CLI`: entrypoint wiring from command args to interface/application flow.
|
|
10
10
|
|
|
11
|
+
## Workflow boundary (standardized)
|
|
12
|
+
|
|
13
|
+
For all interactive domains (`Column Extraction`, `Row Extraction`, `Row Randomization`, `Cross-CSV Dedupe`), the boundary is:
|
|
14
|
+
|
|
15
|
+
- `interface/cli/workflows/*`: owns prompts, stdout rendering, and user-facing error presentation.
|
|
16
|
+
- `interface/cli/workflows/builders/*`: builds domain sessions/aggregates from prompt results.
|
|
17
|
+
- `interface/cli/workflows/support/*`: shared workflow utilities (error routing, output destination mapping).
|
|
18
|
+
- `interface/cli/workflows/presenters/*`: workflow-level output/summary rendering.
|
|
19
|
+
- `interface/cli/workflows/steps/*`: optional step-pipeline units for complex workflow orchestration.
|
|
20
|
+
- `application/use_cases/*`: interface-agnostic orchestration with request/result style contracts.
|
|
21
|
+
- `domain/*`: invariants and domain policies.
|
|
22
|
+
- `infrastructure/*`: CSV mechanics and output adapters.
|
|
23
|
+
|
|
24
|
+
Write-boundary rule:
|
|
25
|
+
- Use cases coordinate write paths but do not perform direct file writes.
|
|
26
|
+
- Direct write APIs (`CSV.open`, writable `File.open`, `File.write`, `IO.write`) are infrastructure-only.
|
|
27
|
+
- File output behavior is implemented in `infrastructure/output/*` writer adapters.
|
|
28
|
+
|
|
29
|
+
Current usage:
|
|
30
|
+
|
|
31
|
+
- `RunExtractionWorkflow` uses `WorkflowStepPipeline` + `Steps::Extraction::*`.
|
|
32
|
+
- `RunRowExtractionWorkflow` uses `WorkflowStepPipeline` + `Steps::RowExtraction::*`.
|
|
33
|
+
- `RunRowRandomizationWorkflow` uses `WorkflowStepPipeline` + `Steps::RowRandomization::*`.
|
|
34
|
+
- `RunCrossCsvDedupeWorkflow` uses `WorkflowStepPipeline` + `Steps::CrossCsvDedupe::*`.
|
|
35
|
+
|
|
36
|
+
## Adding New Concepts
|
|
37
|
+
|
|
38
|
+
Use this checklist when introducing a new capability (for example: a new transformation function, validator, comparer, or exporter).
|
|
39
|
+
|
|
40
|
+
### 1) Classify the concept first
|
|
41
|
+
|
|
42
|
+
- `Workflow concept`: interactive flow and prompt sequence.
|
|
43
|
+
- `Domain concept`: business rule/invariant and core vocabulary.
|
|
44
|
+
- `Application concept`: use-case orchestration and request/result contract.
|
|
45
|
+
- `Infrastructure concept`: file/CSV mechanics, streaming, persistence, or external IO.
|
|
46
|
+
|
|
47
|
+
If it does not clearly fit one layer, split it until each part has one responsibility.
|
|
48
|
+
|
|
49
|
+
### 2) Add the feature vertically (thin slice)
|
|
50
|
+
|
|
51
|
+
Implement in this order:
|
|
52
|
+
|
|
53
|
+
1. `interface/cli/workflows/*`: new workflow entry or new branch in an existing workflow.
|
|
54
|
+
2. `interface/cli/prompts/*`: prompts for user inputs.
|
|
55
|
+
3. `interface/cli/workflows/builders/*`: build domain session/request objects.
|
|
56
|
+
4. `application/use_cases/*`: interface-agnostic use case with `Result` success/failure.
|
|
57
|
+
5. `domain/*`: new entities/value objects/aggregate changes for invariants.
|
|
58
|
+
6. `infrastructure/*`: adapters needed by the use case.
|
|
59
|
+
7. `interface/cli/workflows/presenters/*`: output and summaries.
|
|
60
|
+
|
|
61
|
+
Keep each step testable on its own before moving to the next.
|
|
62
|
+
|
|
63
|
+
### 3) Function type patterns
|
|
64
|
+
|
|
65
|
+
For a new function type, prefer one of these patterns:
|
|
66
|
+
|
|
67
|
+
- `Transform` (changes output rows/values):
|
|
68
|
+
- Domain: transformation options/value objects.
|
|
69
|
+
- Application: orchestrate transform over streamed rows.
|
|
70
|
+
- Infrastructure: stream reader/writer implementation.
|
|
71
|
+
- `Validate` (checks and reports findings):
|
|
72
|
+
- Domain: validation policy and finding model.
|
|
73
|
+
- Application: run checks and return findings in result data.
|
|
74
|
+
- Presenter: format findings and summary.
|
|
75
|
+
- `Compare` (source vs reference logic):
|
|
76
|
+
- Domain: mapping/selectors/match options.
|
|
77
|
+
- Application: compare strategy and stats.
|
|
78
|
+
- Infrastructure: dual-source readers and selector helpers.
|
|
79
|
+
- `Export` (destination-focused):
|
|
80
|
+
- Domain: output destination value object.
|
|
81
|
+
- Application: orchestrate write path only.
|
|
82
|
+
- Infrastructure: writer adapter.
|
|
83
|
+
|
|
84
|
+
### 4) Required boundaries and rules
|
|
85
|
+
|
|
86
|
+
- Workflows do not contain business rules.
|
|
87
|
+
- Use cases do not prompt or print.
|
|
88
|
+
- Domain does not depend on interface or infrastructure.
|
|
89
|
+
- Infrastructure does not own workflow decisions.
|
|
90
|
+
- Shared workflow helpers belong under `workflows/support/*`.
|
|
91
|
+
- Reusable construction logic belongs under `workflows/builders/*`.
|
|
92
|
+
- Rendering/summary formatting belongs under `workflows/presenters/*`.
|
|
93
|
+
|
|
94
|
+
### 5) Minimum tests for each new concept
|
|
95
|
+
|
|
96
|
+
- Prompt tests for each new prompt class.
|
|
97
|
+
- Builder tests for each new builder class.
|
|
98
|
+
- Use-case tests for request/result behavior.
|
|
99
|
+
- Workflow behavior tests for prompt + output integration.
|
|
100
|
+
- One end-to-end CLI test for the happy path.
|
|
101
|
+
|
|
102
|
+
### 6) Naming and structure guidance
|
|
103
|
+
|
|
104
|
+
- Prefer domain-first names (`RowRange`, `ColumnSelection`, `MatchOptions`) over technical names.
|
|
105
|
+
- Use `Run<Concept>` for use cases and `Run<Concept>Workflow` for workflows.
|
|
106
|
+
- Keep one file per class and mirror structure under `test/csvtool/...`.
|
|
107
|
+
|
|
11
108
|
## Domain model
|
|
12
109
|
|
|
13
110
|
Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, and `Cross-CSV Dedupe`.
|
|
@@ -37,7 +134,7 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
|
|
|
37
134
|
- `ExtractionValue`
|
|
38
135
|
- Shared `OutputDestination` (`console` or `file(path)`)
|
|
39
136
|
- Application service:
|
|
40
|
-
- `Application::UseCases::RunExtraction`
|
|
137
|
+
- `Application::UseCases::RunExtraction` is interface-agnostic and exposes request/result operations.
|
|
41
138
|
- Infrastructure adapters:
|
|
42
139
|
- `Infrastructure::CSV::HeaderReader`
|
|
43
140
|
- `Infrastructure::CSV::ValueStreamer`
|
|
@@ -45,6 +142,12 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
|
|
|
45
142
|
- `Infrastructure::Output::CsvFileWriter`
|
|
46
143
|
- Interface adapters:
|
|
47
144
|
- `Interface::CLI::MenuLoop`
|
|
145
|
+
- `Interface::CLI::Workflows::RunExtractionWorkflow`
|
|
146
|
+
- `Interface::CLI::Workflows::Builders::ColumnSessionBuilder`
|
|
147
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
148
|
+
- `Interface::CLI::Workflows::Steps::Extraction::*`
|
|
149
|
+
- `Interface::CLI::Workflows::Presenters::ColumnExtractionPresenter`
|
|
150
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
48
151
|
- `Interface::CLI::Prompts::*`
|
|
49
152
|
- `Interface::CLI::Errors::Presenter`
|
|
50
153
|
|
|
@@ -52,6 +155,7 @@ Bounded contexts: `Column Extraction`, `Row Extraction`, `Row Randomization`, an
|
|
|
52
155
|
classDiagram
|
|
53
156
|
direction LR
|
|
54
157
|
class MenuLoop
|
|
158
|
+
class RunExtractionWorkflow
|
|
55
159
|
class Prompts
|
|
56
160
|
class Errors
|
|
57
161
|
class RunExtraction
|
|
@@ -64,12 +168,12 @@ classDiagram
|
|
|
64
168
|
class OutputDestination
|
|
65
169
|
class HeaderReader
|
|
66
170
|
class ValueStreamer
|
|
67
|
-
class ConsoleWriter
|
|
68
171
|
class CsvFileWriter
|
|
69
172
|
|
|
70
|
-
MenuLoop -->
|
|
71
|
-
|
|
72
|
-
|
|
173
|
+
MenuLoop --> RunExtractionWorkflow : invokes
|
|
174
|
+
RunExtractionWorkflow --> Prompts : uses
|
|
175
|
+
RunExtractionWorkflow --> Errors : reports failures
|
|
176
|
+
RunExtractionWorkflow --> RunExtraction : calls
|
|
73
177
|
RunExtraction --> ColumnSession : orchestrates
|
|
74
178
|
ColumnSession o-- CsvSource
|
|
75
179
|
ColumnSession o-- ColumnSelection
|
|
@@ -79,7 +183,6 @@ classDiagram
|
|
|
79
183
|
ColumnSession o-- OutputDestination
|
|
80
184
|
RunExtraction --> HeaderReader
|
|
81
185
|
RunExtraction --> ValueStreamer
|
|
82
|
-
RunExtraction --> ConsoleWriter
|
|
83
186
|
RunExtraction --> CsvFileWriter
|
|
84
187
|
```
|
|
85
188
|
|
|
@@ -96,14 +199,19 @@ Core DDD structure:
|
|
|
96
199
|
- `RowRange` (`start_row`, `end_row`) plus row-range validation errors
|
|
97
200
|
- Shared `OutputDestination` (`console` or `file(path)`)
|
|
98
201
|
- Application service:
|
|
99
|
-
- `Application::UseCases::RunRowExtraction`
|
|
202
|
+
- `Application::UseCases::RunRowExtraction` is interface-agnostic and exposes request/result operations.
|
|
100
203
|
- Infrastructure adapters:
|
|
101
204
|
- `Infrastructure::CSV::HeaderReader`
|
|
102
205
|
- `Infrastructure::CSV::RowStreamer`
|
|
103
|
-
- `Infrastructure::Output::CsvRowConsoleWriter`
|
|
104
206
|
- `Infrastructure::Output::CsvRowFileWriter`
|
|
105
207
|
- Interface adapters:
|
|
106
208
|
- `Interface::CLI::MenuLoop`
|
|
209
|
+
- `Interface::CLI::Workflows::RunRowExtractionWorkflow`
|
|
210
|
+
- `Interface::CLI::Workflows::Builders::RowExtractionSessionBuilder`
|
|
211
|
+
- `Interface::CLI::Workflows::Presenters::RowExtractionPresenter`
|
|
212
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
213
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
214
|
+
- `Interface::CLI::Workflows::Steps::RowExtraction::*`
|
|
107
215
|
- `Interface::CLI::Prompts::*`
|
|
108
216
|
- `Interface::CLI::Errors::Presenter`
|
|
109
217
|
|
|
@@ -111,6 +219,7 @@ Core DDD structure:
|
|
|
111
219
|
classDiagram
|
|
112
220
|
direction LR
|
|
113
221
|
class MenuLoop
|
|
222
|
+
class RunRowExtractionWorkflow
|
|
114
223
|
class Prompts
|
|
115
224
|
class Errors
|
|
116
225
|
class RunRowExtraction
|
|
@@ -120,19 +229,17 @@ classDiagram
|
|
|
120
229
|
class OutputDestination
|
|
121
230
|
class HeaderReader
|
|
122
231
|
class RowStreamer
|
|
123
|
-
class CsvRowConsoleWriter
|
|
124
232
|
class CsvRowFileWriter
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
233
|
+
MenuLoop --> RunRowExtractionWorkflow : invokes
|
|
234
|
+
RunRowExtractionWorkflow --> Prompts : uses
|
|
235
|
+
RunRowExtractionWorkflow --> Errors : reports failures
|
|
236
|
+
RunRowExtractionWorkflow --> RunRowExtraction : calls
|
|
129
237
|
RunRowExtraction --> RowSession : orchestrates
|
|
130
238
|
RowSession o-- RowSource
|
|
131
239
|
RowSession o-- RowRange
|
|
132
240
|
RowSession o-- OutputDestination
|
|
133
241
|
RunRowExtraction --> HeaderReader
|
|
134
242
|
RunRowExtraction --> RowStreamer
|
|
135
|
-
RunRowExtraction --> CsvRowConsoleWriter
|
|
136
243
|
RunRowExtraction --> CsvRowFileWriter
|
|
137
244
|
```
|
|
138
245
|
|
|
@@ -148,12 +255,19 @@ Core DDD structure:
|
|
|
148
255
|
- `RandomizationOptions` (optional deterministic `seed`)
|
|
149
256
|
- Shared `OutputDestination` (`console` or `file(path)`)
|
|
150
257
|
- Application service:
|
|
151
|
-
- `Application::UseCases::RunRowRandomization`
|
|
258
|
+
- `Application::UseCases::RunRowRandomization` is interface-agnostic and exposes request/result operations.
|
|
152
259
|
- Infrastructure adapters:
|
|
153
260
|
- `Infrastructure::CSV::HeaderReader`
|
|
154
261
|
- `Infrastructure::CSV::RowRandomizer` (external chunked `RAND + sort` + merge)
|
|
262
|
+
- `Infrastructure::Output::CsvRandomizedRowFileWriter`
|
|
155
263
|
- Interface adapters:
|
|
156
264
|
- `Interface::CLI::MenuLoop`
|
|
265
|
+
- `Interface::CLI::Workflows::RunRowRandomizationWorkflow`
|
|
266
|
+
- `Interface::CLI::Workflows::Builders::RowRandomizationSessionBuilder`
|
|
267
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
268
|
+
- `Interface::CLI::Workflows::Steps::RowRandomization::*`
|
|
269
|
+
- `Interface::CLI::Workflows::Presenters::RowRandomizationPresenter`
|
|
270
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
157
271
|
- `Interface::CLI::Prompts::*`
|
|
158
272
|
- `Interface::CLI::Errors::Presenter`
|
|
159
273
|
|
|
@@ -161,6 +275,7 @@ Core DDD structure:
|
|
|
161
275
|
classDiagram
|
|
162
276
|
direction LR
|
|
163
277
|
class MenuLoop
|
|
278
|
+
class RunRowRandomizationWorkflow
|
|
164
279
|
class Prompts
|
|
165
280
|
class Errors
|
|
166
281
|
class RunRowRandomization
|
|
@@ -170,16 +285,19 @@ classDiagram
|
|
|
170
285
|
class OutputDestination
|
|
171
286
|
class HeaderReader
|
|
172
287
|
class RowRandomizer
|
|
288
|
+
class CsvRandomizedRowFileWriter
|
|
173
289
|
|
|
174
|
-
MenuLoop -->
|
|
175
|
-
|
|
176
|
-
|
|
290
|
+
MenuLoop --> RunRowRandomizationWorkflow : invokes
|
|
291
|
+
RunRowRandomizationWorkflow --> Prompts : uses
|
|
292
|
+
RunRowRandomizationWorkflow --> Errors : reports failures
|
|
293
|
+
RunRowRandomizationWorkflow --> RunRowRandomization : calls
|
|
177
294
|
RunRowRandomization --> RandomizationSession : orchestrates
|
|
178
295
|
RandomizationSession o-- RandomizationSource
|
|
179
296
|
RandomizationSession o-- RandomizationOptions
|
|
180
297
|
RandomizationSession o-- OutputDestination
|
|
181
298
|
RunRowRandomization --> HeaderReader
|
|
182
299
|
RunRowRandomization --> RowRandomizer
|
|
300
|
+
RunRowRandomization --> CsvRandomizedRowFileWriter
|
|
183
301
|
```
|
|
184
302
|
|
|
185
303
|
### Cross-CSV Dedupe
|
|
@@ -201,9 +319,15 @@ Core DDD structure:
|
|
|
201
319
|
- `Infrastructure::CSV::HeaderReader`
|
|
202
320
|
- `Infrastructure::CSV::SelectorValidator`
|
|
203
321
|
- `Infrastructure::CSV::CrossCsvDeduper` (streams source rows while checking membership against reference key set)
|
|
322
|
+
- `Infrastructure::Output::CsvCrossCsvDedupeFileWriter`
|
|
204
323
|
- Interface adapters:
|
|
205
324
|
- `Interface::CLI::MenuLoop`
|
|
206
325
|
- `Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow`
|
|
326
|
+
- `Interface::CLI::Workflows::Builders::CrossCsvDedupeSessionBuilder`
|
|
327
|
+
- `Interface::CLI::Workflows::Steps::WorkflowStepPipeline`
|
|
328
|
+
- `Interface::CLI::Workflows::Steps::CrossCsvDedupe::*`
|
|
329
|
+
- `Interface::CLI::Workflows::Presenters::CrossCsvDedupePresenter`
|
|
330
|
+
- `Interface::CLI::Workflows::Support::{OutputDestinationMapper,ResultErrorHandler}`
|
|
207
331
|
- `Interface::CLI::Prompts::*`
|
|
208
332
|
- `Interface::CLI::Errors::Presenter`
|
|
209
333
|
|
|
@@ -224,6 +348,7 @@ classDiagram
|
|
|
224
348
|
class HeaderReader
|
|
225
349
|
class SelectorValidator
|
|
226
350
|
class CrossCsvDeduper
|
|
351
|
+
class CsvCrossCsvDedupeFileWriter
|
|
227
352
|
|
|
228
353
|
MenuLoop --> RunCrossCsvDedupeWorkflow : invokes
|
|
229
354
|
Prompts --> RunCrossCsvDedupeWorkflow : provides input
|
|
@@ -238,6 +363,7 @@ classDiagram
|
|
|
238
363
|
RunCrossCsvDedupe --> HeaderReader
|
|
239
364
|
RunCrossCsvDedupe --> SelectorValidator
|
|
240
365
|
RunCrossCsvDedupe --> CrossCsvDeduper
|
|
366
|
+
RunCrossCsvDedupe --> CsvCrossCsvDedupeFileWriter
|
|
241
367
|
```
|
|
242
368
|
|
|
243
369
|
## Project layout
|
|
@@ -258,6 +384,10 @@ lib/csvtool/infrastructure/csv/*
|
|
|
258
384
|
lib/csvtool/infrastructure/output/*
|
|
259
385
|
lib/csvtool/interface/cli/menu_loop.rb
|
|
260
386
|
lib/csvtool/interface/cli/workflows/*
|
|
387
|
+
lib/csvtool/interface/cli/workflows/builders/*
|
|
388
|
+
lib/csvtool/interface/cli/workflows/support/*
|
|
389
|
+
lib/csvtool/interface/cli/workflows/presenters/*
|
|
390
|
+
lib/csvtool/interface/cli/workflows/steps/*
|
|
261
391
|
lib/csvtool/interface/cli/prompts/*
|
|
262
392
|
lib/csvtool/interface/cli/errors/presenter.rb
|
|
263
393
|
test/csvtool/cli_test.rb # end-to-end workflow tests
|
|
data/docs/release-v0.5.0-alpha.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Release Checklist: v0.5.0-alpha
|
|
2
|
+
|
|
3
|
+
## 1. Verify environment
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
ruby -v
|
|
7
|
+
bundle -v
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
Expected:
|
|
11
|
+
- Ruby `3.3.x`
|
|
12
|
+
|
|
13
|
+
## 2. Install dependencies
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bundle install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 3. Run quality checks
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
bundle exec rake test
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 4. Smoke test CLI commands
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
bundle exec csvtool menu
|
|
29
|
+
bundle exec csvtool column test/fixtures/sample_people.csv name
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 5. Smoke test workflows
|
|
33
|
+
|
|
34
|
+
### Row extraction workflow
|
|
35
|
+
|
|
36
|
+
Use menu option `2` (`Extract rows (range)`) and verify:
|
|
37
|
+
- headered CSV rows print correctly in console mode
|
|
38
|
+
- out-of-bounds row range shows friendly message
|
|
39
|
+
- file output mode writes expected CSV rows
|
|
40
|
+
|
|
41
|
+
### Row randomization workflow
|
|
42
|
+
|
|
43
|
+
Use menu option `3` (`Randomize rows`) and verify:
|
|
44
|
+
- seeded mode is reproducible
|
|
45
|
+
- headered and headerless modes both work
|
|
46
|
+
- file output path writes valid randomized CSV
|
|
47
|
+
|
|
48
|
+
### Cross-CSV dedupe workflow
|
|
49
|
+
|
|
50
|
+
Use menu option `4` (`Dedupe using another CSV`) and verify:
|
|
51
|
+
- expected retained rows for headered source/reference files
|
|
52
|
+
- separator/header-mode combinations still work
|
|
53
|
+
- file output mode writes expected deduped CSV
|
|
54
|
+
|
|
55
|
+
## 6. Build and validate gem package
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
gem build csvops.gemspec
|
|
59
|
+
gem install ./csvops-0.5.0.alpha.gem
|
|
60
|
+
csvtool menu
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## 7. Commit release prep
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
git add -A
|
|
67
|
+
git commit -m "chore(release): prepare v0.5.0-alpha"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## 8. Tag release
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
git tag -a v0.5.0-alpha -m "v0.5.0-alpha"
|
|
74
|
+
git push origin main --tags
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## 9. Publish gem (optional for alpha)
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
gem push csvops-0.5.0.alpha.gem
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## 10. Create GitHub release
|
|
84
|
+
|
|
85
|
+
Create release `v0.5.0-alpha` with:
|
|
86
|
+
- Use-case file-write boundary cleanup across all workflows
|
|
87
|
+
- New infrastructure file-writer adapters for row randomization and cross-CSV dedupe
|
|
88
|
+
- Final architecture boundary audit with guard test for direct write APIs in use cases
|
|
89
|
+
- Updated architecture diagrams to reflect current writer adapter dependencies
|
|
data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb
CHANGED
|
@@ -4,6 +4,7 @@ require "csv"
|
|
|
4
4
|
require "csvtool/infrastructure/csv/header_reader"
|
|
5
5
|
require "csvtool/infrastructure/csv/cross_csv_deduper"
|
|
6
6
|
require "csvtool/infrastructure/csv/selector_validator"
|
|
7
|
+
require "csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer"
|
|
7
8
|
|
|
8
9
|
module Csvtool
|
|
9
10
|
module Application
|
|
@@ -18,11 +19,15 @@ module Csvtool
|
|
|
18
19
|
def initialize(
|
|
19
20
|
header_reader: Infrastructure::CSV::HeaderReader.new,
|
|
20
21
|
deduper: Infrastructure::CSV::CrossCsvDeduper.new,
|
|
21
|
-
selector_validator: Infrastructure::CSV::SelectorValidator.new(header_reader: header_reader)
|
|
22
|
+
selector_validator: Infrastructure::CSV::SelectorValidator.new(header_reader: header_reader),
|
|
23
|
+
csv_cross_csv_dedupe_file_writer: nil
|
|
22
24
|
)
|
|
23
25
|
@header_reader = header_reader
|
|
24
26
|
@deduper = deduper
|
|
25
27
|
@selector_validator = selector_validator
|
|
28
|
+
@csv_cross_csv_dedupe_file_writer = csv_cross_csv_dedupe_file_writer || Infrastructure::Output::CsvCrossCsvDedupeFileWriter.new(
|
|
29
|
+
deduper: @deduper
|
|
30
|
+
)
|
|
26
31
|
end
|
|
27
32
|
|
|
28
33
|
def call(session:, on_header: nil, on_row: nil)
|
|
@@ -46,26 +51,24 @@ module Csvtool
|
|
|
46
51
|
end
|
|
47
52
|
rescue CSV::MalformedCSVError
|
|
48
53
|
failure(:could_not_parse_csv)
|
|
49
|
-
rescue Errno::EACCES
|
|
50
|
-
|
|
54
|
+
rescue Errno::EACCES, Errno::ENOENT => e
|
|
55
|
+
if session.output_destination.file?
|
|
56
|
+
failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
|
|
57
|
+
else
|
|
58
|
+
failure(:cannot_read_file, path: current_read_path || session.source.path)
|
|
59
|
+
end
|
|
51
60
|
end
|
|
52
61
|
|
|
53
62
|
private
|
|
54
63
|
|
|
55
64
|
def write_file(session:, source_headers:)
|
|
56
|
-
stats =
|
|
57
|
-
|
|
58
|
-
session.output_destination.path,
|
|
59
|
-
"w",
|
|
60
|
-
write_headers: !source_headers.nil?,
|
|
65
|
+
stats = @csv_cross_csv_dedupe_file_writer.call(
|
|
66
|
+
path: session.output_destination.path,
|
|
61
67
|
headers: source_headers,
|
|
62
|
-
col_sep: session.source.separator
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
end
|
|
68
|
+
col_sep: session.source.separator,
|
|
69
|
+
dedupe_options: dedupe_options(session)
|
|
70
|
+
)
|
|
66
71
|
success(stats: stats, output_path: session.output_destination.path)
|
|
67
|
-
rescue Errno::EACCES, Errno::ENOENT => e
|
|
68
|
-
failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
|
|
69
72
|
end
|
|
70
73
|
|
|
71
74
|
def dedupe_options(session)
|