csvops 0.3.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -149
- data/docs/architecture.md +396 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +130 -16
- data/test/csvtool/cli_unit_test.rb +16 -3
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- metadata +93 -8
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
# End-to-end tests for RunCrossCsvDedupeWorkflow.
#
# Each test scripts the interactive prompts by feeding newline-separated
# answers through a StringIO stdin and then asserts on the text the workflow
# wrote to a StringIO stdout (and on the output file when one is requested).
#
# NOTE(review): judging by the prompts asserted below and the test names, the
# answers appear to be, in order: source path, separator, headers flag,
# reference path, separator, headers flag, source key, reference key, two
# normalization toggles, output destination (plus a path when "2" is chosen).
# A blank answer presumably accepts the prompt's default -- confirm against
# the workflow.
class RunCrossCsvDedupeWorkflowTest < Minitest::Test
  # Absolute path of the fixture file +name+, resolved relative to this
  # test file's directory.
  def fixture_path(name)
    File.expand_path("../../../../fixtures/#{name}", __dir__)
  end

  def test_dedupes_source_rows_by_reference_column
    printed = run_workflow(
      fixture_path("dedupe_source.csv"), "", "",
      fixture_path("dedupe_reference.csv"), "", "",
      "customer_id", "external_id",
      "", "", ""
    )

    assert_includes printed, "CSV file path:"
    assert_includes printed, "Reference CSV file path:"
    assert_includes printed, "Source key column name:"
    assert_includes printed, "Reference key column name:"
    assert_includes printed, "customer_id,name"
    assert_includes printed, "1,Alice"
    assert_includes printed, "3,Cara"
    refute_includes printed, "2,Bob"
    refute_includes printed, "4,Dan"
    assert_includes printed, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
  end

  def test_can_write_deduped_rows_to_file
    Dir.mktmpdir do |dir|
      output_path = File.join(dir, "deduped.csv")

      printed = run_workflow(
        fixture_path("dedupe_source.csv"), "", "",
        fixture_path("dedupe_reference.csv"), "", "",
        "customer_id", "external_id",
        "", "", "2", output_path
      )

      assert_includes printed, "Wrote output to #{output_path}"
      assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
      assert_includes printed, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
    end
  end

  def test_supports_tsv_separators
    printed = run_workflow(
      fixture_path("dedupe_source.tsv"), "2", "",
      fixture_path("dedupe_reference.tsv"), "2", "",
      "customer_id", "external_id",
      "", "", ""
    )

    assert_includes printed, "customer_id\tname"
    assert_includes printed, "1\tAlice"
    assert_includes printed, "3\tCara"
  end

  def test_headerless_mode_supports_column_index
    printed = run_workflow(
      fixture_path("dedupe_source_no_headers.csv"), "", "n",
      fixture_path("dedupe_reference_no_headers.csv"), "", "n",
      "1", "1",
      "", "", ""
    )

    refute_includes printed, "customer_id,name"
    assert_includes printed, "1,Alice"
    assert_includes printed, "3,Cara"
    assert_includes printed, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
  end

  def test_reports_column_not_found_when_missing
    # Only ten answers are scripted here, one fewer than in the successful runs.
    printed = run_workflow(
      fixture_path("dedupe_source.csv"), "", "",
      fixture_path("dedupe_reference.csv"), "", "",
      "missing", "external_id",
      "", ""
    )

    assert_includes printed, "Column not found."
  end

  def test_reports_when_no_rows_were_removed
    printed = run_workflow(
      fixture_path("dedupe_source.csv"), "", "",
      fixture_path("dedupe_reference_none.csv"), "", "",
      "customer_id", "external_id",
      "", "", ""
    )

    assert_includes printed, "Summary: source_rows=5 removed_rows=0 kept_rows=5"
    assert_includes printed, "No rows removed; no matching keys found."
  end

  def test_reports_when_all_rows_were_removed
    printed = run_workflow(
      fixture_path("dedupe_source.csv"), "", "",
      fixture_path("dedupe_reference_all.csv"), "", "",
      "customer_id", "external_id",
      "", "", ""
    )

    assert_includes printed, "Summary: source_rows=5 removed_rows=5 kept_rows=0"
    assert_includes printed, "All source rows were removed by dedupe."
  end

  def test_normalization_trim_on_and_case_insensitive_on_matches_equivalent_keys
    printed = run_workflow(
      fixture_path("dedupe_source_normalization.csv"), "", "",
      fixture_path("dedupe_reference_normalization.csv"), "", "",
      "customer_id", "external_id",
      "", "y", ""
    )

    refute_includes printed, " A1 ,Alice"
    refute_includes printed, "c3,Cara"
    assert_includes printed, "B2,Bob"
    assert_includes printed, "Summary: source_rows=3 removed_rows=2 kept_rows=1"
  end

  def test_normalization_disabled_preserves_exact_match_behavior
    printed = run_workflow(
      fixture_path("dedupe_source_normalization.csv"), "", "",
      fixture_path("dedupe_reference_normalization.csv"), "", "",
      "customer_id", "external_id",
      "n", "n", ""
    )

    assert_includes printed, " A1 ,Alice"
    assert_includes printed, "B2,Bob"
    assert_includes printed, "c3,Cara"
    assert_includes printed, "Summary: source_rows=3 removed_rows=0 kept_rows=3"
  end

  private

  # Runs the workflow with +answers+ as its scripted stdin (one answer per
  # line, terminated by a trailing newline) and returns everything the
  # workflow wrote to stdout.
  def run_workflow(*answers)
    stdout = StringIO.new
    stdin = StringIO.new(answers.join("\n") + "\n")

    Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
      .new(stdin: stdin, stdout: stdout)
      .call

    stdout.string
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/run_extraction_workflow"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
# End-to-end tests for RunExtractionWorkflow.
#
# Each test feeds newline-separated prompt answers through a StringIO stdin
# and asserts on the text written to a StringIO stdout (and on the output
# file when one is requested).
class RunExtractionWorkflowTest < Minitest::Test
  # Absolute path of the fixture file +name+, resolved relative to this
  # test file's directory.
  def fixture_path(name)
    File.expand_path("../../../../fixtures/#{name}", __dir__)
  end

  def test_missing_file_path_reports_error
    printed = run_workflow("/tmp/not-present.csv", "")

    assert_includes printed, "File not found: /tmp/not-present.csv"
  end

  def test_workflow_can_run_console_happy_path
    printed = run_workflow(fixture_path("sample_people.csv"), "1", "", "1", "", "y", "")

    assert_includes printed, "Alice"
    assert_includes printed, "Bob"
    assert_includes printed, "Cara"
  end

  def test_workflow_can_write_output_file
    Dir.mktmpdir do |dir|
      output_path = File.join(dir, "names.csv")

      printed = run_workflow(fixture_path("sample_people.csv"), "1", "", "1", "", "y", "2", output_path)

      assert_includes printed, "Wrote output to #{output_path}"
      assert_equal "name\nAlice\nBob\nCara\n", File.read(output_path)
    end
  end

  private

  # Runs the workflow with +answers+ as its scripted stdin (one answer per
  # line, terminated by a trailing newline) and returns everything the
  # workflow wrote to stdout.
  def run_workflow(*answers)
    stdout = StringIO.new
    stdin = StringIO.new(answers.join("\n") + "\n")

    Csvtool::Interface::CLI::Workflows::RunExtractionWorkflow
      .new(stdin: stdin, stdout: stdout)
      .call

    stdout.string
  end
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
# End-to-end tests for RunRowExtractionWorkflow.
#
# Each test feeds newline-separated prompt answers through a StringIO stdin
# and asserts on the text written to a StringIO stdout (and on the output
# file when one is requested).
class RunRowExtractionWorkflowTest < Minitest::Test
  # Absolute path of the fixture file +name+, resolved relative to this
  # test file's directory.
  def fixture_path(name)
    File.expand_path("../../../../fixtures/#{name}", __dir__)
  end

  def test_missing_file_path_reports_error
    printed = run_workflow("/tmp/not-present.csv", "")

    assert_includes printed, "File not found: /tmp/not-present.csv"
  end

  def test_workflow_can_run_console_happy_path
    printed = run_workflow(fixture_path("sample_people.csv"), "", "2", "3", "")

    assert_includes printed, "name,city"
    assert_includes printed, "Bob,Paris"
    assert_includes printed, "Cara,Berlin"
    refute_includes printed, "Alice,London"
  end

  def test_workflow_can_write_output_file
    Dir.mktmpdir do |dir|
      output_path = File.join(dir, "rows.csv")

      printed = run_workflow(fixture_path("sample_people.csv"), "", "2", "3", "2", output_path)

      assert_includes printed, "Wrote output to #{output_path}"
      assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
    end
  end

  def test_rejects_non_numeric_start_row
    printed = run_workflow(fixture_path("sample_people.csv"), "", "abc", "3", "")

    assert_includes printed, "Start row must be a positive integer."
  end

  def test_reports_out_of_bounds_range
    printed = run_workflow(fixture_path("sample_people.csv"), "", "10", "12", "")

    assert_includes printed, "Row range is out of bounds. File has 3 data rows."
  end

  private

  # Runs the workflow with +answers+ as its scripted stdin (one answer per
  # line, terminated by a trailing newline) and returns everything the
  # workflow wrote to stdout.
  def run_workflow(*answers)
    stdout = StringIO.new
    stdin = StringIO.new(answers.join("\n") + "\n")

    Csvtool::Interface::CLI::Workflows::RunRowExtractionWorkflow
      .new(stdin: stdin, stdout: stdout)
      .call

    stdout.string
  end
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
# End-to-end tests for RunRowRandomizationWorkflow.
#
# Each test feeds newline-separated prompt answers through a StringIO stdin
# and asserts on the text written to a StringIO stdout (and on the output
# file when one is requested). Row order is never asserted; the tests only
# check which rows are present.
class RunRowRandomizationWorkflowTest < Minitest::Test
  # Absolute path of the fixture file +name+, resolved relative to this
  # test file's directory.
  def fixture_path(name)
    File.expand_path("../../../../fixtures/#{name}", __dir__)
  end

  def test_missing_file_shows_friendly_error
    printed = run_workflow("/tmp/does-not-exist.csv", "")

    assert_includes printed, "File not found: /tmp/does-not-exist.csv"
  end

  def test_workflow_prints_header_then_all_randomized_rows
    printed = run_workflow(fixture_path("sample_people.csv"), "", "", "", "")

    assert_includes printed, "name,city"
    assert_includes printed, "Alice,London"
    assert_includes printed, "Bob,Paris"
    assert_includes printed, "Cara,Berlin"
  end

  def test_workflow_can_write_randomized_rows_to_file
    Dir.mktmpdir do |dir|
      output_path = File.join(dir, "randomized.csv")

      printed = run_workflow(fixture_path("sample_people.csv"), "", "", "", "2", output_path)

      written = File.read(output_path).lines.map(&:strip)
      assert_equal "name,city", written.first
      # Compare sorted so the assertion is independent of the shuffle order.
      assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, written[1..].sort
      assert_includes printed, "Wrote output to #{output_path}"
    end
  end

  def test_workflow_supports_headerless_mode
    printed = run_workflow(fixture_path("sample_people_no_headers.csv"), "", "n", "", "")

    refute_includes printed, "name,city"
    assert_includes printed, "Alice,London"
    assert_includes printed, "Bob,Paris"
    assert_includes printed, "Cara,Berlin"
  end

  def test_invalid_seed_shows_friendly_error
    printed = run_workflow(fixture_path("sample_people.csv"), "", "", "abc")

    assert_includes printed, "Seed must be an integer."
  end

  private

  # Runs the workflow with +answers+ as its scripted stdin (one answer per
  # line, terminated by a trailing newline) and returns everything the
  # workflow wrote to stdout.
  def run_workflow(*answers)
    stdout = StringIO.new
    stdin = StringIO.new(answers.join("\n") + "\n")

    Csvtool::Interface::CLI::Workflows::RunRowRandomizationWorkflow
      .new(stdin: stdin, stdout: stdout)
      .call

    stdout.string
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
6
|
+
|
|
7
|
+
# Unit test for the cross-CSV dedupe CollectOptionsStep, driven entirely by
# hand-rolled doubles.
class CrossCsvDedupeCollectOptionsStepTest < Minitest::Test
  # Error-reporter double that remembers whether column_not_found was invoked.
  class FakeErrors
    attr_reader :column_not_found_called

    def column_not_found
      @column_not_found_called = true
    end
  end

  # Selector prompt double that returns nil for every call, whatever label or
  # headers flag it is given.
  class NilSelectorPrompt
    def call(label:, headers_present:)
      nil
    end
  end

  def test_halts_when_source_selector_invalid
    errors = FakeErrors.new
    step = Csvtool::Interface::CLI::Workflows::Steps::CrossCsvDedupe::CollectOptionsStep.new(
      selector_prompt: NilSelectorPrompt.new,
      yes_no_prompt: Object.new,
      output_destination_prompt: Object.new,
      errors: errors
    )

    outcome = step.call(
      source: csv_profile("/tmp/a.csv"),
      reference: csv_profile("/tmp/b.csv"),
      session_builder: Object.new,
      output_destination_mapper: Object.new
    )

    assert_equal :halt, outcome
    assert_equal true, errors.column_not_found_called
  end

  private

  # Comma-separated, headers-present profile for +path+.
  def csv_profile(path)
    Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: path, separator: ",", headers_present: true)
  end
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step"
|
|
5
|
+
|
|
6
|
+
# Unit tests for the extraction CollectInputsStep, covering the two early
# exits exercised here: no separator chosen, and a failed header read.
class ExtractionCollectInputsStepTest < Minitest::Test
  # Minimal stand-in for a use-case result: an ok flag plus a data payload.
  Result = Struct.new(:ok, :data) do
    def ok?
      ok
    end
  end

  # Use-case double whose read_headers always returns the result it was
  # constructed with, ignoring its arguments.
  class FakeUseCase
    def initialize(result)
      @result = result
    end

    def read_headers(file_path:, col_sep:)
      @result
    end
  end

  # Prompt double whose argument-less #call returns a fixed answer.
  class CannedPrompt
    def initialize(answer)
      @answer = answer
    end

    def call
      @answer
    end
  end

  def test_halts_when_separator_missing
    outcome = build_step(separator: nil).call(
      use_case: FakeUseCase.new(Result.new(true, { headers: [] })),
      session_builder: Object.new,
      handle_error: ->(_r) {}
    )

    assert_equal :halt, outcome
  end

  def test_halts_when_header_read_fails
    handled = []
    fail_result = Result.new(false, {})

    outcome = build_step(separator: ",").call(
      use_case: FakeUseCase.new(fail_result),
      session_builder: Object.new,
      handle_error: ->(r) { handled << r }
    )

    assert_equal :halt, outcome
    assert_equal [fail_result], handled
  end

  private

  # Builds the step with a file prompt answering "/tmp/data.csv", a separator
  # prompt answering +separator+, and inert placeholders for the other prompts.
  def build_step(separator:)
    Csvtool::Interface::CLI::Workflows::Steps::Extraction::CollectInputsStep.new(
      file_path_prompt: CannedPrompt.new("/tmp/data.csv"),
      separator_prompt: CannedPrompt.new(separator),
      column_selector_prompt: Object.new,
      skip_blanks_prompt: Object.new
    )
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step"
|
|
5
|
+
|
|
6
|
+
# Unit tests for the row-extraction CollectSourceStep: it records the file
# path and separator in the context hash, and halts when no separator is
# returned by the prompt.
class CollectSourceStepTest < Minitest::Test
  # Prompt double whose argument-less #call returns a fixed answer.
  class CannedPrompt
    def initialize(answer)
      @answer = answer
    end

    def call
      @answer
    end
  end

  def test_collects_file_and_separator
    context = {}

    outcome = build_step(separator: ",").call(context)

    assert_nil outcome
    assert_equal "/tmp/data.csv", context[:file_path]
    assert_equal ",", context[:col_sep]
  end

  def test_halts_when_separator_missing
    assert_equal :halt, build_step(separator: nil).call({})
  end

  private

  # Builds the step with a file prompt answering "/tmp/data.csv" and a
  # separator prompt answering +separator+.
  def build_step(separator:)
    Csvtool::Interface::CLI::Workflows::Steps::RowExtraction::CollectSourceStep.new(
      file_path_prompt: CannedPrompt.new("/tmp/data.csv"),
      separator_prompt: CannedPrompt.new(separator)
    )
  end
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/steps/row_extraction/execute_step"
|
|
5
|
+
|
|
6
|
+
# Unit tests for the row-extraction ExecuteStep, driven by hand-rolled doubles
# for the use case, the presenter class and the error reporter.
class ExecuteStepTest < Minitest::Test
  # Minimal stand-in for a use-case result: an ok flag plus a data payload.
  Result = Struct.new(:ok, :data) do
    def ok? = ok
  end

  # Use-case double. #extract records that it was invoked, emits one row
  # through +on_row+ when the canned result is ok, and returns that result.
  class FakeUseCase
    attr_reader :called

    def initialize(result)
      @result = result
    end

    def extract(session:, headers:, on_row:)
      @called = true
      on_row.call(["Bob", "Paris"]) if @result.ok?
      @result
    end
  end

  # Presenter double that collects printed rows and the written-file path.
  class FakePresenter
    attr_reader :rows, :written

    def initialize(stdout:, headers:, col_sep:)
      @rows = []
      @written = nil
    end

    def print_row(fields)
      @rows << fields
    end

    def print_file_written(path)
      @written = path
    end
  end

  # Error-reporter double that stores the count passed to
  # row_range_out_of_bounds.
  class FakeErrors
    attr_reader :out_of_bounds

    def row_range_out_of_bounds(count)
      @out_of_bounds = count
    end
  end

  def test_prints_rows_and_reports_out_of_bounds
    errors = FakeErrors.new
    use_case = FakeUseCase.new(Result.new(true, { matched: false, row_count: 3, wrote_rows: false }))
    context = build_context(use_case, ->(_r) { raise "unexpected" })

    result = build_step(errors).call(context)

    assert_nil result
    assert use_case.called, "expected the step to invoke use_case.extract"
    assert_equal 3, errors.out_of_bounds
  end

  def test_halts_on_use_case_failure
    handled = []
    fail_result = Result.new(false, {})
    use_case = FakeUseCase.new(fail_result)
    context = build_context(use_case, ->(r) { handled << r })

    result = build_step(FakeErrors.new).call(context)

    assert_equal :halt, result
    assert use_case.called, "expected the step to invoke use_case.extract"
    assert_equal [fail_result], handled
  end

  private

  # Step under test, wired to a throwaway stdout and the presenter double.
  def build_step(errors)
    Csvtool::Interface::CLI::Workflows::Steps::RowExtraction::ExecuteStep.new(
      stdout: StringIO.new,
      errors: errors,
      presenter_class: FakePresenter
    )
  end

  # Context hash handed to the step; both tests pass it as an explicit Hash.
  def build_context(use_case, handle_error)
    {
      session: Object.new,
      headers: ["name", "city"],
      use_case: use_case,
      handle_error: handle_error
    }
  end
end
|