csvops 0.2.0.alpha → 0.4.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -108
- data/docs/architecture.md +266 -0
- data/docs/release-v0.3.0-alpha.md +74 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +93 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +3 -3
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +3 -3
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +105 -0
- data/lib/csvtool/cli.rb +9 -1
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_options.rb +17 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_session.rb +25 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +24 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/row_randomizer.rb +83 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/interface/cli/errors/presenter.rb +4 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +22 -0
- data/lib/csvtool/interface/cli/prompts/seed_prompt.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +163 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +113 -0
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +124 -0
- data/test/csvtool/cli_test.rb +231 -12
- data/test/csvtool/cli_unit_test.rb +27 -2
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_options_test.rb +20 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +26 -0
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +42 -0
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/row_randomizer_test.rb +37 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +78 -10
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +14 -0
- data/test/csvtool/interface/cli/prompts/seed_prompt_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- data/test/fixtures/sample_people_no_headers.csv +3 -0
- metadata +50 -6
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
data/test/csvtool/cli_test.rb
CHANGED
|
@@ -11,7 +11,7 @@ class TestCli < Minitest::Test
|
|
|
11
11
|
|
|
12
12
|
def test_menu_can_exit_cleanly
|
|
13
13
|
output = StringIO.new
|
|
14
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
14
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: output, stderr: StringIO.new)
|
|
15
15
|
assert_equal 0, status
|
|
16
16
|
assert_includes output.string, "CSV Tool Menu"
|
|
17
17
|
end
|
|
@@ -26,7 +26,7 @@ class TestCli < Minitest::Test
|
|
|
26
26
|
"",
|
|
27
27
|
"y",
|
|
28
28
|
"",
|
|
29
|
-
"
|
|
29
|
+
"5"
|
|
30
30
|
].join("\n") + "\n"
|
|
31
31
|
|
|
32
32
|
output = StringIO.new
|
|
@@ -58,7 +58,7 @@ class TestCli < Minitest::Test
|
|
|
58
58
|
"2",
|
|
59
59
|
"3",
|
|
60
60
|
"",
|
|
61
|
-
"
|
|
61
|
+
"5"
|
|
62
62
|
].join("\n") + "\n"
|
|
63
63
|
|
|
64
64
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -79,7 +79,7 @@ class TestCli < Minitest::Test
|
|
|
79
79
|
"0",
|
|
80
80
|
"3",
|
|
81
81
|
"",
|
|
82
|
-
"
|
|
82
|
+
"5"
|
|
83
83
|
].join("\n") + "\n"
|
|
84
84
|
|
|
85
85
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -98,7 +98,7 @@ class TestCli < Minitest::Test
|
|
|
98
98
|
"2",
|
|
99
99
|
"3",
|
|
100
100
|
"",
|
|
101
|
-
"
|
|
101
|
+
"5"
|
|
102
102
|
].join("\n") + "\n"
|
|
103
103
|
|
|
104
104
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -119,7 +119,7 @@ class TestCli < Minitest::Test
|
|
|
119
119
|
"2",
|
|
120
120
|
"3",
|
|
121
121
|
"",
|
|
122
|
-
"
|
|
122
|
+
"5"
|
|
123
123
|
].join("\n") + "\n"
|
|
124
124
|
|
|
125
125
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -144,7 +144,7 @@ class TestCli < Minitest::Test
|
|
|
144
144
|
"3",
|
|
145
145
|
"2",
|
|
146
146
|
output_path,
|
|
147
|
-
"
|
|
147
|
+
"5"
|
|
148
148
|
].join("\n") + "\n"
|
|
149
149
|
|
|
150
150
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -164,7 +164,7 @@ class TestCli < Minitest::Test
|
|
|
164
164
|
"1",
|
|
165
165
|
"2",
|
|
166
166
|
"",
|
|
167
|
-
"
|
|
167
|
+
"5"
|
|
168
168
|
].join("\n") + "\n"
|
|
169
169
|
|
|
170
170
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -175,6 +175,225 @@ class TestCli < Minitest::Test
|
|
|
175
175
|
refute_includes output.string, "Could not parse CSV file."
|
|
176
176
|
end
|
|
177
177
|
|
|
178
|
+
def test_randomize_rows_workflow_prints_header_and_all_data_rows
|
|
179
|
+
output = StringIO.new
|
|
180
|
+
input = [
|
|
181
|
+
"3",
|
|
182
|
+
fixture_path("sample_people.csv"),
|
|
183
|
+
"",
|
|
184
|
+
"",
|
|
185
|
+
"",
|
|
186
|
+
"",
|
|
187
|
+
"5"
|
|
188
|
+
].join("\n") + "\n"
|
|
189
|
+
|
|
190
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
191
|
+
|
|
192
|
+
assert_equal 0, status
|
|
193
|
+
assert_includes output.string, "name,city"
|
|
194
|
+
assert_includes output.string, "Alice,London"
|
|
195
|
+
assert_includes output.string, "Bob,Paris"
|
|
196
|
+
assert_includes output.string, "Cara,Berlin"
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def test_randomize_rows_workflow_can_write_to_file
|
|
200
|
+
output = StringIO.new
|
|
201
|
+
|
|
202
|
+
Dir.mktmpdir do |dir|
|
|
203
|
+
output_path = File.join(dir, "randomized_rows.csv")
|
|
204
|
+
input = [
|
|
205
|
+
"3",
|
|
206
|
+
fixture_path("sample_people.csv"),
|
|
207
|
+
"",
|
|
208
|
+
"",
|
|
209
|
+
"",
|
|
210
|
+
"2",
|
|
211
|
+
output_path,
|
|
212
|
+
"5"
|
|
213
|
+
].join("\n") + "\n"
|
|
214
|
+
|
|
215
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
216
|
+
|
|
217
|
+
assert_equal 0, status
|
|
218
|
+
assert_includes output.string, "Wrote output to #{output_path}"
|
|
219
|
+
lines = File.read(output_path).lines.map(&:strip)
|
|
220
|
+
assert_equal "name,city", lines.first
|
|
221
|
+
assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, lines[1..].sort
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def test_randomize_rows_workflow_supports_tsv_separator
|
|
226
|
+
output = StringIO.new
|
|
227
|
+
input = [
|
|
228
|
+
"3",
|
|
229
|
+
fixture_path("sample_people.tsv"),
|
|
230
|
+
"2",
|
|
231
|
+
"",
|
|
232
|
+
"",
|
|
233
|
+
"",
|
|
234
|
+
"5"
|
|
235
|
+
].join("\n") + "\n"
|
|
236
|
+
|
|
237
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
238
|
+
|
|
239
|
+
assert_equal 0, status
|
|
240
|
+
assert_includes output.string, "name\tcity"
|
|
241
|
+
assert_includes output.string, "Alice\tLondon"
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
def test_randomize_rows_workflow_headerless_mode_randomizes_all_rows
|
|
245
|
+
output = StringIO.new
|
|
246
|
+
input = [
|
|
247
|
+
"3",
|
|
248
|
+
fixture_path("sample_people_no_headers.csv"),
|
|
249
|
+
"",
|
|
250
|
+
"n",
|
|
251
|
+
"",
|
|
252
|
+
"",
|
|
253
|
+
"5"
|
|
254
|
+
].join("\n") + "\n"
|
|
255
|
+
|
|
256
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
257
|
+
|
|
258
|
+
assert_equal 0, status
|
|
259
|
+
refute_includes output.string, "name,city"
|
|
260
|
+
assert_includes output.string, "Alice,London"
|
|
261
|
+
assert_includes output.string, "Bob,Paris"
|
|
262
|
+
assert_includes output.string, "Cara,Berlin"
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
def test_randomize_rows_invalid_seed_returns_to_menu
|
|
266
|
+
output = StringIO.new
|
|
267
|
+
input = [
|
|
268
|
+
"3",
|
|
269
|
+
fixture_path("sample_people.csv"),
|
|
270
|
+
"",
|
|
271
|
+
"",
|
|
272
|
+
"abc",
|
|
273
|
+
"5"
|
|
274
|
+
].join("\n") + "\n"
|
|
275
|
+
|
|
276
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
277
|
+
|
|
278
|
+
assert_equal 0, status
|
|
279
|
+
assert_includes output.string, "Seed must be an integer."
|
|
280
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
def test_dedupe_workflow_shell_prompts_and_returns_to_menu
|
|
284
|
+
output = StringIO.new
|
|
285
|
+
input = [
|
|
286
|
+
"4",
|
|
287
|
+
fixture_path("dedupe_source.csv"),
|
|
288
|
+
"",
|
|
289
|
+
"",
|
|
290
|
+
fixture_path("dedupe_reference.csv"),
|
|
291
|
+
"",
|
|
292
|
+
"",
|
|
293
|
+
"customer_id",
|
|
294
|
+
"external_id",
|
|
295
|
+
"",
|
|
296
|
+
"",
|
|
297
|
+
"",
|
|
298
|
+
"5"
|
|
299
|
+
].join("\n") + "\n"
|
|
300
|
+
|
|
301
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
302
|
+
|
|
303
|
+
assert_equal 0, status
|
|
304
|
+
assert_includes output.string, "Reference CSV file path:"
|
|
305
|
+
assert_includes output.string, "Source key column name:"
|
|
306
|
+
assert_includes output.string, "Reference key column name:"
|
|
307
|
+
assert_includes output.string, "customer_id,name"
|
|
308
|
+
assert_includes output.string, "1,Alice"
|
|
309
|
+
assert_includes output.string, "3,Cara"
|
|
310
|
+
assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
def test_dedupe_workflow_can_write_to_file
|
|
314
|
+
output = StringIO.new
|
|
315
|
+
|
|
316
|
+
Dir.mktmpdir do |dir|
|
|
317
|
+
output_path = File.join(dir, "deduped.csv")
|
|
318
|
+
input = [
|
|
319
|
+
"4",
|
|
320
|
+
fixture_path("dedupe_source.csv"),
|
|
321
|
+
"",
|
|
322
|
+
"",
|
|
323
|
+
fixture_path("dedupe_reference.csv"),
|
|
324
|
+
"",
|
|
325
|
+
"",
|
|
326
|
+
"customer_id",
|
|
327
|
+
"external_id",
|
|
328
|
+
"",
|
|
329
|
+
"",
|
|
330
|
+
"2",
|
|
331
|
+
output_path,
|
|
332
|
+
"5"
|
|
333
|
+
].join("\n") + "\n"
|
|
334
|
+
|
|
335
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
336
|
+
|
|
337
|
+
assert_equal 0, status
|
|
338
|
+
assert_includes output.string, "Wrote output to #{output_path}"
|
|
339
|
+
assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
|
|
340
|
+
assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
def test_dedupe_workflow_supports_tsv_separators
|
|
345
|
+
output = StringIO.new
|
|
346
|
+
input = [
|
|
347
|
+
"4",
|
|
348
|
+
fixture_path("dedupe_source.tsv"),
|
|
349
|
+
"2",
|
|
350
|
+
"",
|
|
351
|
+
fixture_path("dedupe_reference.tsv"),
|
|
352
|
+
"2",
|
|
353
|
+
"",
|
|
354
|
+
"customer_id",
|
|
355
|
+
"external_id",
|
|
356
|
+
"",
|
|
357
|
+
"",
|
|
358
|
+
"",
|
|
359
|
+
"5"
|
|
360
|
+
].join("\n") + "\n"
|
|
361
|
+
|
|
362
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
363
|
+
|
|
364
|
+
assert_equal 0, status
|
|
365
|
+
assert_includes output.string, "customer_id\tname"
|
|
366
|
+
assert_includes output.string, "1\tAlice"
|
|
367
|
+
assert_includes output.string, "3\tCara"
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
def test_dedupe_workflow_headerless_mode_supports_index
|
|
371
|
+
output = StringIO.new
|
|
372
|
+
input = [
|
|
373
|
+
"4",
|
|
374
|
+
fixture_path("dedupe_source_no_headers.csv"),
|
|
375
|
+
"",
|
|
376
|
+
"n",
|
|
377
|
+
fixture_path("dedupe_reference_no_headers.csv"),
|
|
378
|
+
"",
|
|
379
|
+
"n",
|
|
380
|
+
"1",
|
|
381
|
+
"1",
|
|
382
|
+
"",
|
|
383
|
+
"",
|
|
384
|
+
"",
|
|
385
|
+
"5"
|
|
386
|
+
].join("\n") + "\n"
|
|
387
|
+
|
|
388
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
389
|
+
|
|
390
|
+
assert_equal 0, status
|
|
391
|
+
refute_includes output.string, "customer_id,name"
|
|
392
|
+
assert_includes output.string, "1,Alice"
|
|
393
|
+
assert_includes output.string, "3,Cara"
|
|
394
|
+
assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
395
|
+
end
|
|
396
|
+
|
|
178
397
|
def test_end_to_end_file_output_writes_expected_csv
|
|
179
398
|
output = StringIO.new
|
|
180
399
|
output_path = nil
|
|
@@ -191,7 +410,7 @@ class TestCli < Minitest::Test
|
|
|
191
410
|
"y",
|
|
192
411
|
"2",
|
|
193
412
|
output_path,
|
|
194
|
-
"
|
|
413
|
+
"5"
|
|
195
414
|
].join("\n") + "\n"
|
|
196
415
|
|
|
197
416
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -211,7 +430,7 @@ class TestCli < Minitest::Test
|
|
|
211
430
|
"1",
|
|
212
431
|
"",
|
|
213
432
|
"n",
|
|
214
|
-
"
|
|
433
|
+
"5"
|
|
215
434
|
].join("\n") + "\n"
|
|
216
435
|
|
|
217
436
|
output = StringIO.new
|
|
@@ -226,7 +445,7 @@ class TestCli < Minitest::Test
|
|
|
226
445
|
output = StringIO.new
|
|
227
446
|
status = Csvtool::CLI.start(
|
|
228
447
|
["menu"],
|
|
229
|
-
stdin: StringIO.new("1\n/tmp/does-not-exist.csv\
|
|
448
|
+
stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n"),
|
|
230
449
|
stdout: output,
|
|
231
450
|
stderr: StringIO.new
|
|
232
451
|
)
|
|
@@ -247,7 +466,7 @@ class TestCli < Minitest::Test
|
|
|
247
466
|
"y",
|
|
248
467
|
"2",
|
|
249
468
|
"/tmp/not-a-dir/out.csv",
|
|
250
|
-
"
|
|
469
|
+
"5"
|
|
251
470
|
].join("\n") + "\n"
|
|
252
471
|
|
|
253
472
|
output = StringIO.new
|
|
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def test_menu_command_can_exit_zero
|
|
19
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
19
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: StringIO.new, stderr: StringIO.new)
|
|
20
20
|
assert_equal 0, status
|
|
21
21
|
end
|
|
22
22
|
|
|
@@ -28,11 +28,36 @@ class CliUnitTest < Minitest::Test
|
|
|
28
28
|
def test_menu_routes_to_row_range_shell
|
|
29
29
|
stdout = StringIO.new
|
|
30
30
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
31
|
-
input = ["2", fixture, "", "2", "3", "", "
|
|
31
|
+
input = ["2", fixture, "", "2", "3", "", "5"].join("\n") + "\n"
|
|
32
32
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
33
33
|
assert_equal 0, status
|
|
34
34
|
assert_includes stdout.string, "name,city"
|
|
35
35
|
assert_includes stdout.string, "Bob,Paris"
|
|
36
36
|
assert_includes stdout.string, "Cara,Berlin"
|
|
37
37
|
end
|
|
38
|
+
|
|
39
|
+
def test_menu_routes_to_randomize_rows_shell
|
|
40
|
+
stdout = StringIO.new
|
|
41
|
+
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
42
|
+
input = ["3", fixture, "", "", "", "", "5"].join("\n") + "\n"
|
|
43
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
44
|
+
assert_equal 0, status
|
|
45
|
+
assert_includes stdout.string, "name,city"
|
|
46
|
+
assert_includes stdout.string, "Alice,London"
|
|
47
|
+
assert_includes stdout.string, "Bob,Paris"
|
|
48
|
+
assert_includes stdout.string, "Cara,Berlin"
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def test_menu_routes_to_dedupe_shell
|
|
52
|
+
stdout = StringIO.new
|
|
53
|
+
source_fixture = File.expand_path("../fixtures/dedupe_source.csv", __dir__)
|
|
54
|
+
reference_fixture = File.expand_path("../fixtures/dedupe_reference.csv", __dir__)
|
|
55
|
+
input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "5"].join("\n") + "\n"
|
|
56
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
57
|
+
assert_equal 0, status
|
|
58
|
+
assert_includes stdout.string, "customer_id,name"
|
|
59
|
+
assert_includes stdout.string, "1,Alice"
|
|
60
|
+
assert_includes stdout.string, "3,Cara"
|
|
61
|
+
assert_includes stdout.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
62
|
+
end
|
|
38
63
|
end
|
|
@@ -8,7 +8,7 @@ require "csvtool/domain/column_session/column_selection"
|
|
|
8
8
|
require "csvtool/domain/column_session/extraction_options"
|
|
9
9
|
require "csvtool/domain/column_session/preview"
|
|
10
10
|
require "csvtool/domain/column_session/extraction_value"
|
|
11
|
-
require "csvtool/domain/column_session/output_destination"
|
|
11
|
+
require "csvtool/domain/shared/output_destination"
|
|
12
12
|
|
|
13
13
|
class ColumnSessionTest < Minitest::Test
|
|
14
14
|
def test_state_transitions
|
|
@@ -25,7 +25,7 @@ class ColumnSessionTest < Minitest::Test
|
|
|
25
25
|
values: [Csvtool::Domain::ColumnSession::ExtractionValue.new("Alice")]
|
|
26
26
|
)
|
|
27
27
|
session = session.with_preview(preview).confirm!.with_output_destination(
|
|
28
|
-
Csvtool::Domain::ColumnSession::OutputDestination.console
|
|
28
|
+
Csvtool::Domain::Shared::OutputDestination.console
|
|
29
29
|
)
|
|
30
30
|
|
|
31
31
|
assert_equal true, session.confirmed?
|
|
@@ -11,4 +11,14 @@ class CsvSourceTest < Minitest::Test
|
|
|
11
11
|
assert_equal "/tmp/a.csv", source.path
|
|
12
12
|
assert_equal separator, source.separator
|
|
13
13
|
end
|
|
14
|
+
|
|
15
|
+
def test_rejects_empty_path
|
|
16
|
+
separator = Csvtool::Domain::ColumnSession::Separator.new(",")
|
|
17
|
+
|
|
18
|
+
error = assert_raises(ArgumentError) do
|
|
19
|
+
Csvtool::Domain::ColumnSession::CsvSource.new(path: "", separator: separator)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
assert_equal "path cannot be empty", error.message
|
|
23
|
+
end
|
|
14
24
|
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
5
|
+
|
|
6
|
+
class CrossCsvDedupeColumnSelectorTest < Minitest::Test
|
|
7
|
+
def test_builds_header_selector_from_input
|
|
8
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
|
|
9
|
+
|
|
10
|
+
assert_equal "customer_id", selector.value
|
|
11
|
+
assert_equal true, selector.headers_present?
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def test_builds_index_selector_from_input
|
|
15
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
|
|
16
|
+
|
|
17
|
+
assert_equal 2, selector.value
|
|
18
|
+
assert_equal true, selector.index?
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def test_rejects_invalid_index_input
|
|
22
|
+
error = assert_raises(ArgumentError) do
|
|
23
|
+
Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "0")
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
assert_equal "column index must be a positive integer", error.message
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def test_extracts_from_headered_row
|
|
30
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
|
|
31
|
+
row = { "customer_id" => "42" }
|
|
32
|
+
|
|
33
|
+
assert_equal "42", selector.extract_from(row)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def test_extracts_from_headerless_row_by_index
|
|
37
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
|
|
38
|
+
row = ["a", "b", "c"]
|
|
39
|
+
|
|
40
|
+
assert_equal "b", selector.extract_from(row)
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
6
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
7
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
8
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
9
|
+
require "csvtool/domain/shared/output_destination"
|
|
10
|
+
|
|
11
|
+
class CrossCsvDedupeSessionTest < Minitest::Test
|
|
12
|
+
def test_start_and_with_output_destination
|
|
13
|
+
source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
14
|
+
path: "/tmp/source.csv",
|
|
15
|
+
separator: ",",
|
|
16
|
+
headers_present: true
|
|
17
|
+
)
|
|
18
|
+
reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
19
|
+
path: "/tmp/reference.csv",
|
|
20
|
+
separator: ",",
|
|
21
|
+
headers_present: true
|
|
22
|
+
)
|
|
23
|
+
key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
24
|
+
source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
|
|
25
|
+
reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
|
|
26
|
+
)
|
|
27
|
+
match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
28
|
+
trim_whitespace: true,
|
|
29
|
+
case_insensitive: false
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
session = Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
|
|
33
|
+
source: source,
|
|
34
|
+
reference: reference,
|
|
35
|
+
key_mapping: key_mapping,
|
|
36
|
+
match_options: match_options
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
destination = Csvtool::Domain::Shared::OutputDestination.console
|
|
40
|
+
updated = session.with_output_destination(destination)
|
|
41
|
+
|
|
42
|
+
assert_equal source, updated.source
|
|
43
|
+
assert_equal reference, updated.reference
|
|
44
|
+
assert_equal key_mapping, updated.key_mapping
|
|
45
|
+
assert_equal match_options, updated.match_options
|
|
46
|
+
assert_equal destination, updated.output_destination
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_rejects_invalid_source_type
|
|
50
|
+
reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
51
|
+
path: "/tmp/reference.csv",
|
|
52
|
+
separator: ",",
|
|
53
|
+
headers_present: true
|
|
54
|
+
)
|
|
55
|
+
key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
56
|
+
source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
|
|
57
|
+
reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
|
|
58
|
+
)
|
|
59
|
+
match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
60
|
+
trim_whitespace: true,
|
|
61
|
+
case_insensitive: false
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
error = assert_raises(ArgumentError) do
|
|
65
|
+
Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
|
|
66
|
+
source: "bad",
|
|
67
|
+
reference: reference,
|
|
68
|
+
key_mapping: key_mapping,
|
|
69
|
+
match_options: match_options
|
|
70
|
+
)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
assert_equal "source must be CsvProfile", error.message
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
5
|
+
|
|
6
|
+
class CrossCsvDedupeCsvProfileTest < Minitest::Test
|
|
7
|
+
def test_initializes_with_expected_fields
|
|
8
|
+
profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
9
|
+
path: "/tmp/source.csv",
|
|
10
|
+
separator: ",",
|
|
11
|
+
headers_present: true
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
assert_equal "/tmp/source.csv", profile.path
|
|
15
|
+
assert_equal ",", profile.separator
|
|
16
|
+
assert_equal true, profile.headers_present?
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_requires_path
|
|
20
|
+
error = assert_raises(ArgumentError) do
|
|
21
|
+
Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "", separator: ",", headers_present: true)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
assert_equal "path cannot be empty", error.message
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
6
|
+
|
|
7
|
+
class CrossCsvDedupeKeyMappingTest < Minitest::Test
|
|
8
|
+
def test_holds_source_and_reference_selectors
|
|
9
|
+
source_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id")
|
|
10
|
+
reference_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "ref_id")
|
|
11
|
+
|
|
12
|
+
mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
13
|
+
source_selector: source_selector,
|
|
14
|
+
reference_selector: reference_selector
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
assert_equal source_selector, mapping.source_selector
|
|
18
|
+
assert_equal reference_selector, mapping.reference_selector
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def test_rejects_non_selector_inputs
|
|
22
|
+
error = assert_raises(ArgumentError) do
|
|
23
|
+
Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
24
|
+
source_selector: "id",
|
|
25
|
+
reference_selector: "external_id"
|
|
26
|
+
)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
assert_equal "selectors must be ColumnSelector", error.message
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
5
|
+
|
|
6
|
+
class CrossCsvDedupeMatchOptionsTest < Minitest::Test
|
|
7
|
+
def test_predicates_return_boolean_flags
|
|
8
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
9
|
+
trim_whitespace: true,
|
|
10
|
+
case_insensitive: false
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
assert_equal true, options.trim_whitespace?
|
|
14
|
+
assert_equal false, options.case_insensitive?
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def test_normalize_trim_on_case_off
|
|
18
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
19
|
+
trim_whitespace: true,
|
|
20
|
+
case_insensitive: false
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
assert_equal "AbC", options.normalize(" AbC ")
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def test_normalize_trim_on_case_on
|
|
27
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
28
|
+
trim_whitespace: true,
|
|
29
|
+
case_insensitive: true
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
assert_equal "abc", options.normalize(" AbC ")
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def test_normalize_trim_off_case_on
|
|
36
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
37
|
+
trim_whitespace: false,
|
|
38
|
+
case_insensitive: true
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
assert_equal " abc ", options.normalize(" AbC ")
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def test_normalize_trim_off_case_off
|
|
45
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
46
|
+
trim_whitespace: false,
|
|
47
|
+
case_insensitive: false
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
assert_equal " AbC ", options.normalize(" AbC ")
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
5
|
+
|
|
6
|
+
class RandomizationOptionsTest < Minitest::Test
|
|
7
|
+
def test_accepts_nil_or_integer_seed
|
|
8
|
+
with_seed = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: 42)
|
|
9
|
+
without_seed = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: nil)
|
|
10
|
+
|
|
11
|
+
assert_equal 42, with_seed.seed
|
|
12
|
+
assert_nil without_seed.seed
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def test_rejects_non_integer_seed
|
|
16
|
+
assert_raises(ArgumentError) do
|
|
17
|
+
Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: "abc")
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
5
|
+
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
6
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
7
|
+
require "csvtool/domain/shared/output_destination"
|
|
8
|
+
|
|
9
|
+
class RandomizationSessionTest < Minitest::Test
|
|
10
|
+
def test_with_output_destination_returns_updated_session
|
|
11
|
+
source = Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
|
|
12
|
+
path: "/tmp/in.csv",
|
|
13
|
+
separator: ",",
|
|
14
|
+
headers_present: true
|
|
15
|
+
)
|
|
16
|
+
options = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: 7)
|
|
17
|
+
session = Csvtool::Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
|
|
18
|
+
destination = Csvtool::Domain::Shared::OutputDestination.console
|
|
19
|
+
|
|
20
|
+
updated = session.with_output_destination(destination)
|
|
21
|
+
|
|
22
|
+
assert_equal source, updated.source
|
|
23
|
+
assert_equal options, updated.options
|
|
24
|
+
assert_equal destination, updated.output_destination
|
|
25
|
+
end
|
|
26
|
+
end
|