csvops 0.5.0.alpha → 0.7.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +88 -7
- data/docs/architecture.md +119 -5
- data/docs/release-v0.6.0-alpha.md +84 -0
- data/docs/release-v0.7.0-alpha.md +87 -0
- data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
- data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
- data/lib/csvtool/cli.rb +9 -1
- data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
- data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
- data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
- data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
- data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
- data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
- data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
- data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
- data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
- data/lib/csvtool/interface/cli/errors/presenter.rb +12 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
- data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
- data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
- data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
- data/test/csvtool/cli_test.rb +222 -21
- data/test/csvtool/cli_unit_test.rb +4 -4
- data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
- data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
- data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
- data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
- data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
- data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
- data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +87 -93
- data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
- data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
- data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
- data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
- data/test/fixtures/parity_duplicates_left.csv +4 -0
- data/test/fixtures/parity_duplicates_right.csv +3 -0
- data/test/fixtures/parity_people_header_mismatch.csv +4 -0
- data/test/fixtures/parity_people_many_reordered.csv +13 -0
- data/test/fixtures/parity_people_mismatch.csv +4 -0
- data/test/fixtures/parity_people_reordered.csv +4 -0
- data/test/fixtures/parity_people_reordered.tsv +4 -0
- data/test/fixtures/split_people_25.csv +26 -0
- metadata +64 -1
data/test/csvtool/cli_test.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require_relative "../test_helper"
|
|
4
4
|
require "csvtool/cli"
|
|
5
5
|
require "tmpdir"
|
|
6
|
+
require "fileutils"
|
|
6
7
|
|
|
7
8
|
class TestCli < Minitest::Test
|
|
8
9
|
def fixture_path(name)
|
|
@@ -11,11 +12,57 @@ class TestCli < Minitest::Test
|
|
|
11
12
|
|
|
12
13
|
def test_menu_can_exit_cleanly
|
|
13
14
|
output = StringIO.new
|
|
14
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
15
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("7\n"), stdout: output, stderr: StringIO.new)
|
|
15
16
|
assert_equal 0, status
|
|
16
17
|
assert_includes output.string, "CSV Tool Menu"
|
|
17
18
|
end
|
|
18
19
|
|
|
20
|
+
def test_split_workflow_splits_csv_in_menu_flow
|
|
21
|
+
output = StringIO.new
|
|
22
|
+
Dir.mktmpdir do |dir|
|
|
23
|
+
source_path = File.join(dir, "people.csv")
|
|
24
|
+
FileUtils.cp(fixture_path("split_people_25.csv"), source_path)
|
|
25
|
+
input = [
|
|
26
|
+
"6",
|
|
27
|
+
source_path,
|
|
28
|
+
"",
|
|
29
|
+
"",
|
|
30
|
+
"10",
|
|
31
|
+
"",
|
|
32
|
+
"",
|
|
33
|
+
"",
|
|
34
|
+
"",
|
|
35
|
+
"7"
|
|
36
|
+
].join("\n") + "\n"
|
|
37
|
+
|
|
38
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
39
|
+
|
|
40
|
+
assert_equal 0, status
|
|
41
|
+
assert_includes output.string, "Chunks written: 3"
|
|
42
|
+
assert File.file?(File.join(dir, "people_part_001.csv"))
|
|
43
|
+
assert File.file?(File.join(dir, "people_part_002.csv"))
|
|
44
|
+
assert File.file?(File.join(dir, "people_part_003.csv"))
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def test_split_workflow_invalid_chunk_size_returns_to_menu
|
|
49
|
+
output = StringIO.new
|
|
50
|
+
input = [
|
|
51
|
+
"6",
|
|
52
|
+
fixture_path("sample_people.csv"),
|
|
53
|
+
"",
|
|
54
|
+
"",
|
|
55
|
+
"0",
|
|
56
|
+
"7"
|
|
57
|
+
].join("\n") + "\n"
|
|
58
|
+
|
|
59
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
60
|
+
|
|
61
|
+
assert_equal 0, status
|
|
62
|
+
assert_includes output.string, "Chunk size must be a positive integer."
|
|
63
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
64
|
+
end
|
|
65
|
+
|
|
19
66
|
def test_end_to_end_console_happy_path_prints_expected_values
|
|
20
67
|
input = [
|
|
21
68
|
"1",
|
|
@@ -26,7 +73,7 @@ class TestCli < Minitest::Test
|
|
|
26
73
|
"",
|
|
27
74
|
"y",
|
|
28
75
|
"",
|
|
29
|
-
"
|
|
76
|
+
"7"
|
|
30
77
|
].join("\n") + "\n"
|
|
31
78
|
|
|
32
79
|
output = StringIO.new
|
|
@@ -58,7 +105,7 @@ class TestCli < Minitest::Test
|
|
|
58
105
|
"2",
|
|
59
106
|
"3",
|
|
60
107
|
"",
|
|
61
|
-
"
|
|
108
|
+
"7"
|
|
62
109
|
].join("\n") + "\n"
|
|
63
110
|
|
|
64
111
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -79,7 +126,7 @@ class TestCli < Minitest::Test
|
|
|
79
126
|
"0",
|
|
80
127
|
"3",
|
|
81
128
|
"",
|
|
82
|
-
"
|
|
129
|
+
"7"
|
|
83
130
|
].join("\n") + "\n"
|
|
84
131
|
|
|
85
132
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -98,7 +145,7 @@ class TestCli < Minitest::Test
|
|
|
98
145
|
"2",
|
|
99
146
|
"3",
|
|
100
147
|
"",
|
|
101
|
-
"
|
|
148
|
+
"7"
|
|
102
149
|
].join("\n") + "\n"
|
|
103
150
|
|
|
104
151
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -119,7 +166,7 @@ class TestCli < Minitest::Test
|
|
|
119
166
|
"2",
|
|
120
167
|
"3",
|
|
121
168
|
"",
|
|
122
|
-
"
|
|
169
|
+
"7"
|
|
123
170
|
].join("\n") + "\n"
|
|
124
171
|
|
|
125
172
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -144,7 +191,7 @@ class TestCli < Minitest::Test
|
|
|
144
191
|
"3",
|
|
145
192
|
"2",
|
|
146
193
|
output_path,
|
|
147
|
-
"
|
|
194
|
+
"7"
|
|
148
195
|
].join("\n") + "\n"
|
|
149
196
|
|
|
150
197
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -164,7 +211,7 @@ class TestCli < Minitest::Test
|
|
|
164
211
|
"1",
|
|
165
212
|
"2",
|
|
166
213
|
"",
|
|
167
|
-
"
|
|
214
|
+
"7"
|
|
168
215
|
].join("\n") + "\n"
|
|
169
216
|
|
|
170
217
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -184,7 +231,7 @@ class TestCli < Minitest::Test
|
|
|
184
231
|
"",
|
|
185
232
|
"",
|
|
186
233
|
"",
|
|
187
|
-
"
|
|
234
|
+
"7"
|
|
188
235
|
].join("\n") + "\n"
|
|
189
236
|
|
|
190
237
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -209,7 +256,7 @@ class TestCli < Minitest::Test
|
|
|
209
256
|
"",
|
|
210
257
|
"2",
|
|
211
258
|
output_path,
|
|
212
|
-
"
|
|
259
|
+
"7"
|
|
213
260
|
].join("\n") + "\n"
|
|
214
261
|
|
|
215
262
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -231,7 +278,7 @@ class TestCli < Minitest::Test
|
|
|
231
278
|
"",
|
|
232
279
|
"",
|
|
233
280
|
"",
|
|
234
|
-
"
|
|
281
|
+
"7"
|
|
235
282
|
].join("\n") + "\n"
|
|
236
283
|
|
|
237
284
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -250,7 +297,7 @@ class TestCli < Minitest::Test
|
|
|
250
297
|
"n",
|
|
251
298
|
"",
|
|
252
299
|
"",
|
|
253
|
-
"
|
|
300
|
+
"7"
|
|
254
301
|
].join("\n") + "\n"
|
|
255
302
|
|
|
256
303
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -270,7 +317,7 @@ class TestCli < Minitest::Test
|
|
|
270
317
|
"",
|
|
271
318
|
"",
|
|
272
319
|
"abc",
|
|
273
|
-
"
|
|
320
|
+
"7"
|
|
274
321
|
].join("\n") + "\n"
|
|
275
322
|
|
|
276
323
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -295,7 +342,7 @@ class TestCli < Minitest::Test
|
|
|
295
342
|
"",
|
|
296
343
|
"",
|
|
297
344
|
"",
|
|
298
|
-
"
|
|
345
|
+
"7"
|
|
299
346
|
].join("\n") + "\n"
|
|
300
347
|
|
|
301
348
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -329,7 +376,7 @@ class TestCli < Minitest::Test
|
|
|
329
376
|
"",
|
|
330
377
|
"2",
|
|
331
378
|
output_path,
|
|
332
|
-
"
|
|
379
|
+
"7"
|
|
333
380
|
].join("\n") + "\n"
|
|
334
381
|
|
|
335
382
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -356,7 +403,7 @@ class TestCli < Minitest::Test
|
|
|
356
403
|
"",
|
|
357
404
|
"",
|
|
358
405
|
"",
|
|
359
|
-
"
|
|
406
|
+
"7"
|
|
360
407
|
].join("\n") + "\n"
|
|
361
408
|
|
|
362
409
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -382,7 +429,7 @@ class TestCli < Minitest::Test
|
|
|
382
429
|
"",
|
|
383
430
|
"",
|
|
384
431
|
"",
|
|
385
|
-
"
|
|
432
|
+
"7"
|
|
386
433
|
].join("\n") + "\n"
|
|
387
434
|
|
|
388
435
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -394,6 +441,160 @@ class TestCli < Minitest::Test
|
|
|
394
441
|
assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
395
442
|
end
|
|
396
443
|
|
|
444
|
+
def test_parity_workflow_reports_match_and_returns_to_menu
|
|
445
|
+
output = StringIO.new
|
|
446
|
+
input = [
|
|
447
|
+
"5",
|
|
448
|
+
fixture_path("sample_people.csv"),
|
|
449
|
+
fixture_path("sample_people.csv"),
|
|
450
|
+
"",
|
|
451
|
+
"",
|
|
452
|
+
"7"
|
|
453
|
+
].join("\n") + "\n"
|
|
454
|
+
|
|
455
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
456
|
+
|
|
457
|
+
assert_equal 0, status
|
|
458
|
+
assert_includes output.string, "Left CSV file path:"
|
|
459
|
+
assert_includes output.string, "Right CSV file path:"
|
|
460
|
+
assert_includes output.string, "MATCH"
|
|
461
|
+
assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
|
|
462
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
def test_parity_workflow_supports_tsv_separator
|
|
466
|
+
output = StringIO.new
|
|
467
|
+
input = [
|
|
468
|
+
"5",
|
|
469
|
+
fixture_path("sample_people.tsv"),
|
|
470
|
+
fixture_path("parity_people_reordered.tsv"),
|
|
471
|
+
"2",
|
|
472
|
+
"",
|
|
473
|
+
"7"
|
|
474
|
+
].join("\n") + "\n"
|
|
475
|
+
|
|
476
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
477
|
+
|
|
478
|
+
assert_equal 0, status
|
|
479
|
+
assert_includes output.string, "MATCH"
|
|
480
|
+
assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
|
|
481
|
+
end
|
|
482
|
+
|
|
483
|
+
def test_parity_workflow_headerless_mode_compares_all_rows
|
|
484
|
+
output = StringIO.new
|
|
485
|
+
input = [
|
|
486
|
+
"5",
|
|
487
|
+
fixture_path("sample_people_no_headers.csv"),
|
|
488
|
+
fixture_path("sample_people_no_headers.csv"),
|
|
489
|
+
"",
|
|
490
|
+
"n",
|
|
491
|
+
"7"
|
|
492
|
+
].join("\n") + "\n"
|
|
493
|
+
|
|
494
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
495
|
+
|
|
496
|
+
assert_equal 0, status
|
|
497
|
+
assert_includes output.string, "MATCH"
|
|
498
|
+
assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
|
|
499
|
+
end
|
|
500
|
+
|
|
501
|
+
def test_parity_workflow_reports_header_mismatch_in_headered_mode
|
|
502
|
+
output = StringIO.new
|
|
503
|
+
input = [
|
|
504
|
+
"5",
|
|
505
|
+
fixture_path("sample_people.csv"),
|
|
506
|
+
fixture_path("parity_people_header_mismatch.csv"),
|
|
507
|
+
"",
|
|
508
|
+
"",
|
|
509
|
+
"7"
|
|
510
|
+
].join("\n") + "\n"
|
|
511
|
+
|
|
512
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
513
|
+
|
|
514
|
+
assert_equal 0, status
|
|
515
|
+
assert_includes output.string, "CSV headers do not match."
|
|
516
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
517
|
+
end
|
|
518
|
+
|
|
519
|
+
def test_parity_workflow_prints_mismatch_examples_and_counts
|
|
520
|
+
output = StringIO.new
|
|
521
|
+
input = [
|
|
522
|
+
"5",
|
|
523
|
+
fixture_path("sample_people.csv"),
|
|
524
|
+
fixture_path("parity_people_mismatch.csv"),
|
|
525
|
+
"",
|
|
526
|
+
"",
|
|
527
|
+
"7"
|
|
528
|
+
].join("\n") + "\n"
|
|
529
|
+
|
|
530
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
531
|
+
|
|
532
|
+
assert_equal 0, status
|
|
533
|
+
assert_includes output.string, "MISMATCH"
|
|
534
|
+
assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=1 right_only=1"
|
|
535
|
+
assert_includes output.string, "Left-only examples:"
|
|
536
|
+
assert_includes output.string, "Cara,Berlin (count +1)"
|
|
537
|
+
assert_includes output.string, "Right-only examples:"
|
|
538
|
+
assert_includes output.string, "Dina,Rome (count +1)"
|
|
539
|
+
end
|
|
540
|
+
|
|
541
|
+
def test_parity_workflow_missing_left_file_returns_to_menu
|
|
542
|
+
output = StringIO.new
|
|
543
|
+
input = [
|
|
544
|
+
"5",
|
|
545
|
+
"/tmp/not-there-left.csv",
|
|
546
|
+
fixture_path("sample_people.csv"),
|
|
547
|
+
"",
|
|
548
|
+
"",
|
|
549
|
+
"7"
|
|
550
|
+
].join("\n") + "\n"
|
|
551
|
+
|
|
552
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
553
|
+
|
|
554
|
+
assert_equal 0, status
|
|
555
|
+
assert_includes output.string, "File not found: /tmp/not-there-left.csv"
|
|
556
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
557
|
+
refute_includes output.string, "Traceback"
|
|
558
|
+
end
|
|
559
|
+
|
|
560
|
+
def test_parity_workflow_missing_right_file_returns_to_menu
|
|
561
|
+
output = StringIO.new
|
|
562
|
+
input = [
|
|
563
|
+
"5",
|
|
564
|
+
fixture_path("sample_people.csv"),
|
|
565
|
+
"/tmp/not-there-right.csv",
|
|
566
|
+
"",
|
|
567
|
+
"",
|
|
568
|
+
"7"
|
|
569
|
+
].join("\n") + "\n"
|
|
570
|
+
|
|
571
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
572
|
+
|
|
573
|
+
assert_equal 0, status
|
|
574
|
+
assert_includes output.string, "File not found: /tmp/not-there-right.csv"
|
|
575
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
576
|
+
refute_includes output.string, "Traceback"
|
|
577
|
+
end
|
|
578
|
+
|
|
579
|
+
def test_parity_workflow_malformed_csv_returns_to_menu
|
|
580
|
+
output = StringIO.new
|
|
581
|
+
input = [
|
|
582
|
+
"5",
|
|
583
|
+
fixture_path("sample_people.csv"),
|
|
584
|
+
fixture_path("sample_people_bad_tail.csv"),
|
|
585
|
+
"",
|
|
586
|
+
"",
|
|
587
|
+
"7"
|
|
588
|
+
].join("\n") + "\n"
|
|
589
|
+
|
|
590
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
591
|
+
|
|
592
|
+
assert_equal 0, status
|
|
593
|
+
assert_includes output.string, "Could not parse CSV file."
|
|
594
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
595
|
+
refute_includes output.string, "Traceback"
|
|
596
|
+
end
|
|
597
|
+
|
|
397
598
|
def test_end_to_end_file_output_writes_expected_csv
|
|
398
599
|
output = StringIO.new
|
|
399
600
|
output_path = nil
|
|
@@ -410,7 +611,7 @@ class TestCli < Minitest::Test
|
|
|
410
611
|
"y",
|
|
411
612
|
"2",
|
|
412
613
|
output_path,
|
|
413
|
-
"
|
|
614
|
+
"7"
|
|
414
615
|
].join("\n") + "\n"
|
|
415
616
|
|
|
416
617
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -430,7 +631,7 @@ class TestCli < Minitest::Test
|
|
|
430
631
|
"1",
|
|
431
632
|
"",
|
|
432
633
|
"n",
|
|
433
|
-
"
|
|
634
|
+
"7"
|
|
434
635
|
].join("\n") + "\n"
|
|
435
636
|
|
|
436
637
|
output = StringIO.new
|
|
@@ -445,7 +646,7 @@ class TestCli < Minitest::Test
|
|
|
445
646
|
output = StringIO.new
|
|
446
647
|
status = Csvtool::CLI.start(
|
|
447
648
|
["menu"],
|
|
448
|
-
stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n"),
|
|
649
|
+
stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n7\n"),
|
|
449
650
|
stdout: output,
|
|
450
651
|
stderr: StringIO.new
|
|
451
652
|
)
|
|
@@ -466,7 +667,7 @@ class TestCli < Minitest::Test
|
|
|
466
667
|
"y",
|
|
467
668
|
"2",
|
|
468
669
|
"/tmp/not-a-dir/out.csv",
|
|
469
|
-
"
|
|
670
|
+
"7"
|
|
470
671
|
].join("\n") + "\n"
|
|
471
672
|
|
|
472
673
|
output = StringIO.new
|
data/test/csvtool/cli_unit_test.rb
CHANGED
|
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def test_menu_command_can_exit_zero
|
|
19
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
19
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("6\n"), stdout: StringIO.new, stderr: StringIO.new)
|
|
20
20
|
assert_equal 0, status
|
|
21
21
|
end
|
|
22
22
|
|
|
@@ -28,7 +28,7 @@ class CliUnitTest < Minitest::Test
|
|
|
28
28
|
def test_menu_routes_to_row_range_shell
|
|
29
29
|
stdout = StringIO.new
|
|
30
30
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
31
|
-
input = ["2", fixture, "", "2", "3", "", "
|
|
31
|
+
input = ["2", fixture, "", "2", "3", "", "6"].join("\n") + "\n"
|
|
32
32
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
33
33
|
assert_equal 0, status
|
|
34
34
|
assert_includes stdout.string, "name,city"
|
|
@@ -39,7 +39,7 @@ class CliUnitTest < Minitest::Test
|
|
|
39
39
|
def test_menu_routes_to_randomize_rows_shell
|
|
40
40
|
stdout = StringIO.new
|
|
41
41
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
42
|
-
input = ["3", fixture, "", "", "", "", "
|
|
42
|
+
input = ["3", fixture, "", "", "", "", "6"].join("\n") + "\n"
|
|
43
43
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
44
44
|
assert_equal 0, status
|
|
45
45
|
assert_includes stdout.string, "name,city"
|
|
@@ -52,7 +52,7 @@ class CliUnitTest < Minitest::Test
|
|
|
52
52
|
stdout = StringIO.new
|
|
53
53
|
source_fixture = File.expand_path("../fixtures/dedupe_source.csv", __dir__)
|
|
54
54
|
reference_fixture = File.expand_path("../fixtures/dedupe_reference.csv", __dir__)
|
|
55
|
-
input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "
|
|
55
|
+
input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "6"].join("\n") + "\n"
|
|
56
56
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
57
57
|
assert_equal 0, status
|
|
58
58
|
assert_includes stdout.string, "customer_id,name"
|
data/test/csvtool/domain/csv_parity_session/parity_options_test.rb
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/csv_parity_session/parity_options"
|
|
5
|
+
|
|
6
|
+
class ParityOptionsTest < Minitest::Test
|
|
7
|
+
def test_requires_separator
|
|
8
|
+
assert_raises(ArgumentError) do
|
|
9
|
+
Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: "", headers_present: true)
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def test_exposes_headers_present
|
|
14
|
+
options = Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: ",", headers_present: false)
|
|
15
|
+
assert_equal false, options.headers_present?
|
|
16
|
+
end
|
|
17
|
+
end
|
data/test/csvtool/domain/csv_parity_session/parity_session_test.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/csv_parity_session/source_pair"
|
|
5
|
+
require "csvtool/domain/csv_parity_session/parity_options"
|
|
6
|
+
require "csvtool/domain/csv_parity_session/parity_session"
|
|
7
|
+
|
|
8
|
+
class ParitySessionTest < Minitest::Test
|
|
9
|
+
def test_stores_source_pair_and_options
|
|
10
|
+
source_pair = Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "/tmp/l.csv", right_path: "/tmp/r.csv")
|
|
11
|
+
options = Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: ",", headers_present: true)
|
|
12
|
+
|
|
13
|
+
session = Csvtool::Domain::CsvParitySession::ParitySession.start(source_pair: source_pair, options: options)
|
|
14
|
+
|
|
15
|
+
assert_equal source_pair, session.source_pair
|
|
16
|
+
assert_equal options, session.options
|
|
17
|
+
end
|
|
18
|
+
end
|
data/test/csvtool/domain/csv_parity_session/source_pair_test.rb
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/csv_parity_session/source_pair"
|
|
5
|
+
|
|
6
|
+
class SourcePairTest < Minitest::Test
|
|
7
|
+
def test_requires_paths
|
|
8
|
+
assert_raises(ArgumentError) { Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "", right_path: "/tmp/r.csv") }
|
|
9
|
+
assert_raises(ArgumentError) { Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "/tmp/l.csv", right_path: "") }
|
|
10
|
+
end
|
|
11
|
+
end
|
data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/csv/csv_parity_comparator"
|
|
5
|
+
|
|
6
|
+
class CsvParityComparatorTest < Minitest::Test
|
|
7
|
+
def fixture_path(name)
|
|
8
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def test_reports_match_when_rows_are_equal_ignoring_order
|
|
12
|
+
comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
|
|
13
|
+
|
|
14
|
+
result = comparator.call(
|
|
15
|
+
left_path: fixture_path("sample_people.csv"),
|
|
16
|
+
right_path: fixture_path("parity_people_reordered.csv"),
|
|
17
|
+
col_sep: ",",
|
|
18
|
+
headers_present: true
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
assert_equal true, result[:match]
|
|
22
|
+
assert_equal 0, result[:left_only_count]
|
|
23
|
+
assert_equal 0, result[:right_only_count]
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def test_reports_mismatch_counts_for_different_rows
|
|
27
|
+
comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
|
|
28
|
+
|
|
29
|
+
result = comparator.call(
|
|
30
|
+
left_path: fixture_path("sample_people.csv"),
|
|
31
|
+
right_path: fixture_path("parity_people_mismatch.csv"),
|
|
32
|
+
col_sep: ",",
|
|
33
|
+
headers_present: true
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
assert_equal false, result[:match]
|
|
37
|
+
assert_equal 1, result[:left_only_count]
|
|
38
|
+
assert_equal 1, result[:right_only_count]
|
|
39
|
+
assert_equal "Cara,Berlin", result[:left_only_examples][0][:row]
|
|
40
|
+
assert_equal 1, result[:left_only_examples][0][:count_delta]
|
|
41
|
+
assert_equal "Dina,Rome", result[:right_only_examples][0][:row]
|
|
42
|
+
assert_equal 1, result[:right_only_examples][0][:count_delta]
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def test_respects_duplicate_counts
|
|
46
|
+
comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
|
|
47
|
+
|
|
48
|
+
result = comparator.call(
|
|
49
|
+
left_path: fixture_path("parity_duplicates_left.csv"),
|
|
50
|
+
right_path: fixture_path("parity_duplicates_right.csv"),
|
|
51
|
+
col_sep: ",",
|
|
52
|
+
headers_present: true
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
assert_equal false, result[:match]
|
|
56
|
+
assert_equal 1, result[:left_only_count]
|
|
57
|
+
assert_equal 0, result[:right_only_count]
|
|
58
|
+
assert_equal "1,Alice", result[:left_only_examples][0][:row]
|
|
59
|
+
assert_equal 1, result[:left_only_examples][0][:count_delta]
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def test_preserves_exact_semantics_for_larger_fixture_with_different_order
|
|
63
|
+
comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
|
|
64
|
+
|
|
65
|
+
result = comparator.call(
|
|
66
|
+
left_path: fixture_path("sample_people_many.csv"),
|
|
67
|
+
right_path: fixture_path("parity_people_many_reordered.csv"),
|
|
68
|
+
col_sep: ",",
|
|
69
|
+
headers_present: true
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
assert_equal true, result[:match]
|
|
73
|
+
assert_equal 12, result[:left_rows]
|
|
74
|
+
assert_equal 12, result[:right_rows]
|
|
75
|
+
assert_equal 0, result[:left_only_count]
|
|
76
|
+
assert_equal 0, result[:right_only_count]
|
|
77
|
+
end
|
|
78
|
+
end
|
data/test/csvtool/infrastructure/csv/csv_splitter_test.rb
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/csv/csv_splitter"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
class CsvSplitterTest < Minitest::Test
|
|
8
|
+
def test_splits_large_file_in_order
|
|
9
|
+
splitter = Csvtool::Infrastructure::CSV::CsvSplitter.new
|
|
10
|
+
|
|
11
|
+
Dir.mktmpdir do |dir|
|
|
12
|
+
source_path = File.join(dir, "large.csv")
|
|
13
|
+
File.open(source_path, "w") do |f|
|
|
14
|
+
f.puts "id,value"
|
|
15
|
+
5_000.times { |i| f.puts "#{i + 1},v#{i + 1}" }
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
stats = splitter.call(
|
|
19
|
+
file_path: source_path,
|
|
20
|
+
col_sep: ",",
|
|
21
|
+
headers_present: true,
|
|
22
|
+
chunk_size: 1_000,
|
|
23
|
+
output_directory: dir,
|
|
24
|
+
file_prefix: "large",
|
|
25
|
+
overwrite_existing: false
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
assert_equal 5, stats[:chunk_count]
|
|
29
|
+
assert_equal 5_000, stats[:data_rows]
|
|
30
|
+
assert_equal [1_000, 1_000, 1_000, 1_000, 1_000], stats[:chunk_row_counts]
|
|
31
|
+
|
|
32
|
+
first_chunk = File.read(File.join(dir, "large_part_001.csv")).lines.map(&:strip)
|
|
33
|
+
last_chunk = File.read(File.join(dir, "large_part_005.csv")).lines.map(&:strip)
|
|
34
|
+
assert_equal "id,value", first_chunk.first
|
|
35
|
+
assert_equal "1,v1", first_chunk[1]
|
|
36
|
+
assert_equal "1000,v1000", first_chunk[1000]
|
|
37
|
+
assert_equal "4001,v4001", last_chunk[1]
|
|
38
|
+
assert_equal "5000,v5000", last_chunk[1000]
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def test_streaming_split_handles_headerless_file
|
|
43
|
+
splitter = Csvtool::Infrastructure::CSV::CsvSplitter.new
|
|
44
|
+
|
|
45
|
+
Dir.mktmpdir do |dir|
|
|
46
|
+
source_path = File.join(dir, "large_no_headers.csv")
|
|
47
|
+
File.open(source_path, "w") do |f|
|
|
48
|
+
2_500.times { |i| f.puts "#{i + 1},v#{i + 1}" }
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
stats = splitter.call(
|
|
52
|
+
file_path: source_path,
|
|
53
|
+
col_sep: ",",
|
|
54
|
+
headers_present: false,
|
|
55
|
+
chunk_size: 1_000,
|
|
56
|
+
output_directory: dir,
|
|
57
|
+
file_prefix: "large_no_headers",
|
|
58
|
+
overwrite_existing: false
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
assert_equal 3, stats[:chunk_count]
|
|
62
|
+
assert_equal 2_500, stats[:data_rows]
|
|
63
|
+
assert_equal [1_000, 1_000, 500], stats[:chunk_row_counts]
|
|
64
|
+
first_line = File.read(File.join(dir, "large_no_headers_part_001.csv")).lines.first.strip
|
|
65
|
+
assert_equal "1,v1", first_line
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/output/csv_split_manifest_writer"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
class CsvSplitManifestWriterTest < Minitest::Test
|
|
8
|
+
def test_writes_manifest_csv
|
|
9
|
+
writer = Csvtool::Infrastructure::Output::CsvSplitManifestWriter.new
|
|
10
|
+
|
|
11
|
+
Dir.mktmpdir do |dir|
|
|
12
|
+
path = File.join(dir, "manifest.csv")
|
|
13
|
+
writer.call(
|
|
14
|
+
path: path,
|
|
15
|
+
chunk_paths: ["/tmp/a.csv", "/tmp/b.csv"],
|
|
16
|
+
chunk_row_counts: [10, 5]
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
lines = File.read(path).lines.map(&:strip)
|
|
20
|
+
assert_equal "chunk_index,chunk_path,row_count", lines[0]
|
|
21
|
+
assert_equal "1,/tmp/a.csv,10", lines[1]
|
|
22
|
+
assert_equal "2,/tmp/b.csv,5", lines[2]
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
data/test/csvtool/interface/cli/errors/presenter_test.rb
CHANGED
|
@@ -24,6 +24,7 @@ class ErrorsPresenterTest < Minitest::Test
|
|
|
24
24
|
presenter.invalid_end_row
|
|
25
25
|
presenter.invalid_row_range_order
|
|
26
26
|
presenter.row_range_out_of_bounds(3)
|
|
27
|
+
presenter.header_mismatch
|
|
27
28
|
|
|
28
29
|
text = out.string
|
|
29
30
|
assert_includes text, "File not found: /tmp/x.csv"
|
|
@@ -42,5 +43,6 @@ class ErrorsPresenterTest < Minitest::Test
|
|
|
42
43
|
assert_includes text, "End row must be a positive integer."
|
|
43
44
|
assert_includes text, "End row must be greater than or equal to start row."
|
|
44
45
|
assert_includes text, "Row range is out of bounds. File has 3 data rows."
|
|
46
|
+
assert_includes text, "CSV headers do not match."
|
|
45
47
|
end
|
|
46
48
|
end
|