csvops 0.5.0.alpha → 0.7.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +88 -7
  3. data/docs/architecture.md +119 -5
  4. data/docs/release-v0.6.0-alpha.md +84 -0
  5. data/docs/release-v0.7.0-alpha.md +87 -0
  6. data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
  7. data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
  8. data/lib/csvtool/cli.rb +9 -1
  9. data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
  10. data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
  11. data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
  12. data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
  13. data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
  14. data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
  15. data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
  16. data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
  17. data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
  18. data/lib/csvtool/interface/cli/errors/presenter.rb +12 -0
  19. data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
  20. data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
  21. data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
  22. data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
  23. data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
  24. data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
  25. data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
  26. data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
  27. data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
  28. data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
  29. data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
  30. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
  31. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
  32. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
  33. data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
  34. data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
  35. data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
  36. data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
  37. data/lib/csvtool/version.rb +1 -1
  38. data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
  39. data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
  40. data/test/csvtool/cli_test.rb +222 -21
  41. data/test/csvtool/cli_unit_test.rb +4 -4
  42. data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
  43. data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
  44. data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
  45. data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
  46. data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
  47. data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
  48. data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
  49. data/test/csvtool/interface/cli/menu_loop_test.rb +87 -93
  50. data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
  51. data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
  52. data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
  53. data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
  54. data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
  55. data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
  56. data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
  57. data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
  58. data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
  59. data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
  60. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
  61. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
  62. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
  63. data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
  64. data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
  65. data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
  66. data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
  67. data/test/fixtures/parity_duplicates_left.csv +4 -0
  68. data/test/fixtures/parity_duplicates_right.csv +3 -0
  69. data/test/fixtures/parity_people_header_mismatch.csv +4 -0
  70. data/test/fixtures/parity_people_many_reordered.csv +13 -0
  71. data/test/fixtures/parity_people_mismatch.csv +4 -0
  72. data/test/fixtures/parity_people_reordered.csv +4 -0
  73. data/test/fixtures/parity_people_reordered.tsv +4 -0
  74. data/test/fixtures/split_people_25.csv +26 -0
  75. metadata +64 -1
@@ -3,6 +3,7 @@
3
3
  require_relative "../test_helper"
4
4
  require "csvtool/cli"
5
5
  require "tmpdir"
6
+ require "fileutils"
6
7
 
7
8
  class TestCli < Minitest::Test
8
9
  def fixture_path(name)
@@ -11,11 +12,57 @@ class TestCli < Minitest::Test
11
12
 
12
13
  def test_menu_can_exit_cleanly
13
14
  output = StringIO.new
14
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: output, stderr: StringIO.new)
15
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("7\n"), stdout: output, stderr: StringIO.new)
15
16
  assert_equal 0, status
16
17
  assert_includes output.string, "CSV Tool Menu"
17
18
  end
18
19
 
20
+ def test_split_workflow_splits_csv_in_menu_flow
21
+ output = StringIO.new
22
+ Dir.mktmpdir do |dir|
23
+ source_path = File.join(dir, "people.csv")
24
+ FileUtils.cp(fixture_path("split_people_25.csv"), source_path)
25
+ input = [
26
+ "6",
27
+ source_path,
28
+ "",
29
+ "",
30
+ "10",
31
+ "",
32
+ "",
33
+ "",
34
+ "",
35
+ "7"
36
+ ].join("\n") + "\n"
37
+
38
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
39
+
40
+ assert_equal 0, status
41
+ assert_includes output.string, "Chunks written: 3"
42
+ assert File.file?(File.join(dir, "people_part_001.csv"))
43
+ assert File.file?(File.join(dir, "people_part_002.csv"))
44
+ assert File.file?(File.join(dir, "people_part_003.csv"))
45
+ end
46
+ end
47
+
48
+ def test_split_workflow_invalid_chunk_size_returns_to_menu
49
+ output = StringIO.new
50
+ input = [
51
+ "6",
52
+ fixture_path("sample_people.csv"),
53
+ "",
54
+ "",
55
+ "0",
56
+ "7"
57
+ ].join("\n") + "\n"
58
+
59
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
60
+
61
+ assert_equal 0, status
62
+ assert_includes output.string, "Chunk size must be a positive integer."
63
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
64
+ end
65
+
19
66
  def test_end_to_end_console_happy_path_prints_expected_values
20
67
  input = [
21
68
  "1",
@@ -26,7 +73,7 @@ class TestCli < Minitest::Test
26
73
  "",
27
74
  "y",
28
75
  "",
29
- "5"
76
+ "7"
30
77
  ].join("\n") + "\n"
31
78
 
32
79
  output = StringIO.new
@@ -58,7 +105,7 @@ class TestCli < Minitest::Test
58
105
  "2",
59
106
  "3",
60
107
  "",
61
- "5"
108
+ "7"
62
109
  ].join("\n") + "\n"
63
110
 
64
111
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -79,7 +126,7 @@ class TestCli < Minitest::Test
79
126
  "0",
80
127
  "3",
81
128
  "",
82
- "5"
129
+ "7"
83
130
  ].join("\n") + "\n"
84
131
 
85
132
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -98,7 +145,7 @@ class TestCli < Minitest::Test
98
145
  "2",
99
146
  "3",
100
147
  "",
101
- "5"
148
+ "7"
102
149
  ].join("\n") + "\n"
103
150
 
104
151
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -119,7 +166,7 @@ class TestCli < Minitest::Test
119
166
  "2",
120
167
  "3",
121
168
  "",
122
- "5"
169
+ "7"
123
170
  ].join("\n") + "\n"
124
171
 
125
172
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -144,7 +191,7 @@ class TestCli < Minitest::Test
144
191
  "3",
145
192
  "2",
146
193
  output_path,
147
- "5"
194
+ "7"
148
195
  ].join("\n") + "\n"
149
196
 
150
197
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -164,7 +211,7 @@ class TestCli < Minitest::Test
164
211
  "1",
165
212
  "2",
166
213
  "",
167
- "5"
214
+ "7"
168
215
  ].join("\n") + "\n"
169
216
 
170
217
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -184,7 +231,7 @@ class TestCli < Minitest::Test
184
231
  "",
185
232
  "",
186
233
  "",
187
- "5"
234
+ "7"
188
235
  ].join("\n") + "\n"
189
236
 
190
237
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -209,7 +256,7 @@ class TestCli < Minitest::Test
209
256
  "",
210
257
  "2",
211
258
  output_path,
212
- "5"
259
+ "7"
213
260
  ].join("\n") + "\n"
214
261
 
215
262
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -231,7 +278,7 @@ class TestCli < Minitest::Test
231
278
  "",
232
279
  "",
233
280
  "",
234
- "5"
281
+ "7"
235
282
  ].join("\n") + "\n"
236
283
 
237
284
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -250,7 +297,7 @@ class TestCli < Minitest::Test
250
297
  "n",
251
298
  "",
252
299
  "",
253
- "5"
300
+ "7"
254
301
  ].join("\n") + "\n"
255
302
 
256
303
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -270,7 +317,7 @@ class TestCli < Minitest::Test
270
317
  "",
271
318
  "",
272
319
  "abc",
273
- "5"
320
+ "7"
274
321
  ].join("\n") + "\n"
275
322
 
276
323
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -295,7 +342,7 @@ class TestCli < Minitest::Test
295
342
  "",
296
343
  "",
297
344
  "",
298
- "5"
345
+ "7"
299
346
  ].join("\n") + "\n"
300
347
 
301
348
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -329,7 +376,7 @@ class TestCli < Minitest::Test
329
376
  "",
330
377
  "2",
331
378
  output_path,
332
- "5"
379
+ "7"
333
380
  ].join("\n") + "\n"
334
381
 
335
382
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -356,7 +403,7 @@ class TestCli < Minitest::Test
356
403
  "",
357
404
  "",
358
405
  "",
359
- "5"
406
+ "7"
360
407
  ].join("\n") + "\n"
361
408
 
362
409
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -382,7 +429,7 @@ class TestCli < Minitest::Test
382
429
  "",
383
430
  "",
384
431
  "",
385
- "5"
432
+ "7"
386
433
  ].join("\n") + "\n"
387
434
 
388
435
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -394,6 +441,160 @@ class TestCli < Minitest::Test
394
441
  assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
395
442
  end
396
443
 
444
+ def test_parity_workflow_reports_match_and_returns_to_menu
445
+ output = StringIO.new
446
+ input = [
447
+ "5",
448
+ fixture_path("sample_people.csv"),
449
+ fixture_path("sample_people.csv"),
450
+ "",
451
+ "",
452
+ "7"
453
+ ].join("\n") + "\n"
454
+
455
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
456
+
457
+ assert_equal 0, status
458
+ assert_includes output.string, "Left CSV file path:"
459
+ assert_includes output.string, "Right CSV file path:"
460
+ assert_includes output.string, "MATCH"
461
+ assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
462
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
463
+ end
464
+
465
+ def test_parity_workflow_supports_tsv_separator
466
+ output = StringIO.new
467
+ input = [
468
+ "5",
469
+ fixture_path("sample_people.tsv"),
470
+ fixture_path("parity_people_reordered.tsv"),
471
+ "2",
472
+ "",
473
+ "7"
474
+ ].join("\n") + "\n"
475
+
476
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
477
+
478
+ assert_equal 0, status
479
+ assert_includes output.string, "MATCH"
480
+ assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
481
+ end
482
+
483
+ def test_parity_workflow_headerless_mode_compares_all_rows
484
+ output = StringIO.new
485
+ input = [
486
+ "5",
487
+ fixture_path("sample_people_no_headers.csv"),
488
+ fixture_path("sample_people_no_headers.csv"),
489
+ "",
490
+ "n",
491
+ "7"
492
+ ].join("\n") + "\n"
493
+
494
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
495
+
496
+ assert_equal 0, status
497
+ assert_includes output.string, "MATCH"
498
+ assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
499
+ end
500
+
501
+ def test_parity_workflow_reports_header_mismatch_in_headered_mode
502
+ output = StringIO.new
503
+ input = [
504
+ "5",
505
+ fixture_path("sample_people.csv"),
506
+ fixture_path("parity_people_header_mismatch.csv"),
507
+ "",
508
+ "",
509
+ "7"
510
+ ].join("\n") + "\n"
511
+
512
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
513
+
514
+ assert_equal 0, status
515
+ assert_includes output.string, "CSV headers do not match."
516
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
517
+ end
518
+
519
+ def test_parity_workflow_prints_mismatch_examples_and_counts
520
+ output = StringIO.new
521
+ input = [
522
+ "5",
523
+ fixture_path("sample_people.csv"),
524
+ fixture_path("parity_people_mismatch.csv"),
525
+ "",
526
+ "",
527
+ "7"
528
+ ].join("\n") + "\n"
529
+
530
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
531
+
532
+ assert_equal 0, status
533
+ assert_includes output.string, "MISMATCH"
534
+ assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=1 right_only=1"
535
+ assert_includes output.string, "Left-only examples:"
536
+ assert_includes output.string, "Cara,Berlin (count +1)"
537
+ assert_includes output.string, "Right-only examples:"
538
+ assert_includes output.string, "Dina,Rome (count +1)"
539
+ end
540
+
541
+ def test_parity_workflow_missing_left_file_returns_to_menu
542
+ output = StringIO.new
543
+ input = [
544
+ "5",
545
+ "/tmp/not-there-left.csv",
546
+ fixture_path("sample_people.csv"),
547
+ "",
548
+ "",
549
+ "7"
550
+ ].join("\n") + "\n"
551
+
552
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
553
+
554
+ assert_equal 0, status
555
+ assert_includes output.string, "File not found: /tmp/not-there-left.csv"
556
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
557
+ refute_includes output.string, "Traceback"
558
+ end
559
+
560
+ def test_parity_workflow_missing_right_file_returns_to_menu
561
+ output = StringIO.new
562
+ input = [
563
+ "5",
564
+ fixture_path("sample_people.csv"),
565
+ "/tmp/not-there-right.csv",
566
+ "",
567
+ "",
568
+ "7"
569
+ ].join("\n") + "\n"
570
+
571
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
572
+
573
+ assert_equal 0, status
574
+ assert_includes output.string, "File not found: /tmp/not-there-right.csv"
575
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
576
+ refute_includes output.string, "Traceback"
577
+ end
578
+
579
+ def test_parity_workflow_malformed_csv_returns_to_menu
580
+ output = StringIO.new
581
+ input = [
582
+ "5",
583
+ fixture_path("sample_people.csv"),
584
+ fixture_path("sample_people_bad_tail.csv"),
585
+ "",
586
+ "",
587
+ "7"
588
+ ].join("\n") + "\n"
589
+
590
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
591
+
592
+ assert_equal 0, status
593
+ assert_includes output.string, "Could not parse CSV file."
594
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
595
+ refute_includes output.string, "Traceback"
596
+ end
597
+
397
598
  def test_end_to_end_file_output_writes_expected_csv
398
599
  output = StringIO.new
399
600
  output_path = nil
@@ -410,7 +611,7 @@ class TestCli < Minitest::Test
410
611
  "y",
411
612
  "2",
412
613
  output_path,
413
- "5"
614
+ "7"
414
615
  ].join("\n") + "\n"
415
616
 
416
617
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -430,7 +631,7 @@ class TestCli < Minitest::Test
430
631
  "1",
431
632
  "",
432
633
  "n",
433
- "5"
634
+ "7"
434
635
  ].join("\n") + "\n"
435
636
 
436
637
  output = StringIO.new
@@ -445,7 +646,7 @@ class TestCli < Minitest::Test
445
646
  output = StringIO.new
446
647
  status = Csvtool::CLI.start(
447
648
  ["menu"],
448
- stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n"),
649
+ stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n7\n"),
449
650
  stdout: output,
450
651
  stderr: StringIO.new
451
652
  )
@@ -466,7 +667,7 @@ class TestCli < Minitest::Test
466
667
  "y",
467
668
  "2",
468
669
  "/tmp/not-a-dir/out.csv",
469
- "5"
670
+ "7"
470
671
  ].join("\n") + "\n"
471
672
 
472
673
  output = StringIO.new
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
16
16
  end
17
17
 
18
18
  def test_menu_command_can_exit_zero
19
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: StringIO.new, stderr: StringIO.new)
19
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("6\n"), stdout: StringIO.new, stderr: StringIO.new)
20
20
  assert_equal 0, status
21
21
  end
22
22
 
@@ -28,7 +28,7 @@ class CliUnitTest < Minitest::Test
28
28
  def test_menu_routes_to_row_range_shell
29
29
  stdout = StringIO.new
30
30
  fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
31
- input = ["2", fixture, "", "2", "3", "", "5"].join("\n") + "\n"
31
+ input = ["2", fixture, "", "2", "3", "", "6"].join("\n") + "\n"
32
32
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
33
33
  assert_equal 0, status
34
34
  assert_includes stdout.string, "name,city"
@@ -39,7 +39,7 @@ class CliUnitTest < Minitest::Test
39
39
  def test_menu_routes_to_randomize_rows_shell
40
40
  stdout = StringIO.new
41
41
  fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
42
- input = ["3", fixture, "", "", "", "", "5"].join("\n") + "\n"
42
+ input = ["3", fixture, "", "", "", "", "6"].join("\n") + "\n"
43
43
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
44
44
  assert_equal 0, status
45
45
  assert_includes stdout.string, "name,city"
@@ -52,7 +52,7 @@ class CliUnitTest < Minitest::Test
52
52
  stdout = StringIO.new
53
53
  source_fixture = File.expand_path("../fixtures/dedupe_source.csv", __dir__)
54
54
  reference_fixture = File.expand_path("../fixtures/dedupe_reference.csv", __dir__)
55
- input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "5"].join("\n") + "\n"
55
+ input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "6"].join("\n") + "\n"
56
56
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
57
57
  assert_equal 0, status
58
58
  assert_includes stdout.string, "customer_id,name"
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/csv_parity_session/parity_options"
5
+
6
+ class ParityOptionsTest < Minitest::Test
7
+ def test_requires_separator
8
+ assert_raises(ArgumentError) do
9
+ Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: "", headers_present: true)
10
+ end
11
+ end
12
+
13
+ def test_exposes_headers_present
14
+ options = Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: ",", headers_present: false)
15
+ assert_equal false, options.headers_present?
16
+ end
17
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/csv_parity_session/source_pair"
5
+ require "csvtool/domain/csv_parity_session/parity_options"
6
+ require "csvtool/domain/csv_parity_session/parity_session"
7
+
8
+ class ParitySessionTest < Minitest::Test
9
+ def test_stores_source_pair_and_options
10
+ source_pair = Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "/tmp/l.csv", right_path: "/tmp/r.csv")
11
+ options = Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: ",", headers_present: true)
12
+
13
+ session = Csvtool::Domain::CsvParitySession::ParitySession.start(source_pair: source_pair, options: options)
14
+
15
+ assert_equal source_pair, session.source_pair
16
+ assert_equal options, session.options
17
+ end
18
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/csv_parity_session/source_pair"
5
+
6
+ class SourcePairTest < Minitest::Test
7
+ def test_requires_paths
8
+ assert_raises(ArgumentError) { Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "", right_path: "/tmp/r.csv") }
9
+ assert_raises(ArgumentError) { Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "/tmp/l.csv", right_path: "") }
10
+ end
11
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/csv/csv_parity_comparator"
5
+
6
+ class CsvParityComparatorTest < Minitest::Test
7
+ def fixture_path(name)
8
+ File.expand_path("../../../fixtures/#{name}", __dir__)
9
+ end
10
+
11
+ def test_reports_match_when_rows_are_equal_ignoring_order
12
+ comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
13
+
14
+ result = comparator.call(
15
+ left_path: fixture_path("sample_people.csv"),
16
+ right_path: fixture_path("parity_people_reordered.csv"),
17
+ col_sep: ",",
18
+ headers_present: true
19
+ )
20
+
21
+ assert_equal true, result[:match]
22
+ assert_equal 0, result[:left_only_count]
23
+ assert_equal 0, result[:right_only_count]
24
+ end
25
+
26
+ def test_reports_mismatch_counts_for_different_rows
27
+ comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
28
+
29
+ result = comparator.call(
30
+ left_path: fixture_path("sample_people.csv"),
31
+ right_path: fixture_path("parity_people_mismatch.csv"),
32
+ col_sep: ",",
33
+ headers_present: true
34
+ )
35
+
36
+ assert_equal false, result[:match]
37
+ assert_equal 1, result[:left_only_count]
38
+ assert_equal 1, result[:right_only_count]
39
+ assert_equal "Cara,Berlin", result[:left_only_examples][0][:row]
40
+ assert_equal 1, result[:left_only_examples][0][:count_delta]
41
+ assert_equal "Dina,Rome", result[:right_only_examples][0][:row]
42
+ assert_equal 1, result[:right_only_examples][0][:count_delta]
43
+ end
44
+
45
+ def test_respects_duplicate_counts
46
+ comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
47
+
48
+ result = comparator.call(
49
+ left_path: fixture_path("parity_duplicates_left.csv"),
50
+ right_path: fixture_path("parity_duplicates_right.csv"),
51
+ col_sep: ",",
52
+ headers_present: true
53
+ )
54
+
55
+ assert_equal false, result[:match]
56
+ assert_equal 1, result[:left_only_count]
57
+ assert_equal 0, result[:right_only_count]
58
+ assert_equal "1,Alice", result[:left_only_examples][0][:row]
59
+ assert_equal 1, result[:left_only_examples][0][:count_delta]
60
+ end
61
+
62
+ def test_preserves_exact_semantics_for_larger_fixture_with_different_order
63
+ comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
64
+
65
+ result = comparator.call(
66
+ left_path: fixture_path("sample_people_many.csv"),
67
+ right_path: fixture_path("parity_people_many_reordered.csv"),
68
+ col_sep: ",",
69
+ headers_present: true
70
+ )
71
+
72
+ assert_equal true, result[:match]
73
+ assert_equal 12, result[:left_rows]
74
+ assert_equal 12, result[:right_rows]
75
+ assert_equal 0, result[:left_only_count]
76
+ assert_equal 0, result[:right_only_count]
77
+ end
78
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/csv/csv_splitter"
5
+ require "tmpdir"
6
+
7
+ class CsvSplitterTest < Minitest::Test
8
+ def test_splits_large_file_in_order
9
+ splitter = Csvtool::Infrastructure::CSV::CsvSplitter.new
10
+
11
+ Dir.mktmpdir do |dir|
12
+ source_path = File.join(dir, "large.csv")
13
+ File.open(source_path, "w") do |f|
14
+ f.puts "id,value"
15
+ 5_000.times { |i| f.puts "#{i + 1},v#{i + 1}" }
16
+ end
17
+
18
+ stats = splitter.call(
19
+ file_path: source_path,
20
+ col_sep: ",",
21
+ headers_present: true,
22
+ chunk_size: 1_000,
23
+ output_directory: dir,
24
+ file_prefix: "large",
25
+ overwrite_existing: false
26
+ )
27
+
28
+ assert_equal 5, stats[:chunk_count]
29
+ assert_equal 5_000, stats[:data_rows]
30
+ assert_equal [1_000, 1_000, 1_000, 1_000, 1_000], stats[:chunk_row_counts]
31
+
32
+ first_chunk = File.read(File.join(dir, "large_part_001.csv")).lines.map(&:strip)
33
+ last_chunk = File.read(File.join(dir, "large_part_005.csv")).lines.map(&:strip)
34
+ assert_equal "id,value", first_chunk.first
35
+ assert_equal "1,v1", first_chunk[1]
36
+ assert_equal "1000,v1000", first_chunk[1000]
37
+ assert_equal "4001,v4001", last_chunk[1]
38
+ assert_equal "5000,v5000", last_chunk[1000]
39
+ end
40
+ end
41
+
42
+ def test_streaming_split_handles_headerless_file
43
+ splitter = Csvtool::Infrastructure::CSV::CsvSplitter.new
44
+
45
+ Dir.mktmpdir do |dir|
46
+ source_path = File.join(dir, "large_no_headers.csv")
47
+ File.open(source_path, "w") do |f|
48
+ 2_500.times { |i| f.puts "#{i + 1},v#{i + 1}" }
49
+ end
50
+
51
+ stats = splitter.call(
52
+ file_path: source_path,
53
+ col_sep: ",",
54
+ headers_present: false,
55
+ chunk_size: 1_000,
56
+ output_directory: dir,
57
+ file_prefix: "large_no_headers",
58
+ overwrite_existing: false
59
+ )
60
+
61
+ assert_equal 3, stats[:chunk_count]
62
+ assert_equal 2_500, stats[:data_rows]
63
+ assert_equal [1_000, 1_000, 500], stats[:chunk_row_counts]
64
+ first_line = File.read(File.join(dir, "large_no_headers_part_001.csv")).lines.first.strip
65
+ assert_equal "1,v1", first_line
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/output/csv_split_manifest_writer"
5
+ require "tmpdir"
6
+
7
+ class CsvSplitManifestWriterTest < Minitest::Test
8
+ def test_writes_manifest_csv
9
+ writer = Csvtool::Infrastructure::Output::CsvSplitManifestWriter.new
10
+
11
+ Dir.mktmpdir do |dir|
12
+ path = File.join(dir, "manifest.csv")
13
+ writer.call(
14
+ path: path,
15
+ chunk_paths: ["/tmp/a.csv", "/tmp/b.csv"],
16
+ chunk_row_counts: [10, 5]
17
+ )
18
+
19
+ lines = File.read(path).lines.map(&:strip)
20
+ assert_equal "chunk_index,chunk_path,row_count", lines[0]
21
+ assert_equal "1,/tmp/a.csv,10", lines[1]
22
+ assert_equal "2,/tmp/b.csv,5", lines[2]
23
+ end
24
+ end
25
+ end
@@ -24,6 +24,7 @@ class ErrorsPresenterTest < Minitest::Test
24
24
  presenter.invalid_end_row
25
25
  presenter.invalid_row_range_order
26
26
  presenter.row_range_out_of_bounds(3)
27
+ presenter.header_mismatch
27
28
 
28
29
  text = out.string
29
30
  assert_includes text, "File not found: /tmp/x.csv"
@@ -42,5 +43,6 @@ class ErrorsPresenterTest < Minitest::Test
42
43
  assert_includes text, "End row must be a positive integer."
43
44
  assert_includes text, "End row must be greater than or equal to start row."
44
45
  assert_includes text, "Row range is out of bounds. File has 3 data rows."
46
+ assert_includes text, "CSV headers do not match."
45
47
  end
46
48
  end