csvops 0.4.0.alpha → 0.6.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/README.md +60 -12
  3. data/docs/architecture.md +208 -21
  4. data/docs/release-v0.5.0-alpha.md +89 -0
  5. data/docs/release-v0.6.0-alpha.md +84 -0
  6. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
  7. data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
  8. data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
  9. data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
  10. data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
  11. data/lib/csvtool/cli.rb +11 -7
  12. data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
  13. data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
  14. data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
  15. data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
  16. data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
  17. data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
  18. data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
  19. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
  20. data/lib/csvtool/interface/cli/errors/presenter.rb +4 -0
  21. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  22. data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
  23. data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
  24. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
  25. data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
  26. data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
  27. data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
  28. data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
  29. data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
  30. data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
  31. data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
  32. data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
  33. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
  34. data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
  35. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
  36. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
  37. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
  38. data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
  39. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
  40. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
  41. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
  42. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
  43. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
  44. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
  45. data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
  46. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
  47. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
  48. data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
  49. data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
  50. data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
  51. data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
  52. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
  53. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
  54. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
  55. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
  56. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
  57. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
  58. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
  59. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
  60. data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
  61. data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
  62. data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
  63. data/lib/csvtool/version.rb +1 -1
  64. data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
  65. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
  66. data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
  67. data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
  68. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
  69. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
  70. data/test/csvtool/cli_test.rb +175 -21
  71. data/test/csvtool/cli_unit_test.rb +4 -4
  72. data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
  73. data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
  74. data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
  75. data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
  76. data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
  77. data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
  78. data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
  79. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
  80. data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
  81. data/test/csvtool/interface/cli/menu_loop_test.rb +59 -16
  82. data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
  83. data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
  84. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
  85. data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
  86. data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
  87. data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
  88. data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
  89. data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
  90. data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
  91. data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
  92. data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
  93. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
  94. data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
  95. data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
  96. data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
  97. data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
  98. data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
  99. data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
  100. data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
  101. data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
  102. data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
  103. data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
  104. data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
  105. data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
  106. data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
  107. data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
  108. data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
  109. data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
  110. data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
  111. data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
  112. data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
  113. data/test/fixtures/parity_duplicates_left.csv +4 -0
  114. data/test/fixtures/parity_duplicates_right.csv +3 -0
  115. data/test/fixtures/parity_people_header_mismatch.csv +4 -0
  116. data/test/fixtures/parity_people_many_reordered.csv +13 -0
  117. data/test/fixtures/parity_people_mismatch.csv +4 -0
  118. data/test/fixtures/parity_people_reordered.csv +4 -0
  119. data/test/fixtures/parity_people_reordered.tsv +4 -0
  120. metadata +90 -1
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/csv/csv_parity_comparator"
5
+
6
+ class CsvParityComparatorTest < Minitest::Test
7
+ def fixture_path(name)
8
+ File.expand_path("../../../fixtures/#{name}", __dir__)
9
+ end
10
+
11
+ def test_reports_match_when_rows_are_equal_ignoring_order
12
+ comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
13
+
14
+ result = comparator.call(
15
+ left_path: fixture_path("sample_people.csv"),
16
+ right_path: fixture_path("parity_people_reordered.csv"),
17
+ col_sep: ",",
18
+ headers_present: true
19
+ )
20
+
21
+ assert_equal true, result[:match]
22
+ assert_equal 0, result[:left_only_count]
23
+ assert_equal 0, result[:right_only_count]
24
+ end
25
+
26
+ def test_reports_mismatch_counts_for_different_rows
27
+ comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
28
+
29
+ result = comparator.call(
30
+ left_path: fixture_path("sample_people.csv"),
31
+ right_path: fixture_path("parity_people_mismatch.csv"),
32
+ col_sep: ",",
33
+ headers_present: true
34
+ )
35
+
36
+ assert_equal false, result[:match]
37
+ assert_equal 1, result[:left_only_count]
38
+ assert_equal 1, result[:right_only_count]
39
+ assert_equal "Cara,Berlin", result[:left_only_examples][0][:row]
40
+ assert_equal 1, result[:left_only_examples][0][:count_delta]
41
+ assert_equal "Dina,Rome", result[:right_only_examples][0][:row]
42
+ assert_equal 1, result[:right_only_examples][0][:count_delta]
43
+ end
44
+
45
+ def test_respects_duplicate_counts
46
+ comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
47
+
48
+ result = comparator.call(
49
+ left_path: fixture_path("parity_duplicates_left.csv"),
50
+ right_path: fixture_path("parity_duplicates_right.csv"),
51
+ col_sep: ",",
52
+ headers_present: true
53
+ )
54
+
55
+ assert_equal false, result[:match]
56
+ assert_equal 1, result[:left_only_count]
57
+ assert_equal 0, result[:right_only_count]
58
+ assert_equal "1,Alice", result[:left_only_examples][0][:row]
59
+ assert_equal 1, result[:left_only_examples][0][:count_delta]
60
+ end
61
+
62
+ def test_preserves_exact_semantics_for_larger_fixture_with_different_order
63
+ comparator = Csvtool::Infrastructure::CSV::CsvParityComparator.new
64
+
65
+ result = comparator.call(
66
+ left_path: fixture_path("sample_people_many.csv"),
67
+ right_path: fixture_path("parity_people_many_reordered.csv"),
68
+ col_sep: ",",
69
+ headers_present: true
70
+ )
71
+
72
+ assert_equal true, result[:match]
73
+ assert_equal 12, result[:left_rows]
74
+ assert_equal 12, result[:right_rows]
75
+ assert_equal 0, result[:left_only_count]
76
+ assert_equal 0, result[:right_only_count]
77
+ end
78
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer"
5
+ require "tmpdir"
6
+
7
+ class InfrastructureCsvCrossCsvDedupeFileWriterTest < Minitest::Test
8
+ class FakeDeduper
9
+ def each_retained(**_kwargs)
10
+ yield %w[1 Alice]
11
+ yield %w[3 Cara]
12
+ { source_rows: 5, removed_rows: 3, kept_rows_count: 2 }
13
+ end
14
+ end
15
+
16
+ def test_writes_retained_rows_and_returns_stats
17
+ writer = Csvtool::Infrastructure::Output::CsvCrossCsvDedupeFileWriter.new(deduper: FakeDeduper.new)
18
+
19
+ Dir.mktmpdir do |dir|
20
+ output_path = File.join(dir, "deduped.csv")
21
+ stats = writer.call(
22
+ path: output_path,
23
+ headers: ["customer_id", "name"],
24
+ col_sep: ",",
25
+ dedupe_options: { source_path: "source.csv", reference_path: "reference.csv" }
26
+ )
27
+
28
+ assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
29
+ assert_equal 2, stats[:kept_rows_count]
30
+ end
31
+ end
32
+ end
@@ -2,7 +2,6 @@
2
2
 
3
3
  require_relative "../../../test_helper"
4
4
  require "csvtool/infrastructure/output/csv_file_writer"
5
- require "csvtool/interface/cli/errors/presenter"
6
5
  require "tmpdir"
7
6
 
8
7
  class InfrastructureCsvFileWriterTest < Minitest::Test
@@ -13,10 +12,7 @@ class InfrastructureCsvFileWriterTest < Minitest::Test
13
12
  end
14
13
 
15
14
  def test_writes_header_and_values
16
- stdout = StringIO.new
17
15
  writer = Csvtool::Infrastructure::Output::CsvFileWriter.new(
18
- stdout: stdout,
19
- errors: Csvtool::Interface::CLI::Errors::Presenter.new(stdout: stdout),
20
16
  value_streamer: FakeStreamer.new
21
17
  )
22
18
 
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/output/csv_randomized_row_file_writer"
5
+ require "tmpdir"
6
+
7
+ class InfrastructureCsvRandomizedRowFileWriterTest < Minitest::Test
8
+ class FakeRandomizer
9
+ def each(file_path:, col_sep:, headers:, seed:)
10
+ yield ["Bob", "Paris"]
11
+ yield ["Cara", "Berlin"]
12
+ end
13
+ end
14
+
15
+ def test_writes_randomized_rows_with_headers
16
+ writer = Csvtool::Infrastructure::Output::CsvRandomizedRowFileWriter.new(row_randomizer: FakeRandomizer.new)
17
+
18
+ Dir.mktmpdir do |dir|
19
+ output_path = File.join(dir, "randomized.csv")
20
+ writer.call(
21
+ path: output_path,
22
+ headers: ["name", "city"],
23
+ file_path: "ignored.csv",
24
+ col_sep: ",",
25
+ headers_present: true,
26
+ seed: 123
27
+ )
28
+
29
+ assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
30
+ end
31
+ end
32
+ end
@@ -2,7 +2,6 @@
2
2
 
3
3
  require_relative "../../../test_helper"
4
4
  require "csvtool/infrastructure/output/csv_row_file_writer"
5
- require "csvtool/interface/cli/errors/presenter"
6
5
  require "tmpdir"
7
6
 
8
7
  class InfrastructureCsvRowFileWriterTest < Minitest::Test
@@ -15,10 +14,7 @@ class InfrastructureCsvRowFileWriterTest < Minitest::Test
15
14
  end
16
15
 
17
16
  def test_writes_header_and_rows_to_file
18
- stdout = StringIO.new
19
17
  writer = Csvtool::Infrastructure::Output::CsvRowFileWriter.new(
20
- stdout: stdout,
21
- errors: Csvtool::Interface::CLI::Errors::Presenter.new(stdout: stdout),
22
18
  row_streamer: FakeRowStreamer.new
23
19
  )
24
20
 
@@ -35,6 +31,7 @@ class InfrastructureCsvRowFileWriterTest < Minitest::Test
35
31
 
36
32
  assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
37
33
  assert_equal true, stats[:matched]
34
+ assert_equal true, stats[:wrote_rows]
38
35
  end
39
36
  end
40
37
  end
@@ -24,6 +24,7 @@ class ErrorsPresenterTest < Minitest::Test
24
24
  presenter.invalid_end_row
25
25
  presenter.invalid_row_range_order
26
26
  presenter.row_range_out_of_bounds(3)
27
+ presenter.header_mismatch
27
28
 
28
29
  text = out.string
29
30
  assert_includes text, "File not found: /tmp/x.csv"
@@ -42,5 +43,6 @@ class ErrorsPresenterTest < Minitest::Test
42
43
  assert_includes text, "End row must be a positive integer."
43
44
  assert_includes text, "End row must be greater than or equal to start row."
44
45
  assert_includes text, "Row range is out of bounds. File has 3 data rows."
46
+ assert_includes text, "CSV headers do not match."
45
47
  end
46
48
  end
@@ -21,15 +21,17 @@ class MenuLoopTest < Minitest::Test
21
21
  rows_action = FakeAction.new
22
22
  randomize_rows_action = FakeAction.new
23
23
  dedupe_action = FakeAction.new
24
+ parity_action = FakeAction.new
24
25
  stdout = StringIO.new
25
26
  menu = Csvtool::Interface::CLI::MenuLoop.new(
26
- stdin: StringIO.new("1\n5\n"),
27
+ stdin: StringIO.new("1\n6\n"),
27
28
  stdout: stdout,
28
- menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Exit"],
29
+ menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Validate parity", "Exit"],
29
30
  extract_column_action: column_action,
30
31
  extract_rows_action: rows_action,
31
32
  randomize_rows_action: randomize_rows_action,
32
- dedupe_action: dedupe_action
33
+ dedupe_action: dedupe_action,
34
+ parity_action: parity_action
33
35
  )
34
36
 
35
37
  status = menu.run
@@ -39,6 +41,7 @@ class MenuLoopTest < Minitest::Test
39
41
  assert_equal 0, rows_action.runs
40
42
  assert_equal 0, randomize_rows_action.runs
41
43
  assert_equal 0, dedupe_action.runs
44
+ assert_equal 0, parity_action.runs
42
45
  assert_includes stdout.string, "CSV Tool Menu"
43
46
  end
44
47
 
@@ -47,15 +50,17 @@ class MenuLoopTest < Minitest::Test
47
50
  rows_action = FakeAction.new
48
51
  randomize_rows_action = FakeAction.new
49
52
  dedupe_action = FakeAction.new
53
+ parity_action = FakeAction.new
50
54
  stdout = StringIO.new
51
55
  menu = Csvtool::Interface::CLI::MenuLoop.new(
52
- stdin: StringIO.new("2\n5\n"),
56
+ stdin: StringIO.new("2\n6\n"),
53
57
  stdout: stdout,
54
- menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Exit"],
58
+ menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Validate parity", "Exit"],
55
59
  extract_column_action: column_action,
56
60
  extract_rows_action: rows_action,
57
61
  randomize_rows_action: randomize_rows_action,
58
- dedupe_action: dedupe_action
62
+ dedupe_action: dedupe_action,
63
+ parity_action: parity_action
59
64
  )
60
65
 
61
66
  status = menu.run
@@ -65,6 +70,7 @@ class MenuLoopTest < Minitest::Test
65
70
  assert_equal 1, rows_action.runs
66
71
  assert_equal 0, randomize_rows_action.runs
67
72
  assert_equal 0, dedupe_action.runs
73
+ assert_equal 0, parity_action.runs
68
74
  end
69
75
 
70
76
  def test_routes_randomize_rows_then_exit
@@ -72,15 +78,17 @@ class MenuLoopTest < Minitest::Test
72
78
  rows_action = FakeAction.new
73
79
  randomize_rows_action = FakeAction.new
74
80
  dedupe_action = FakeAction.new
81
+ parity_action = FakeAction.new
75
82
  stdout = StringIO.new
76
83
  menu = Csvtool::Interface::CLI::MenuLoop.new(
77
- stdin: StringIO.new("3\n5\n"),
84
+ stdin: StringIO.new("3\n6\n"),
78
85
  stdout: stdout,
79
- menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Exit"],
86
+ menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Validate parity", "Exit"],
80
87
  extract_column_action: column_action,
81
88
  extract_rows_action: rows_action,
82
89
  randomize_rows_action: randomize_rows_action,
83
- dedupe_action: dedupe_action
90
+ dedupe_action: dedupe_action,
91
+ parity_action: parity_action
84
92
  )
85
93
 
86
94
  status = menu.run
@@ -90,6 +98,7 @@ class MenuLoopTest < Minitest::Test
90
98
  assert_equal 0, rows_action.runs
91
99
  assert_equal 1, randomize_rows_action.runs
92
100
  assert_equal 0, dedupe_action.runs
101
+ assert_equal 0, parity_action.runs
93
102
  end
94
103
 
95
104
  def test_routes_dedupe_then_exit
@@ -97,15 +106,17 @@ class MenuLoopTest < Minitest::Test
97
106
  rows_action = FakeAction.new
98
107
  randomize_rows_action = FakeAction.new
99
108
  dedupe_action = FakeAction.new
109
+ parity_action = FakeAction.new
100
110
  stdout = StringIO.new
101
111
  menu = Csvtool::Interface::CLI::MenuLoop.new(
102
- stdin: StringIO.new("4\n5\n"),
112
+ stdin: StringIO.new("4\n6\n"),
103
113
  stdout: stdout,
104
- menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Exit"],
114
+ menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Validate parity", "Exit"],
105
115
  extract_column_action: column_action,
106
116
  extract_rows_action: rows_action,
107
117
  randomize_rows_action: randomize_rows_action,
108
- dedupe_action: dedupe_action
118
+ dedupe_action: dedupe_action,
119
+ parity_action: parity_action
109
120
  )
110
121
 
111
122
  status = menu.run
@@ -115,6 +126,35 @@ class MenuLoopTest < Minitest::Test
115
126
  assert_equal 0, rows_action.runs
116
127
  assert_equal 0, randomize_rows_action.runs
117
128
  assert_equal 1, dedupe_action.runs
129
+ assert_equal 0, parity_action.runs
130
+ end
131
+
132
+ def test_routes_parity_then_exit
133
+ column_action = FakeAction.new
134
+ rows_action = FakeAction.new
135
+ randomize_rows_action = FakeAction.new
136
+ dedupe_action = FakeAction.new
137
+ parity_action = FakeAction.new
138
+ stdout = StringIO.new
139
+ menu = Csvtool::Interface::CLI::MenuLoop.new(
140
+ stdin: StringIO.new("5\n6\n"),
141
+ stdout: stdout,
142
+ menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Validate parity", "Exit"],
143
+ extract_column_action: column_action,
144
+ extract_rows_action: rows_action,
145
+ randomize_rows_action: randomize_rows_action,
146
+ dedupe_action: dedupe_action,
147
+ parity_action: parity_action
148
+ )
149
+
150
+ status = menu.run
151
+
152
+ assert_equal 0, status
153
+ assert_equal 0, column_action.runs
154
+ assert_equal 0, rows_action.runs
155
+ assert_equal 0, randomize_rows_action.runs
156
+ assert_equal 0, dedupe_action.runs
157
+ assert_equal 1, parity_action.runs
118
158
  end
119
159
 
120
160
  def test_invalid_choice_shows_prompt
@@ -122,23 +162,26 @@ class MenuLoopTest < Minitest::Test
122
162
  rows_action = FakeAction.new
123
163
  randomize_rows_action = FakeAction.new
124
164
  dedupe_action = FakeAction.new
165
+ parity_action = FakeAction.new
125
166
  stdout = StringIO.new
126
167
  menu = Csvtool::Interface::CLI::MenuLoop.new(
127
- stdin: StringIO.new("x\n5\n"),
168
+ stdin: StringIO.new("x\n6\n"),
128
169
  stdout: stdout,
129
- menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Exit"],
170
+ menu_options: ["Extract column", "Extract rows (range)", "Randomize rows", "Dedupe using another CSV", "Validate parity", "Exit"],
130
171
  extract_column_action: column_action,
131
172
  extract_rows_action: rows_action,
132
173
  randomize_rows_action: randomize_rows_action,
133
- dedupe_action: dedupe_action
174
+ dedupe_action: dedupe_action,
175
+ parity_action: parity_action
134
176
  )
135
177
 
136
178
  menu.run
137
179
 
138
- assert_includes stdout.string, "Please choose 1, 2, 3, 4, or 5."
180
+ assert_includes stdout.string, "Please choose 1, 2, 3, 4, 5, or 6."
139
181
  assert_equal 0, column_action.runs
140
182
  assert_equal 0, rows_action.runs
141
183
  assert_equal 0, randomize_rows_action.runs
142
184
  assert_equal 0, dedupe_action.runs
185
+ assert_equal 0, parity_action.runs
143
186
  end
144
187
  end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../test_helper"
4
+ require "csvtool/interface/cli/prompts/dedupe_key_selector_prompt"
5
+
6
+ class DedupeKeySelectorPromptTest < Minitest::Test
7
+ def test_builds_name_selector_in_header_mode
8
+ prompt = Csvtool::Interface::CLI::Prompts::DedupeKeySelectorPrompt.new(stdin: StringIO.new("customer_id\n"), stdout: StringIO.new)
9
+
10
+ selector = prompt.call(label: "Source", headers_present: true)
11
+
12
+ assert_equal true, selector.headers_present?
13
+ assert_equal "customer_id", selector.value
14
+ end
15
+
16
+ def test_builds_index_selector_in_headerless_mode
17
+ prompt = Csvtool::Interface::CLI::Prompts::DedupeKeySelectorPrompt.new(stdin: StringIO.new("2\n"), stdout: StringIO.new)
18
+
19
+ selector = prompt.call(label: "Reference", headers_present: false)
20
+
21
+ assert_equal true, selector.index?
22
+ assert_equal 2, selector.value
23
+ end
24
+
25
+ def test_returns_nil_for_invalid_selector
26
+ prompt = Csvtool::Interface::CLI::Prompts::DedupeKeySelectorPrompt.new(stdin: StringIO.new("\n"), stdout: StringIO.new)
27
+
28
+ assert_nil prompt.call(label: "Source", headers_present: true)
29
+ end
30
+ end
@@ -8,4 +8,13 @@ class FilePathPromptTest < Minitest::Test
8
8
  prompt = Csvtool::Interface::CLI::Prompts::FilePathPrompt.new(stdin: StringIO.new(" /tmp/a.csv \n"), stdout: StringIO.new)
9
9
  assert_equal "/tmp/a.csv", prompt.call
10
10
  end
11
+
12
+ def test_supports_custom_label
13
+ out = StringIO.new
14
+ prompt = Csvtool::Interface::CLI::Prompts::FilePathPrompt.new(stdin: StringIO.new("/tmp/a.csv\n"), stdout: out)
15
+
16
+ prompt.call(label: "Reference CSV file path: ")
17
+
18
+ assert_includes out.string, "Reference CSV file path: "
19
+ end
11
20
  end
@@ -11,4 +11,14 @@ class HeadersPresentPromptTest < Minitest::Test
11
11
  assert_equal true, yes_prompt.call
12
12
  assert_equal false, no_prompt.call
13
13
  end
14
+
15
+ def test_supports_custom_label
16
+ out = StringIO.new
17
+ prompt = Csvtool::Interface::CLI::Prompts::HeadersPresentPrompt.new(stdin: StringIO.new("yes\n"), stdout: out)
18
+
19
+ result = prompt.call(label: "Source headers present? [Y/n]: ")
20
+
21
+ assert_equal true, result
22
+ assert_includes out.string, "Source headers present? [Y/n]: "
23
+ end
14
24
  end
@@ -28,4 +28,14 @@ class SeparatorPromptTest < Minitest::Test
28
28
  assert_nil prompt.call
29
29
  assert_includes errors.calls, :empty_custom_separator
30
30
  end
31
+
32
+ def test_supports_custom_label
33
+ errors = FakeErrors.new
34
+ out = StringIO.new
35
+ prompt = Csvtool::Interface::CLI::Prompts::SeparatorPrompt.new(stdin: StringIO.new("\n"), stdout: out, errors: errors)
36
+
37
+ prompt.call(label: "Reference CSV separator:")
38
+
39
+ assert_includes out.string, "Reference CSV separator:"
40
+ end
31
41
  end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../test_helper"
4
+ require "csvtool/interface/cli/prompts/yes_no_prompt"
5
+
6
+ class YesNoPromptTest < Minitest::Test
7
+ def test_uses_default_for_blank_or_invalid
8
+ prompt_blank = Csvtool::Interface::CLI::Prompts::YesNoPrompt.new(stdin: StringIO.new("\n"), stdout: StringIO.new)
9
+ prompt_invalid = Csvtool::Interface::CLI::Prompts::YesNoPrompt.new(stdin: StringIO.new("maybe\n"), stdout: StringIO.new)
10
+
11
+ assert_equal true, prompt_blank.call(label: "Q? ", default: true)
12
+ assert_equal false, prompt_invalid.call(label: "Q? ", default: false)
13
+ end
14
+
15
+ def test_accepts_yes_and_no_inputs
16
+ prompt_yes = Csvtool::Interface::CLI::Prompts::YesNoPrompt.new(stdin: StringIO.new("y\n"), stdout: StringIO.new)
17
+ prompt_no = Csvtool::Interface::CLI::Prompts::YesNoPrompt.new(stdin: StringIO.new("no\n"), stdout: StringIO.new)
18
+
19
+ assert_equal true, prompt_yes.call(label: "Q? ", default: false)
20
+ assert_equal false, prompt_no.call(label: "Q? ", default: true)
21
+ end
22
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/builders/column_session_builder"
5
+
6
+ class ColumnSessionBuilderTest < Minitest::Test
7
+ def test_builds_column_session
8
+ builder = Csvtool::Interface::CLI::Workflows::Builders::ColumnSessionBuilder.new
9
+
10
+ session = builder.call(file_path: "/tmp/data.csv", col_sep: ",", column_name: "name", skip_blanks: true)
11
+
12
+ assert_equal "/tmp/data.csv", session.source.path
13
+ assert_equal ",", session.source.separator.value
14
+ assert_equal "name", session.column_selection.name
15
+ assert_equal true, session.options.skip_blanks?
16
+ end
17
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder"
5
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
6
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
7
+ require "csvtool/domain/shared/output_destination"
8
+
9
+ class CrossCsvDedupeSessionBuilderTest < Minitest::Test
10
+ def test_builds_cross_csv_dedupe_session
11
+ builder = Csvtool::Interface::CLI::Workflows::Builders::CrossCsvDedupeSessionBuilder.new
12
+ source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "/tmp/source.csv", separator: ",", headers_present: true)
13
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "/tmp/reference.csv", separator: ",", headers_present: true)
14
+ source_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "id")
15
+ reference_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "rid")
16
+ destination = Csvtool::Domain::Shared::OutputDestination.console
17
+
18
+ session = builder.call(
19
+ source: source,
20
+ reference: reference,
21
+ source_selector: source_selector,
22
+ reference_selector: reference_selector,
23
+ trim_whitespace: true,
24
+ case_insensitive: false,
25
+ destination: destination
26
+ )
27
+
28
+ assert_equal "/tmp/source.csv", session.source.path
29
+ assert_equal "/tmp/reference.csv", session.reference.path
30
+ assert_equal "id", session.key_mapping.source_selector.value
31
+ assert_equal "rid", session.key_mapping.reference_selector.value
32
+ assert_equal true, session.match_options.trim_whitespace?
33
+ assert_equal false, session.match_options.case_insensitive?
34
+ assert_equal true, session.output_destination.console?
35
+ end
36
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/builders/csv_parity_session_builder"
5
+
6
+ class CsvParitySessionBuilderTest < Minitest::Test
7
+ def test_builds_parity_session
8
+ session = Csvtool::Interface::CLI::Workflows::Builders::CsvParitySessionBuilder.new.call(
9
+ left_path: "/tmp/left.csv",
10
+ right_path: "/tmp/right.csv",
11
+ col_sep: "\t",
12
+ headers_present: false
13
+ )
14
+
15
+ assert_equal "/tmp/left.csv", session.source_pair.left_path
16
+ assert_equal "/tmp/right.csv", session.source_pair.right_path
17
+ assert_equal "\t", session.options.separator
18
+ assert_equal false, session.options.headers_present?
19
+ end
20
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/builders/row_extraction_session_builder"
5
+ require "csvtool/domain/row_session/row_range"
6
+ require "csvtool/domain/shared/output_destination"
7
+
8
+ class RowExtractionSessionBuilderTest < Minitest::Test
9
+ def test_builds_row_extraction_session
10
+ builder = Csvtool::Interface::CLI::Workflows::Builders::RowExtractionSessionBuilder.new
11
+ row_range = Csvtool::Domain::RowSession::RowRange.new(start_row: 2, end_row: 4)
12
+ destination = Csvtool::Domain::Shared::OutputDestination.console
13
+
14
+ session = builder.call(file_path: "/tmp/data.csv", col_sep: ";", row_range: row_range, destination: destination)
15
+
16
+ assert_equal "/tmp/data.csv", session.source.path
17
+ assert_equal ";", session.source.separator
18
+ assert_equal 2, session.row_range.start_row
19
+ assert_equal true, session.output_destination.console?
20
+ end
21
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/builders/row_randomization_session_builder"
5
+ require "csvtool/domain/shared/output_destination"
6
+
7
+ class RowRandomizationSessionBuilderTest < Minitest::Test
8
+ def test_builds_row_randomization_session
9
+ builder = Csvtool::Interface::CLI::Workflows::Builders::RowRandomizationSessionBuilder.new
10
+ destination = Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv")
11
+
12
+ session = builder.call(
13
+ file_path: "/tmp/data.csv",
14
+ col_sep: "\t",
15
+ headers_present: false,
16
+ seed: 12,
17
+ destination: destination
18
+ )
19
+
20
+ assert_equal "/tmp/data.csv", session.source.path
21
+ assert_equal "\t", session.source.separator
22
+ assert_equal false, session.source.headers_present?
23
+ assert_equal 12, session.options.seed
24
+ assert_equal true, session.output_destination.file?
25
+ end
26
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/presenters/column_extraction_presenter"
5
+
6
+ class ColumnExtractionPresenterTest < Minitest::Test
7
+ def test_prints_value
8
+ out = StringIO.new
9
+ presenter = Csvtool::Interface::CLI::Workflows::Presenters::ColumnExtractionPresenter.new(stdout: out)
10
+
11
+ presenter.print_value("Alice")
12
+
13
+ assert_equal "Alice\n", out.string
14
+ end
15
+
16
+ def test_prints_file_written_message
17
+ out = StringIO.new
18
+ presenter = Csvtool::Interface::CLI::Workflows::Presenters::ColumnExtractionPresenter.new(stdout: out)
19
+
20
+ presenter.print_file_written("/tmp/names.csv")
21
+
22
+ assert_includes out.string, "Wrote output to /tmp/names.csv"
23
+ end
24
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter"
5
+
6
+ class CrossCsvDedupePresenterTest < Minitest::Test
7
+ def test_prints_header_row_and_summary
8
+ out = StringIO.new
9
+ presenter = Csvtool::Interface::CLI::Workflows::Presenters::CrossCsvDedupePresenter.new(stdout: out, col_sep: ",")
10
+
11
+ presenter.print_header(["id", "name"])
12
+ presenter.print_row(["1", "Alice"])
13
+ presenter.print_summary(source_rows: 5, removed_rows: 3, kept_rows_count: 2)
14
+
15
+ assert_includes out.string, "\nid,name\n"
16
+ assert_includes out.string, "1,Alice"
17
+ assert_includes out.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
18
+ end
19
+
20
+ def test_prints_zero_and_all_removed_messages
21
+ out = StringIO.new
22
+ presenter = Csvtool::Interface::CLI::Workflows::Presenters::CrossCsvDedupePresenter.new(stdout: out, col_sep: ",")
23
+
24
+ presenter.print_summary(source_rows: 5, removed_rows: 0, kept_rows_count: 5)
25
+ presenter.print_summary(source_rows: 5, removed_rows: 5, kept_rows_count: 0)
26
+
27
+ assert_includes out.string, "No rows removed; no matching keys found."
28
+ assert_includes out.string, "All source rows were removed by dedupe."
29
+ end
30
+ end