csvops 0.3.0.alpha → 0.5.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +69 -149
  3. data/docs/architecture.md +396 -0
  4. data/docs/release-v0.4.0-alpha.md +87 -0
  5. data/docs/release-v0.5.0-alpha.md +89 -0
  6. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
  7. data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
  8. data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
  9. data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
  10. data/lib/csvtool/cli.rb +11 -7
  11. data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
  12. data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
  13. data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
  14. data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
  15. data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
  16. data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
  17. data/lib/csvtool/domain/row_session/row_source.rb +3 -0
  18. data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
  19. data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
  20. data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
  21. data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
  22. data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
  23. data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
  24. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
  25. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  26. data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
  27. data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
  28. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
  29. data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
  30. data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
  31. data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
  32. data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
  33. data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
  34. data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
  35. data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
  36. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
  37. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
  38. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
  39. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
  40. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
  41. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
  42. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
  43. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
  44. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
  45. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
  46. data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
  47. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
  48. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
  49. data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
  50. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
  51. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
  52. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
  53. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
  54. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
  55. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
  56. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
  57. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
  58. data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
  59. data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
  60. data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
  61. data/lib/csvtool/version.rb +1 -1
  62. data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
  63. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
  64. data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
  65. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
  66. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
  67. data/test/csvtool/cli_test.rb +130 -16
  68. data/test/csvtool/cli_unit_test.rb +16 -3
  69. data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
  70. data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
  71. data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
  72. data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
  73. data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
  74. data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
  75. data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
  76. data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
  77. data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
  78. data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
  79. data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
  80. data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
  81. data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
  82. data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
  83. data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
  84. data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
  85. data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
  86. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
  87. data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
  88. data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
  89. data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
  90. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
  91. data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
  92. data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
  93. data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
  94. data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
  95. data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
  96. data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
  97. data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
  98. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
  99. data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
  100. data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
  101. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
  102. data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
  103. data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
  104. data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
  105. data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
  106. data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
  107. data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
  108. data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
  109. data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
  110. data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
  111. data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
  112. data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
  113. data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
  114. data/test/fixtures/dedupe_reference.csv +3 -0
  115. data/test/fixtures/dedupe_reference.tsv +3 -0
  116. data/test/fixtures/dedupe_reference_all.csv +5 -0
  117. data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
  118. data/test/fixtures/dedupe_reference_none.csv +2 -0
  119. data/test/fixtures/dedupe_reference_normalization.csv +3 -0
  120. data/test/fixtures/dedupe_source.csv +6 -0
  121. data/test/fixtures/dedupe_source.tsv +6 -0
  122. data/test/fixtures/dedupe_source_no_headers.csv +5 -0
  123. data/test/fixtures/dedupe_source_normalization.csv +4 -0
  124. metadata +93 -8
  125. data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
  126. data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
  127. data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
  128. data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
  129. data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
@@ -2,123 +2,133 @@
2
2
 
3
3
  require_relative "../../../test_helper"
4
4
  require "csvtool/application/use_cases/run_row_randomization"
5
+ require "csvtool/domain/row_randomization_session/randomization_source"
6
+ require "csvtool/domain/row_randomization_session/randomization_options"
7
+ require "csvtool/domain/row_randomization_session/randomization_session"
8
+ require "csvtool/domain/shared/output_destination"
5
9
  require "tmpdir"
6
10
 
7
11
  class RunRowRandomizationTest < Minitest::Test
8
- def test_prints_header_then_all_randomized_rows
9
- fixture = File.expand_path("../../../fixtures/sample_people.csv", __dir__)
10
- output = StringIO.new
11
- input = StringIO.new("#{fixture}\n\n\n\n\n")
12
-
13
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
14
-
15
- assert_includes output.string, "CSV file path:"
16
- header_index = output.string.index("name,city")
17
- assert header_index
18
- %w[Alice,London Bob,Paris Cara,Berlin].each do |row|
19
- row_index = output.string.index(row)
20
- assert row_index
21
- assert_operator header_index, :<, row_index
12
+ class RaisingWriter
13
+ def call(**_kwargs)
14
+ raise Errno::ENOENT
22
15
  end
23
16
  end
24
17
 
25
- def test_missing_file_shows_friendly_error
26
- output = StringIO.new
27
- input = StringIO.new("/tmp/does-not-exist.csv\n")
28
-
29
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
18
+ def fixture_path(name)
19
+ File.expand_path("../../../fixtures/#{name}", __dir__)
20
+ end
30
21
 
31
- assert_includes output.string, "File not found: /tmp/does-not-exist.csv"
22
+ def build_session(file_path:, separator: ",", headers_present: true, seed: nil, output: :console, output_path: nil)
23
+ source = Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
24
+ path: file_path,
25
+ separator: separator,
26
+ headers_present: headers_present
27
+ )
28
+ options = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: seed)
29
+ session = Csvtool::Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
30
+
31
+ session.with_output_destination(
32
+ if output == :file
33
+ Csvtool::Domain::Shared::OutputDestination.file(path: output_path)
34
+ else
35
+ Csvtool::Domain::Shared::OutputDestination.console
36
+ end
37
+ )
32
38
  end
33
39
 
34
- def test_can_write_randomized_rows_to_file
35
- fixture = File.expand_path("../../../fixtures/sample_people.csv", __dir__)
36
- output = StringIO.new
40
+ def test_read_headers_returns_headers_when_enabled
41
+ use_case = Csvtool::Application::UseCases::RunRowRandomization.new
37
42
 
38
- Dir.mktmpdir do |dir|
39
- output_path = File.join(dir, "randomized.csv")
40
- input = StringIO.new("#{fixture}\n\n\n\n2\n#{output_path}\n")
43
+ result = use_case.read_headers(file_path: fixture_path("sample_people.csv"), col_sep: ",", headers_present: true)
41
44
 
42
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
43
-
44
- written = File.read(output_path).lines.map(&:strip)
45
- assert_equal "name,city", written.first
46
- assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, written[1..].sort
47
- assert_includes output.string, "Wrote output to #{output_path}"
48
- end
45
+ assert result.ok?
46
+ assert_equal ["name", "city"], result.data[:headers]
49
47
  end
50
48
 
51
- def test_supports_tsv_separator
52
- fixture = File.expand_path("../../../fixtures/sample_people.tsv", __dir__)
53
- output = StringIO.new
54
- input = StringIO.new("#{fixture}\n2\n\n\n\n")
49
+ def test_read_headers_returns_nil_when_headers_disabled
50
+ use_case = Csvtool::Application::UseCases::RunRowRandomization.new
55
51
 
56
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
52
+ result = use_case.read_headers(file_path: fixture_path("sample_people_no_headers.csv"), col_sep: ",", headers_present: false)
57
53
 
58
- assert_includes output.string, "name\tcity"
59
- assert_includes output.string, "Alice\tLondon"
60
- assert_includes output.string, "Bob\tParis"
61
- assert_includes output.string, "Cara\tBerlin"
54
+ assert result.ok?
55
+ assert_nil result.data[:headers]
62
56
  end
63
57
 
64
- def test_supports_custom_separator
65
- fixture = File.expand_path("../../../fixtures/sample_people_colon.txt", __dir__)
66
- output = StringIO.new
67
- input = StringIO.new("#{fixture}\n5\n:\n\n\n\n")
58
+ def test_read_headers_fails_for_missing_file
59
+ use_case = Csvtool::Application::UseCases::RunRowRandomization.new
68
60
 
69
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
61
+ result = use_case.read_headers(file_path: "/tmp/not-present.csv", col_sep: ",", headers_present: true)
70
62
 
71
- assert_includes output.string, "name:city"
72
- assert_includes output.string, "Alice:London"
73
- assert_includes output.string, "Bob:Paris"
74
- assert_includes output.string, "Cara:Berlin"
63
+ refute result.ok?
64
+ assert_equal :file_not_found, result.error
75
65
  end
76
66
 
77
- def test_headerless_mode_randomizes_all_rows
78
- fixture = File.expand_path("../../../fixtures/sample_people_no_headers.csv", __dir__)
79
- output = StringIO.new
80
- input = StringIO.new("#{fixture}\n\nn\n\n\n")
67
+ def test_randomize_streams_rows_for_console_mode
68
+ use_case = Csvtool::Application::UseCases::RunRowRandomization.new
69
+ session = build_session(file_path: fixture_path("sample_people.csv"), seed: 123)
70
+ rows = []
81
71
 
82
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
72
+ result = use_case.randomize(session: session, headers: ["name", "city"], on_row: ->(fields) { rows << fields })
83
73
 
84
- refute_includes output.string, "name,city"
85
- assert_includes output.string, "Alice,London"
86
- assert_includes output.string, "Bob,Paris"
87
- assert_includes output.string, "Cara,Berlin"
74
+ assert result.ok?
75
+ assert_equal 3, rows.length
76
+ assert_equal [["Alice", "London"], ["Bob", "Paris"], ["Cara", "Berlin"]].sort, rows.sort
88
77
  end
89
78
 
90
- def test_same_seed_produces_same_output_order
91
- fixture = File.expand_path("../../../fixtures/sample_people_many.csv", __dir__)
92
- input_data = "#{fixture}\n\n\n123\n\n"
93
-
94
- out1 = StringIO.new
95
- out2 = StringIO.new
79
+ def test_randomize_writes_rows_to_file
80
+ use_case = Csvtool::Application::UseCases::RunRowRandomization.new
96
81
 
97
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: StringIO.new(input_data), stdout: out1).call
98
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: StringIO.new(input_data), stdout: out2).call
99
-
100
- rows1 = out1.string.lines.map(&:strip).select { |line| line.include?(",") && !line.start_with?("name,city") }
101
- rows2 = out2.string.lines.map(&:strip).select { |line| line.include?(",") && !line.start_with?("name,city") }
102
- assert_equal rows1, rows2
82
+ Dir.mktmpdir do |dir|
83
+ output_path = File.join(dir, "randomized.csv")
84
+ session = build_session(
85
+ file_path: fixture_path("sample_people.csv"),
86
+ seed: 123,
87
+ output: :file,
88
+ output_path: output_path
89
+ )
90
+
91
+ result = use_case.randomize(session: session, headers: ["name", "city"])
92
+
93
+ assert result.ok?
94
+ assert_equal output_path, result.data[:output_path]
95
+ lines = File.read(output_path).lines.map(&:strip)
96
+ assert_equal "name,city", lines.first
97
+ assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, lines[1..].sort
98
+ end
103
99
  end
104
100
 
105
- def test_invalid_seed_shows_friendly_error
106
- fixture = File.expand_path("../../../fixtures/sample_people.csv", __dir__)
107
- output = StringIO.new
108
- input = StringIO.new("#{fixture}\n\n\nabc\n")
101
+ def test_same_seed_produces_stable_order
102
+ use_case = Csvtool::Application::UseCases::RunRowRandomization.new
103
+ session_1 = build_session(file_path: fixture_path("sample_people_many.csv"), seed: 123)
104
+ session_2 = build_session(file_path: fixture_path("sample_people_many.csv"), seed: 123)
105
+ rows_1 = []
106
+ rows_2 = []
109
107
 
110
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
108
+ result_1 = use_case.randomize(session: session_1, headers: ["name", "city"], on_row: ->(fields) { rows_1 << fields })
109
+ result_2 = use_case.randomize(session: session_2, headers: ["name", "city"], on_row: ->(fields) { rows_2 << fields })
111
110
 
112
- assert_includes output.string, "Seed must be an integer."
111
+ assert result_1.ok?
112
+ assert result_2.ok?
113
+ assert_equal rows_1, rows_2
113
114
  end
114
115
 
115
- def test_malformed_csv_shows_friendly_error
116
- fixture = File.expand_path("../../../fixtures/sample_people_bad_tail.csv", __dir__)
117
- output = StringIO.new
118
- input = StringIO.new("#{fixture}\n\n\n\n\n")
119
-
120
- Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
121
-
122
- assert_includes output.string, "Could not parse CSV file."
116
+ def test_randomize_returns_cannot_write_output_file_when_writer_fails
117
+ use_case = Csvtool::Application::UseCases::RunRowRandomization.new(
118
+ csv_randomized_row_file_writer: RaisingWriter.new
119
+ )
120
+ session = build_session(
121
+ file_path: fixture_path("sample_people.csv"),
122
+ seed: 123,
123
+ output: :file,
124
+ output_path: "/tmp/randomized.csv"
125
+ )
126
+
127
+ result = use_case.randomize(session: session, headers: ["name", "city"])
128
+
129
+ refute result.ok?
130
+ assert_equal :cannot_write_output_file, result.error
131
+ assert_equal "/tmp/randomized.csv", result.data[:path]
132
+ assert_equal Errno::ENOENT, result.data[:error_class]
123
133
  end
124
134
  end
@@ -11,7 +11,7 @@ class TestCli < Minitest::Test
11
11
 
12
12
  def test_menu_can_exit_cleanly
13
13
  output = StringIO.new
14
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("4\n"), stdout: output, stderr: StringIO.new)
14
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: output, stderr: StringIO.new)
15
15
  assert_equal 0, status
16
16
  assert_includes output.string, "CSV Tool Menu"
17
17
  end
@@ -26,7 +26,7 @@ class TestCli < Minitest::Test
26
26
  "",
27
27
  "y",
28
28
  "",
29
- "4"
29
+ "5"
30
30
  ].join("\n") + "\n"
31
31
 
32
32
  output = StringIO.new
@@ -58,7 +58,7 @@ class TestCli < Minitest::Test
58
58
  "2",
59
59
  "3",
60
60
  "",
61
- "4"
61
+ "5"
62
62
  ].join("\n") + "\n"
63
63
 
64
64
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -79,7 +79,7 @@ class TestCli < Minitest::Test
79
79
  "0",
80
80
  "3",
81
81
  "",
82
- "4"
82
+ "5"
83
83
  ].join("\n") + "\n"
84
84
 
85
85
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -98,7 +98,7 @@ class TestCli < Minitest::Test
98
98
  "2",
99
99
  "3",
100
100
  "",
101
- "4"
101
+ "5"
102
102
  ].join("\n") + "\n"
103
103
 
104
104
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -119,7 +119,7 @@ class TestCli < Minitest::Test
119
119
  "2",
120
120
  "3",
121
121
  "",
122
- "4"
122
+ "5"
123
123
  ].join("\n") + "\n"
124
124
 
125
125
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -144,7 +144,7 @@ class TestCli < Minitest::Test
144
144
  "3",
145
145
  "2",
146
146
  output_path,
147
- "4"
147
+ "5"
148
148
  ].join("\n") + "\n"
149
149
 
150
150
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -164,7 +164,7 @@ class TestCli < Minitest::Test
164
164
  "1",
165
165
  "2",
166
166
  "",
167
- "4"
167
+ "5"
168
168
  ].join("\n") + "\n"
169
169
 
170
170
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -184,7 +184,7 @@ class TestCli < Minitest::Test
184
184
  "",
185
185
  "",
186
186
  "",
187
- "4"
187
+ "5"
188
188
  ].join("\n") + "\n"
189
189
 
190
190
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -209,7 +209,7 @@ class TestCli < Minitest::Test
209
209
  "",
210
210
  "2",
211
211
  output_path,
212
- "4"
212
+ "5"
213
213
  ].join("\n") + "\n"
214
214
 
215
215
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -231,7 +231,7 @@ class TestCli < Minitest::Test
231
231
  "",
232
232
  "",
233
233
  "",
234
- "4"
234
+ "5"
235
235
  ].join("\n") + "\n"
236
236
 
237
237
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -250,7 +250,7 @@ class TestCli < Minitest::Test
250
250
  "n",
251
251
  "",
252
252
  "",
253
- "4"
253
+ "5"
254
254
  ].join("\n") + "\n"
255
255
 
256
256
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -270,7 +270,7 @@ class TestCli < Minitest::Test
270
270
  "",
271
271
  "",
272
272
  "abc",
273
- "4"
273
+ "5"
274
274
  ].join("\n") + "\n"
275
275
 
276
276
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -280,6 +280,120 @@ class TestCli < Minitest::Test
280
280
  assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
281
281
  end
282
282
 
283
+ def test_dedupe_workflow_shell_prompts_and_returns_to_menu
284
+ output = StringIO.new
285
+ input = [
286
+ "4",
287
+ fixture_path("dedupe_source.csv"),
288
+ "",
289
+ "",
290
+ fixture_path("dedupe_reference.csv"),
291
+ "",
292
+ "",
293
+ "customer_id",
294
+ "external_id",
295
+ "",
296
+ "",
297
+ "",
298
+ "5"
299
+ ].join("\n") + "\n"
300
+
301
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
302
+
303
+ assert_equal 0, status
304
+ assert_includes output.string, "Reference CSV file path:"
305
+ assert_includes output.string, "Source key column name:"
306
+ assert_includes output.string, "Reference key column name:"
307
+ assert_includes output.string, "customer_id,name"
308
+ assert_includes output.string, "1,Alice"
309
+ assert_includes output.string, "3,Cara"
310
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
311
+ end
312
+
313
+ def test_dedupe_workflow_can_write_to_file
314
+ output = StringIO.new
315
+
316
+ Dir.mktmpdir do |dir|
317
+ output_path = File.join(dir, "deduped.csv")
318
+ input = [
319
+ "4",
320
+ fixture_path("dedupe_source.csv"),
321
+ "",
322
+ "",
323
+ fixture_path("dedupe_reference.csv"),
324
+ "",
325
+ "",
326
+ "customer_id",
327
+ "external_id",
328
+ "",
329
+ "",
330
+ "2",
331
+ output_path,
332
+ "5"
333
+ ].join("\n") + "\n"
334
+
335
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
336
+
337
+ assert_equal 0, status
338
+ assert_includes output.string, "Wrote output to #{output_path}"
339
+ assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
340
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
341
+ end
342
+ end
343
+
344
+ def test_dedupe_workflow_supports_tsv_separators
345
+ output = StringIO.new
346
+ input = [
347
+ "4",
348
+ fixture_path("dedupe_source.tsv"),
349
+ "2",
350
+ "",
351
+ fixture_path("dedupe_reference.tsv"),
352
+ "2",
353
+ "",
354
+ "customer_id",
355
+ "external_id",
356
+ "",
357
+ "",
358
+ "",
359
+ "5"
360
+ ].join("\n") + "\n"
361
+
362
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
363
+
364
+ assert_equal 0, status
365
+ assert_includes output.string, "customer_id\tname"
366
+ assert_includes output.string, "1\tAlice"
367
+ assert_includes output.string, "3\tCara"
368
+ end
369
+
370
+ def test_dedupe_workflow_headerless_mode_supports_index
371
+ output = StringIO.new
372
+ input = [
373
+ "4",
374
+ fixture_path("dedupe_source_no_headers.csv"),
375
+ "",
376
+ "n",
377
+ fixture_path("dedupe_reference_no_headers.csv"),
378
+ "",
379
+ "n",
380
+ "1",
381
+ "1",
382
+ "",
383
+ "",
384
+ "",
385
+ "5"
386
+ ].join("\n") + "\n"
387
+
388
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
389
+
390
+ assert_equal 0, status
391
+ refute_includes output.string, "customer_id,name"
392
+ assert_includes output.string, "1,Alice"
393
+ assert_includes output.string, "3,Cara"
394
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
395
+ end
396
+
283
397
  def test_end_to_end_file_output_writes_expected_csv
284
398
  output = StringIO.new
285
399
  output_path = nil
@@ -296,7 +410,7 @@ class TestCli < Minitest::Test
296
410
  "y",
297
411
  "2",
298
412
  output_path,
299
- "4"
413
+ "5"
300
414
  ].join("\n") + "\n"
301
415
 
302
416
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -316,7 +430,7 @@ class TestCli < Minitest::Test
316
430
  "1",
317
431
  "",
318
432
  "n",
319
- "4"
433
+ "5"
320
434
  ].join("\n") + "\n"
321
435
 
322
436
  output = StringIO.new
@@ -352,7 +466,7 @@ class TestCli < Minitest::Test
352
466
  "y",
353
467
  "2",
354
468
  "/tmp/not-a-dir/out.csv",
355
- "4"
469
+ "5"
356
470
  ].join("\n") + "\n"
357
471
 
358
472
  output = StringIO.new
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
16
16
  end
17
17
 
18
18
  def test_menu_command_can_exit_zero
19
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("4\n"), stdout: StringIO.new, stderr: StringIO.new)
19
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: StringIO.new, stderr: StringIO.new)
20
20
  assert_equal 0, status
21
21
  end
22
22
 
@@ -28,7 +28,7 @@ class CliUnitTest < Minitest::Test
28
28
  def test_menu_routes_to_row_range_shell
29
29
  stdout = StringIO.new
30
30
  fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
31
- input = ["2", fixture, "", "2", "3", "", "4"].join("\n") + "\n"
31
+ input = ["2", fixture, "", "2", "3", "", "5"].join("\n") + "\n"
32
32
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
33
33
  assert_equal 0, status
34
34
  assert_includes stdout.string, "name,city"
@@ -39,7 +39,7 @@ class CliUnitTest < Minitest::Test
39
39
  def test_menu_routes_to_randomize_rows_shell
40
40
  stdout = StringIO.new
41
41
  fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
42
- input = ["3", fixture, "", "", "", "", "4"].join("\n") + "\n"
42
+ input = ["3", fixture, "", "", "", "", "5"].join("\n") + "\n"
43
43
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
44
44
  assert_equal 0, status
45
45
  assert_includes stdout.string, "name,city"
@@ -47,4 +47,17 @@ class CliUnitTest < Minitest::Test
47
47
  assert_includes stdout.string, "Bob,Paris"
48
48
  assert_includes stdout.string, "Cara,Berlin"
49
49
  end
50
+
51
+ def test_menu_routes_to_dedupe_shell
52
+ stdout = StringIO.new
53
+ source_fixture = File.expand_path("../fixtures/dedupe_source.csv", __dir__)
54
+ reference_fixture = File.expand_path("../fixtures/dedupe_reference.csv", __dir__)
55
+ input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "5"].join("\n") + "\n"
56
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
57
+ assert_equal 0, status
58
+ assert_includes stdout.string, "customer_id,name"
59
+ assert_includes stdout.string, "1,Alice"
60
+ assert_includes stdout.string, "3,Cara"
61
+ assert_includes stdout.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
62
+ end
50
63
  end
@@ -8,7 +8,7 @@ require "csvtool/domain/column_session/column_selection"
8
8
  require "csvtool/domain/column_session/extraction_options"
9
9
  require "csvtool/domain/column_session/preview"
10
10
  require "csvtool/domain/column_session/extraction_value"
11
- require "csvtool/domain/column_session/output_destination"
11
+ require "csvtool/domain/shared/output_destination"
12
12
 
13
13
  class ColumnSessionTest < Minitest::Test
14
14
  def test_state_transitions
@@ -25,7 +25,7 @@ class ColumnSessionTest < Minitest::Test
25
25
  values: [Csvtool::Domain::ColumnSession::ExtractionValue.new("Alice")]
26
26
  )
27
27
  session = session.with_preview(preview).confirm!.with_output_destination(
28
- Csvtool::Domain::ColumnSession::OutputDestination.console
28
+ Csvtool::Domain::Shared::OutputDestination.console
29
29
  )
30
30
 
31
31
  assert_equal true, session.confirmed?
@@ -11,4 +11,14 @@ class CsvSourceTest < Minitest::Test
11
11
  assert_equal "/tmp/a.csv", source.path
12
12
  assert_equal separator, source.separator
13
13
  end
14
+
15
+ def test_rejects_empty_path
16
+ separator = Csvtool::Domain::ColumnSession::Separator.new(",")
17
+
18
+ error = assert_raises(ArgumentError) do
19
+ Csvtool::Domain::ColumnSession::CsvSource.new(path: "", separator: separator)
20
+ end
21
+
22
+ assert_equal "path cannot be empty", error.message
23
+ end
14
24
  end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
5
+
6
+ class CrossCsvDedupeColumnSelectorTest < Minitest::Test
7
+ def test_builds_header_selector_from_input
8
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
9
+
10
+ assert_equal "customer_id", selector.value
11
+ assert_equal true, selector.headers_present?
12
+ end
13
+
14
+ def test_builds_index_selector_from_input
15
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
16
+
17
+ assert_equal 2, selector.value
18
+ assert_equal true, selector.index?
19
+ end
20
+
21
+ def test_rejects_invalid_index_input
22
+ error = assert_raises(ArgumentError) do
23
+ Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "0")
24
+ end
25
+
26
+ assert_equal "column index must be a positive integer", error.message
27
+ end
28
+
29
+ def test_extracts_from_headered_row
30
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
31
+ row = { "customer_id" => "42" }
32
+
33
+ assert_equal "42", selector.extract_from(row)
34
+ end
35
+
36
+ def test_extracts_from_headerless_row_by_index
37
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
38
+ row = ["a", "b", "c"]
39
+
40
+ assert_equal "b", selector.extract_from(row)
41
+ end
42
+ end