csvops 0.3.0.alpha → 0.5.0.alpha

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/README.md +69 -149
  3. data/docs/architecture.md +396 -0
  4. data/docs/release-v0.4.0-alpha.md +87 -0
  5. data/docs/release-v0.5.0-alpha.md +89 -0
  6. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
  7. data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
  8. data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
  9. data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
  10. data/lib/csvtool/cli.rb +11 -7
  11. data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
  12. data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
  13. data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
  14. data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
  15. data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
  16. data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
  17. data/lib/csvtool/domain/row_session/row_source.rb +3 -0
  18. data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
  19. data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
  20. data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
  21. data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
  22. data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
  23. data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
  24. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
  25. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  26. data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
  27. data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
  28. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
  29. data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
  30. data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
  31. data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
  32. data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
  33. data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
  34. data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
  35. data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
  36. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
  37. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
  38. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
  39. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
  40. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
  41. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
  42. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
  43. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
  44. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
  45. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
  46. data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
  47. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
  48. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
  49. data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
  50. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
  51. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
  52. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
  53. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
  54. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
  55. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
  56. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
  57. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
  58. data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
  59. data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
  60. data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
  61. data/lib/csvtool/version.rb +1 -1
  62. data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
  63. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
  64. data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
  65. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
  66. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
  67. data/test/csvtool/cli_test.rb +130 -16
  68. data/test/csvtool/cli_unit_test.rb +16 -3
  69. data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
  70. data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
  71. data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
  72. data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
  73. data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
  74. data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
  75. data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
  76. data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
  77. data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
  78. data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
  79. data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
  80. data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
  81. data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
  82. data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
  83. data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
  84. data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
  85. data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
  86. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
  87. data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
  88. data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
  89. data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
  90. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
  91. data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
  92. data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
  93. data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
  94. data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
  95. data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
  96. data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
  97. data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
  98. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
  99. data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
  100. data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
  101. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
  102. data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
  103. data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
  104. data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
  105. data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
  106. data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
  107. data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
  108. data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
  109. data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
  110. data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
  111. data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
  112. data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
  113. data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
  114. data/test/fixtures/dedupe_reference.csv +3 -0
  115. data/test/fixtures/dedupe_reference.tsv +3 -0
  116. data/test/fixtures/dedupe_reference_all.csv +5 -0
  117. data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
  118. data/test/fixtures/dedupe_reference_none.csv +2 -0
  119. data/test/fixtures/dedupe_reference_normalization.csv +3 -0
  120. data/test/fixtures/dedupe_source.csv +6 -0
  121. data/test/fixtures/dedupe_source.tsv +6 -0
  122. data/test/fixtures/dedupe_source_no_headers.csv +5 -0
  123. data/test/fixtures/dedupe_source_normalization.csv +4 -0
  124. metadata +93 -8
  125. data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
  126. data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
  127. data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
  128. data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
  129. data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
@@ -0,0 +1,246 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
5
+ require "tmpdir"
6
+
7
+ class RunCrossCsvDedupeWorkflowTest < Minitest::Test
8
+ def fixture_path(name)
9
+ File.expand_path("../../../../fixtures/#{name}", __dir__)
10
+ end
11
+
12
+ def test_dedupes_source_rows_by_reference_column
13
+ output = StringIO.new
14
+ input = [
15
+ fixture_path("dedupe_source.csv"),
16
+ "",
17
+ "",
18
+ fixture_path("dedupe_reference.csv"),
19
+ "",
20
+ "",
21
+ "customer_id",
22
+ "external_id",
23
+ "",
24
+ "",
25
+ ""
26
+ ].join("\n") + "\n"
27
+
28
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
29
+ .new(stdin: StringIO.new(input), stdout: output)
30
+ .call
31
+
32
+ assert_includes output.string, "CSV file path:"
33
+ assert_includes output.string, "Reference CSV file path:"
34
+ assert_includes output.string, "Source key column name:"
35
+ assert_includes output.string, "Reference key column name:"
36
+ assert_includes output.string, "customer_id,name"
37
+ assert_includes output.string, "1,Alice"
38
+ assert_includes output.string, "3,Cara"
39
+ refute_includes output.string, "2,Bob"
40
+ refute_includes output.string, "4,Dan"
41
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
42
+ end
43
+
44
+ def test_can_write_deduped_rows_to_file
45
+ output = StringIO.new
46
+
47
+ Dir.mktmpdir do |dir|
48
+ output_path = File.join(dir, "deduped.csv")
49
+ input = [
50
+ fixture_path("dedupe_source.csv"),
51
+ "",
52
+ "",
53
+ fixture_path("dedupe_reference.csv"),
54
+ "",
55
+ "",
56
+ "customer_id",
57
+ "external_id",
58
+ "",
59
+ "",
60
+ "2",
61
+ output_path
62
+ ].join("\n") + "\n"
63
+
64
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
65
+ .new(stdin: StringIO.new(input), stdout: output)
66
+ .call
67
+
68
+ assert_includes output.string, "Wrote output to #{output_path}"
69
+ assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
70
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
71
+ end
72
+ end
73
+
74
+ def test_supports_tsv_separators
75
+ output = StringIO.new
76
+ input = [
77
+ fixture_path("dedupe_source.tsv"),
78
+ "2",
79
+ "",
80
+ fixture_path("dedupe_reference.tsv"),
81
+ "2",
82
+ "",
83
+ "customer_id",
84
+ "external_id",
85
+ "",
86
+ "",
87
+ ""
88
+ ].join("\n") + "\n"
89
+
90
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
91
+ .new(stdin: StringIO.new(input), stdout: output)
92
+ .call
93
+
94
+ assert_includes output.string, "customer_id\tname"
95
+ assert_includes output.string, "1\tAlice"
96
+ assert_includes output.string, "3\tCara"
97
+ end
98
+
99
+ def test_headerless_mode_supports_column_index
100
+ output = StringIO.new
101
+ input = [
102
+ fixture_path("dedupe_source_no_headers.csv"),
103
+ "",
104
+ "n",
105
+ fixture_path("dedupe_reference_no_headers.csv"),
106
+ "",
107
+ "n",
108
+ "1",
109
+ "1",
110
+ "",
111
+ "",
112
+ ""
113
+ ].join("\n") + "\n"
114
+
115
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
116
+ .new(stdin: StringIO.new(input), stdout: output)
117
+ .call
118
+
119
+ refute_includes output.string, "customer_id,name"
120
+ assert_includes output.string, "1,Alice"
121
+ assert_includes output.string, "3,Cara"
122
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
123
+ end
124
+
125
+ def test_reports_column_not_found_when_missing
126
+ output = StringIO.new
127
+ input = [
128
+ fixture_path("dedupe_source.csv"),
129
+ "",
130
+ "",
131
+ fixture_path("dedupe_reference.csv"),
132
+ "",
133
+ "",
134
+ "missing",
135
+ "external_id",
136
+ "",
137
+ ""
138
+ ].join("\n") + "\n"
139
+
140
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
141
+ .new(stdin: StringIO.new(input), stdout: output)
142
+ .call
143
+
144
+ assert_includes output.string, "Column not found."
145
+ end
146
+
147
+ def test_reports_when_no_rows_were_removed
148
+ output = StringIO.new
149
+ input = [
150
+ fixture_path("dedupe_source.csv"),
151
+ "",
152
+ "",
153
+ fixture_path("dedupe_reference_none.csv"),
154
+ "",
155
+ "",
156
+ "customer_id",
157
+ "external_id",
158
+ "",
159
+ "",
160
+ ""
161
+ ].join("\n") + "\n"
162
+
163
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
164
+ .new(stdin: StringIO.new(input), stdout: output)
165
+ .call
166
+
167
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=0 kept_rows=5"
168
+ assert_includes output.string, "No rows removed; no matching keys found."
169
+ end
170
+
171
+ def test_reports_when_all_rows_were_removed
172
+ output = StringIO.new
173
+ input = [
174
+ fixture_path("dedupe_source.csv"),
175
+ "",
176
+ "",
177
+ fixture_path("dedupe_reference_all.csv"),
178
+ "",
179
+ "",
180
+ "customer_id",
181
+ "external_id",
182
+ "",
183
+ "",
184
+ ""
185
+ ].join("\n") + "\n"
186
+
187
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
188
+ .new(stdin: StringIO.new(input), stdout: output)
189
+ .call
190
+
191
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=5 kept_rows=0"
192
+ assert_includes output.string, "All source rows were removed by dedupe."
193
+ end
194
+
195
+ def test_normalization_trim_on_and_case_insensitive_on_matches_equivalent_keys
196
+ output = StringIO.new
197
+ input = [
198
+ fixture_path("dedupe_source_normalization.csv"),
199
+ "",
200
+ "",
201
+ fixture_path("dedupe_reference_normalization.csv"),
202
+ "",
203
+ "",
204
+ "customer_id",
205
+ "external_id",
206
+ "",
207
+ "y",
208
+ ""
209
+ ].join("\n") + "\n"
210
+
211
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
212
+ .new(stdin: StringIO.new(input), stdout: output)
213
+ .call
214
+
215
+ refute_includes output.string, " A1 ,Alice"
216
+ refute_includes output.string, "c3,Cara"
217
+ assert_includes output.string, "B2,Bob"
218
+ assert_includes output.string, "Summary: source_rows=3 removed_rows=2 kept_rows=1"
219
+ end
220
+
221
+ def test_normalization_disabled_preserves_exact_match_behavior
222
+ output = StringIO.new
223
+ input = [
224
+ fixture_path("dedupe_source_normalization.csv"),
225
+ "",
226
+ "",
227
+ fixture_path("dedupe_reference_normalization.csv"),
228
+ "",
229
+ "",
230
+ "customer_id",
231
+ "external_id",
232
+ "n",
233
+ "n",
234
+ ""
235
+ ].join("\n") + "\n"
236
+
237
+ Csvtool::Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow
238
+ .new(stdin: StringIO.new(input), stdout: output)
239
+ .call
240
+
241
+ assert_includes output.string, " A1 ,Alice"
242
+ assert_includes output.string, "B2,Bob"
243
+ assert_includes output.string, "c3,Cara"
244
+ assert_includes output.string, "Summary: source_rows=3 removed_rows=0 kept_rows=3"
245
+ end
246
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/run_extraction_workflow"
5
+ require "tmpdir"
6
+
7
+ class RunExtractionWorkflowTest < Minitest::Test
8
+ def fixture_path(name)
9
+ File.expand_path("../../../../fixtures/#{name}", __dir__)
10
+ end
11
+
12
+ def test_missing_file_path_reports_error
13
+ out = StringIO.new
14
+ workflow = Csvtool::Interface::CLI::Workflows::RunExtractionWorkflow.new(
15
+ stdin: StringIO.new("/tmp/not-present.csv\n\n"),
16
+ stdout: out
17
+ )
18
+
19
+ workflow.call
20
+
21
+ assert_includes out.string, "File not found: /tmp/not-present.csv"
22
+ end
23
+
24
+ def test_workflow_can_run_console_happy_path
25
+ out = StringIO.new
26
+ fixture = fixture_path("sample_people.csv")
27
+ input = ["#{fixture}", "1", "", "1", "", "y", ""].join("\n") + "\n"
28
+
29
+ Csvtool::Interface::CLI::Workflows::RunExtractionWorkflow.new(
30
+ stdin: StringIO.new(input),
31
+ stdout: out
32
+ ).call
33
+
34
+ assert_includes out.string, "Alice"
35
+ assert_includes out.string, "Bob"
36
+ assert_includes out.string, "Cara"
37
+ end
38
+
39
+ def test_workflow_can_write_output_file
40
+ out = StringIO.new
41
+
42
+ Dir.mktmpdir do |dir|
43
+ output_path = File.join(dir, "names.csv")
44
+ fixture = fixture_path("sample_people.csv")
45
+ input = ["#{fixture}", "1", "", "1", "", "y", "2", output_path].join("\n") + "\n"
46
+
47
+ Csvtool::Interface::CLI::Workflows::RunExtractionWorkflow.new(
48
+ stdin: StringIO.new(input),
49
+ stdout: out
50
+ ).call
51
+
52
+ assert_includes out.string, "Wrote output to #{output_path}"
53
+ assert_equal "name\nAlice\nBob\nCara\n", File.read(output_path)
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
5
+ require "tmpdir"
6
+
7
+ class RunRowExtractionWorkflowTest < Minitest::Test
8
+ def fixture_path(name)
9
+ File.expand_path("../../../../fixtures/#{name}", __dir__)
10
+ end
11
+
12
+ def test_missing_file_path_reports_error
13
+ out = StringIO.new
14
+ workflow = Csvtool::Interface::CLI::Workflows::RunRowExtractionWorkflow.new(
15
+ stdin: StringIO.new("/tmp/not-present.csv\n\n"),
16
+ stdout: out
17
+ )
18
+
19
+ workflow.call
20
+
21
+ assert_includes out.string, "File not found: /tmp/not-present.csv"
22
+ end
23
+
24
+ def test_workflow_can_run_console_happy_path
25
+ out = StringIO.new
26
+ fixture = fixture_path("sample_people.csv")
27
+ input = [fixture, "", "2", "3", ""].join("\n") + "\n"
28
+
29
+ Csvtool::Interface::CLI::Workflows::RunRowExtractionWorkflow.new(
30
+ stdin: StringIO.new(input),
31
+ stdout: out
32
+ ).call
33
+
34
+ assert_includes out.string, "name,city"
35
+ assert_includes out.string, "Bob,Paris"
36
+ assert_includes out.string, "Cara,Berlin"
37
+ refute_includes out.string, "Alice,London"
38
+ end
39
+
40
+ def test_workflow_can_write_output_file
41
+ out = StringIO.new
42
+
43
+ Dir.mktmpdir do |dir|
44
+ output_path = File.join(dir, "rows.csv")
45
+ fixture = fixture_path("sample_people.csv")
46
+ input = [fixture, "", "2", "3", "2", output_path].join("\n") + "\n"
47
+
48
+ Csvtool::Interface::CLI::Workflows::RunRowExtractionWorkflow.new(
49
+ stdin: StringIO.new(input),
50
+ stdout: out
51
+ ).call
52
+
53
+ assert_includes out.string, "Wrote output to #{output_path}"
54
+ assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
55
+ end
56
+ end
57
+
58
+ def test_rejects_non_numeric_start_row
59
+ out = StringIO.new
60
+ fixture = fixture_path("sample_people.csv")
61
+ input = [fixture, "", "abc", "3", ""].join("\n") + "\n"
62
+
63
+ Csvtool::Interface::CLI::Workflows::RunRowExtractionWorkflow.new(
64
+ stdin: StringIO.new(input),
65
+ stdout: out
66
+ ).call
67
+
68
+ assert_includes out.string, "Start row must be a positive integer."
69
+ end
70
+
71
+ def test_reports_out_of_bounds_range
72
+ out = StringIO.new
73
+ fixture = fixture_path("sample_people.csv")
74
+ input = [fixture, "", "10", "12", ""].join("\n") + "\n"
75
+
76
+ Csvtool::Interface::CLI::Workflows::RunRowExtractionWorkflow.new(
77
+ stdin: StringIO.new(input),
78
+ stdout: out
79
+ ).call
80
+
81
+ assert_includes out.string, "Row range is out of bounds. File has 3 data rows."
82
+ end
83
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
5
+ require "tmpdir"
6
+
7
+ class RunRowRandomizationWorkflowTest < Minitest::Test
8
+ def fixture_path(name)
9
+ File.expand_path("../../../../fixtures/#{name}", __dir__)
10
+ end
11
+
12
+ def test_missing_file_shows_friendly_error
13
+ output = StringIO.new
14
+ input = StringIO.new("/tmp/does-not-exist.csv\n\n")
15
+
16
+ Csvtool::Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: input, stdout: output).call
17
+
18
+ assert_includes output.string, "File not found: /tmp/does-not-exist.csv"
19
+ end
20
+
21
+ def test_workflow_prints_header_then_all_randomized_rows
22
+ output = StringIO.new
23
+ input = StringIO.new([fixture_path("sample_people.csv"), "", "", "", ""].join("\n") + "\n")
24
+
25
+ Csvtool::Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: input, stdout: output).call
26
+
27
+ assert_includes output.string, "name,city"
28
+ assert_includes output.string, "Alice,London"
29
+ assert_includes output.string, "Bob,Paris"
30
+ assert_includes output.string, "Cara,Berlin"
31
+ end
32
+
33
+ def test_workflow_can_write_randomized_rows_to_file
34
+ output = StringIO.new
35
+
36
+ Dir.mktmpdir do |dir|
37
+ output_path = File.join(dir, "randomized.csv")
38
+ input = StringIO.new([fixture_path("sample_people.csv"), "", "", "", "2", output_path].join("\n") + "\n")
39
+
40
+ Csvtool::Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: input, stdout: output).call
41
+
42
+ written = File.read(output_path).lines.map(&:strip)
43
+ assert_equal "name,city", written.first
44
+ assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, written[1..].sort
45
+ assert_includes output.string, "Wrote output to #{output_path}"
46
+ end
47
+ end
48
+
49
+ def test_workflow_supports_headerless_mode
50
+ output = StringIO.new
51
+ input = StringIO.new([fixture_path("sample_people_no_headers.csv"), "", "n", "", ""].join("\n") + "\n")
52
+
53
+ Csvtool::Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: input, stdout: output).call
54
+
55
+ refute_includes output.string, "name,city"
56
+ assert_includes output.string, "Alice,London"
57
+ assert_includes output.string, "Bob,Paris"
58
+ assert_includes output.string, "Cara,Berlin"
59
+ end
60
+
61
+ def test_invalid_seed_shows_friendly_error
62
+ output = StringIO.new
63
+ input = StringIO.new([fixture_path("sample_people.csv"), "", "", "abc"].join("\n") + "\n")
64
+
65
+ Csvtool::Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: input, stdout: output).call
66
+
67
+ assert_includes output.string, "Seed must be an integer."
68
+ end
69
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step"
5
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
6
+
7
+ class CrossCsvDedupeCollectOptionsStepTest < Minitest::Test
8
+ class FakeErrors
9
+ attr_reader :column_not_found_called
10
+
11
+ def column_not_found
12
+ @column_not_found_called = true
13
+ end
14
+ end
15
+
16
+ def test_halts_when_source_selector_invalid
17
+ selector_prompt = Object.new
18
+ yes_no_prompt = Object.new
19
+ output_destination_prompt = Object.new
20
+ session_builder = Object.new
21
+ mapper = Object.new
22
+ errors = FakeErrors.new
23
+
24
+ def selector_prompt.call(label:, headers_present:) = nil
25
+
26
+ step = Csvtool::Interface::CLI::Workflows::Steps::CrossCsvDedupe::CollectOptionsStep.new(
27
+ selector_prompt: selector_prompt,
28
+ yes_no_prompt: yes_no_prompt,
29
+ output_destination_prompt: output_destination_prompt,
30
+ errors: errors
31
+ )
32
+
33
+ source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "/tmp/a.csv", separator: ",", headers_present: true)
34
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "/tmp/b.csv", separator: ",", headers_present: true)
35
+
36
+ result = step.call(source: source, reference: reference, session_builder: session_builder, output_destination_mapper: mapper)
37
+
38
+ assert_equal :halt, result
39
+ assert_equal true, errors.column_not_found_called
40
+ end
41
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step"
5
+
6
+ class ExtractionCollectInputsStepTest < Minitest::Test
7
+ Result = Struct.new(:ok, :data) do
8
+ def ok? = ok
9
+ end
10
+
11
+ class FakeUseCase
12
+ def initialize(result)
13
+ @result = result
14
+ end
15
+
16
+ def read_headers(file_path:, col_sep:)
17
+ @result
18
+ end
19
+ end
20
+
21
+ def test_halts_when_separator_missing
22
+ file_prompt = Object.new
23
+ separator_prompt = Object.new
24
+ selector_prompt = Object.new
25
+ skip_prompt = Object.new
26
+ def file_prompt.call = "/tmp/data.csv"
27
+ def separator_prompt.call = nil
28
+
29
+ step = Csvtool::Interface::CLI::Workflows::Steps::Extraction::CollectInputsStep.new(
30
+ file_path_prompt: file_prompt,
31
+ separator_prompt: separator_prompt,
32
+ column_selector_prompt: selector_prompt,
33
+ skip_blanks_prompt: skip_prompt
34
+ )
35
+
36
+ assert_equal :halt, step.call(
37
+ use_case: FakeUseCase.new(Result.new(true, { headers: [] })),
38
+ session_builder: Object.new,
39
+ handle_error: ->(_r) {}
40
+ )
41
+ end
42
+
43
+ def test_halts_when_header_read_fails
44
+ file_prompt = Object.new
45
+ separator_prompt = Object.new
46
+ selector_prompt = Object.new
47
+ skip_prompt = Object.new
48
+ builder = Object.new
49
+ handled = []
50
+ def file_prompt.call = "/tmp/data.csv"
51
+ def separator_prompt.call = ","
52
+
53
+ step = Csvtool::Interface::CLI::Workflows::Steps::Extraction::CollectInputsStep.new(
54
+ file_path_prompt: file_prompt,
55
+ separator_prompt: separator_prompt,
56
+ column_selector_prompt: selector_prompt,
57
+ skip_blanks_prompt: skip_prompt
58
+ )
59
+
60
+ fail_result = Result.new(false, {})
61
+ result = step.call(use_case: FakeUseCase.new(fail_result), session_builder: builder, handle_error: ->(r) { handled << r })
62
+
63
+ assert_equal :halt, result
64
+ assert_equal [fail_result], handled
65
+ end
66
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step"
5
+
6
+ class CollectSourceStepTest < Minitest::Test
7
+ def test_collects_file_and_separator
8
+ file_prompt = Object.new
9
+ separator_prompt = Object.new
10
+ def file_prompt.call = "/tmp/data.csv"
11
+ def separator_prompt.call = ","
12
+
13
+ step = Csvtool::Interface::CLI::Workflows::Steps::RowExtraction::CollectSourceStep.new(
14
+ file_path_prompt: file_prompt,
15
+ separator_prompt: separator_prompt
16
+ )
17
+ context = {}
18
+
19
+ result = step.call(context)
20
+
21
+ assert_nil result
22
+ assert_equal "/tmp/data.csv", context[:file_path]
23
+ assert_equal ",", context[:col_sep]
24
+ end
25
+
26
+ def test_halts_when_separator_missing
27
+ file_prompt = Object.new
28
+ separator_prompt = Object.new
29
+ def file_prompt.call = "/tmp/data.csv"
30
+ def separator_prompt.call = nil
31
+
32
+ step = Csvtool::Interface::CLI::Workflows::Steps::RowExtraction::CollectSourceStep.new(
33
+ file_path_prompt: file_prompt,
34
+ separator_prompt: separator_prompt
35
+ )
36
+
37
+ assert_equal :halt, step.call({})
38
+ end
39
+ end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../../../../test_helper"
4
+ require "csvtool/interface/cli/workflows/steps/row_extraction/execute_step"
5
+
6
+ class ExecuteStepTest < Minitest::Test
7
+ Result = Struct.new(:ok, :data) do
8
+ def ok? = ok
9
+ end
10
+
11
+ class FakeUseCase
12
+ def initialize(result)
13
+ @result = result
14
+ end
15
+
16
+ def extract(session:, headers:, on_row:)
17
+ @called = true
18
+ on_row.call(["Bob", "Paris"]) if @result.ok?
19
+ @result
20
+ end
21
+
22
+ attr_reader :called
23
+ end
24
+
25
+ class FakePresenter
26
+ attr_reader :rows, :written
27
+
28
+ def initialize(stdout:, headers:, col_sep:)
29
+ @rows = []
30
+ @written = nil
31
+ end
32
+
33
+ def print_row(fields)
34
+ @rows << fields
35
+ end
36
+
37
+ def print_file_written(path)
38
+ @written = path
39
+ end
40
+ end
41
+
42
+ class FakeErrors
43
+ attr_reader :out_of_bounds
44
+
45
+ def row_range_out_of_bounds(count)
46
+ @out_of_bounds = count
47
+ end
48
+ end
49
+
50
+ def test_prints_rows_and_reports_out_of_bounds
51
+ errors = FakeErrors.new
52
+ step = Csvtool::Interface::CLI::Workflows::Steps::RowExtraction::ExecuteStep.new(
53
+ stdout: StringIO.new,
54
+ errors: errors,
55
+ presenter_class: FakePresenter
56
+ )
57
+ use_case = FakeUseCase.new(Result.new(true, { matched: false, row_count: 3, wrote_rows: false }))
58
+ context = {
59
+ session: Object.new,
60
+ headers: ["name", "city"],
61
+ use_case: use_case,
62
+ handle_error: ->(_r) { raise "unexpected" }
63
+ }
64
+
65
+ result = step.call(context)
66
+
67
+ assert_nil result
68
+ assert_equal 3, errors.out_of_bounds
69
+ end
70
+
71
+ def test_halts_on_use_case_failure
72
+ handled = []
73
+ step = Csvtool::Interface::CLI::Workflows::Steps::RowExtraction::ExecuteStep.new(
74
+ stdout: StringIO.new,
75
+ errors: FakeErrors.new,
76
+ presenter_class: FakePresenter
77
+ )
78
+ fail_result = Result.new(false, {})
79
+ use_case = FakeUseCase.new(fail_result)
80
+
81
+ result = step.call(
82
+ session: Object.new,
83
+ headers: ["name", "city"],
84
+ use_case: use_case,
85
+ handle_error: ->(r) { handled << r }
86
+ )
87
+
88
+ assert_equal :halt, result
89
+ assert_equal [fail_result], handled
90
+ end
91
+ end