csvops 0.2.0.alpha → 0.4.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +56 -108
  3. data/docs/architecture.md +266 -0
  4. data/docs/release-v0.3.0-alpha.md +74 -0
  5. data/docs/release-v0.4.0-alpha.md +87 -0
  6. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +93 -0
  7. data/lib/csvtool/application/use_cases/run_extraction.rb +3 -3
  8. data/lib/csvtool/application/use_cases/run_row_extraction.rb +3 -3
  9. data/lib/csvtool/application/use_cases/run_row_randomization.rb +105 -0
  10. data/lib/csvtool/cli.rb +9 -1
  11. data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
  12. data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
  13. data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
  14. data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
  15. data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
  16. data/lib/csvtool/domain/row_randomization_session/randomization_options.rb +17 -0
  17. data/lib/csvtool/domain/row_randomization_session/randomization_session.rb +25 -0
  18. data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +24 -0
  19. data/lib/csvtool/domain/row_session/row_source.rb +3 -0
  20. data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
  21. data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
  22. data/lib/csvtool/infrastructure/csv/row_randomizer.rb +83 -0
  23. data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
  24. data/lib/csvtool/interface/cli/errors/presenter.rb +4 -0
  25. data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
  26. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +22 -0
  27. data/lib/csvtool/interface/cli/prompts/seed_prompt.rb +29 -0
  28. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +163 -0
  29. data/lib/csvtool/version.rb +1 -1
  30. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +113 -0
  31. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +124 -0
  32. data/test/csvtool/cli_test.rb +231 -12
  33. data/test/csvtool/cli_unit_test.rb +27 -2
  34. data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
  35. data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
  36. data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
  37. data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
  38. data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
  39. data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
  40. data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
  41. data/test/csvtool/domain/row_randomization_session/randomization_options_test.rb +20 -0
  42. data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +26 -0
  43. data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +42 -0
  44. data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
  45. data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
  46. data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
  47. data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
  48. data/test/csvtool/infrastructure/csv/row_randomizer_test.rb +37 -0
  49. data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
  50. data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
  51. data/test/csvtool/interface/cli/menu_loop_test.rb +78 -10
  52. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +14 -0
  53. data/test/csvtool/interface/cli/prompts/seed_prompt_test.rb +39 -0
  54. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
  55. data/test/fixtures/dedupe_reference.csv +3 -0
  56. data/test/fixtures/dedupe_reference.tsv +3 -0
  57. data/test/fixtures/dedupe_reference_all.csv +5 -0
  58. data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
  59. data/test/fixtures/dedupe_reference_none.csv +2 -0
  60. data/test/fixtures/dedupe_reference_normalization.csv +3 -0
  61. data/test/fixtures/dedupe_source.csv +6 -0
  62. data/test/fixtures/dedupe_source.tsv +6 -0
  63. data/test/fixtures/dedupe_source_no_headers.csv +5 -0
  64. data/test/fixtures/dedupe_source_normalization.csv +4 -0
  65. data/test/fixtures/sample_people_no_headers.csv +3 -0
  66. metadata +50 -6
  67. data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
  68. data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
  69. data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
@@ -11,7 +11,7 @@ class TestCli < Minitest::Test
11
11
 
12
12
  def test_menu_can_exit_cleanly
13
13
  output = StringIO.new
14
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("3\n"), stdout: output, stderr: StringIO.new)
14
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: output, stderr: StringIO.new)
15
15
  assert_equal 0, status
16
16
  assert_includes output.string, "CSV Tool Menu"
17
17
  end
@@ -26,7 +26,7 @@ class TestCli < Minitest::Test
26
26
  "",
27
27
  "y",
28
28
  "",
29
- "3"
29
+ "5"
30
30
  ].join("\n") + "\n"
31
31
 
32
32
  output = StringIO.new
@@ -58,7 +58,7 @@ class TestCli < Minitest::Test
58
58
  "2",
59
59
  "3",
60
60
  "",
61
- "3"
61
+ "5"
62
62
  ].join("\n") + "\n"
63
63
 
64
64
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -79,7 +79,7 @@ class TestCli < Minitest::Test
79
79
  "0",
80
80
  "3",
81
81
  "",
82
- "3"
82
+ "5"
83
83
  ].join("\n") + "\n"
84
84
 
85
85
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -98,7 +98,7 @@ class TestCli < Minitest::Test
98
98
  "2",
99
99
  "3",
100
100
  "",
101
- "3"
101
+ "5"
102
102
  ].join("\n") + "\n"
103
103
 
104
104
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -119,7 +119,7 @@ class TestCli < Minitest::Test
119
119
  "2",
120
120
  "3",
121
121
  "",
122
- "3"
122
+ "5"
123
123
  ].join("\n") + "\n"
124
124
 
125
125
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -144,7 +144,7 @@ class TestCli < Minitest::Test
144
144
  "3",
145
145
  "2",
146
146
  output_path,
147
- "3"
147
+ "5"
148
148
  ].join("\n") + "\n"
149
149
 
150
150
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -164,7 +164,7 @@ class TestCli < Minitest::Test
164
164
  "1",
165
165
  "2",
166
166
  "",
167
- "3"
167
+ "5"
168
168
  ].join("\n") + "\n"
169
169
 
170
170
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -175,6 +175,225 @@ class TestCli < Minitest::Test
175
175
  refute_includes output.string, "Could not parse CSV file."
176
176
  end
177
177
 
178
+ def test_randomize_rows_workflow_prints_header_and_all_data_rows
179
+ output = StringIO.new
180
+ input = [
181
+ "3",
182
+ fixture_path("sample_people.csv"),
183
+ "",
184
+ "",
185
+ "",
186
+ "",
187
+ "5"
188
+ ].join("\n") + "\n"
189
+
190
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
191
+
192
+ assert_equal 0, status
193
+ assert_includes output.string, "name,city"
194
+ assert_includes output.string, "Alice,London"
195
+ assert_includes output.string, "Bob,Paris"
196
+ assert_includes output.string, "Cara,Berlin"
197
+ end
198
+
199
+ def test_randomize_rows_workflow_can_write_to_file
200
+ output = StringIO.new
201
+
202
+ Dir.mktmpdir do |dir|
203
+ output_path = File.join(dir, "randomized_rows.csv")
204
+ input = [
205
+ "3",
206
+ fixture_path("sample_people.csv"),
207
+ "",
208
+ "",
209
+ "",
210
+ "2",
211
+ output_path,
212
+ "5"
213
+ ].join("\n") + "\n"
214
+
215
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
216
+
217
+ assert_equal 0, status
218
+ assert_includes output.string, "Wrote output to #{output_path}"
219
+ lines = File.read(output_path).lines.map(&:strip)
220
+ assert_equal "name,city", lines.first
221
+ assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, lines[1..].sort
222
+ end
223
+ end
224
+
225
+ def test_randomize_rows_workflow_supports_tsv_separator
226
+ output = StringIO.new
227
+ input = [
228
+ "3",
229
+ fixture_path("sample_people.tsv"),
230
+ "2",
231
+ "",
232
+ "",
233
+ "",
234
+ "5"
235
+ ].join("\n") + "\n"
236
+
237
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
238
+
239
+ assert_equal 0, status
240
+ assert_includes output.string, "name\tcity"
241
+ assert_includes output.string, "Alice\tLondon"
242
+ end
243
+
244
+ def test_randomize_rows_workflow_headerless_mode_randomizes_all_rows
245
+ output = StringIO.new
246
+ input = [
247
+ "3",
248
+ fixture_path("sample_people_no_headers.csv"),
249
+ "",
250
+ "n",
251
+ "",
252
+ "",
253
+ "5"
254
+ ].join("\n") + "\n"
255
+
256
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
257
+
258
+ assert_equal 0, status
259
+ refute_includes output.string, "name,city"
260
+ assert_includes output.string, "Alice,London"
261
+ assert_includes output.string, "Bob,Paris"
262
+ assert_includes output.string, "Cara,Berlin"
263
+ end
264
+
265
+ def test_randomize_rows_invalid_seed_returns_to_menu
266
+ output = StringIO.new
267
+ input = [
268
+ "3",
269
+ fixture_path("sample_people.csv"),
270
+ "",
271
+ "",
272
+ "abc",
273
+ "5"
274
+ ].join("\n") + "\n"
275
+
276
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
277
+
278
+ assert_equal 0, status
279
+ assert_includes output.string, "Seed must be an integer."
280
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
281
+ end
282
+
283
+ def test_dedupe_workflow_shell_prompts_and_returns_to_menu
284
+ output = StringIO.new
285
+ input = [
286
+ "4",
287
+ fixture_path("dedupe_source.csv"),
288
+ "",
289
+ "",
290
+ fixture_path("dedupe_reference.csv"),
291
+ "",
292
+ "",
293
+ "customer_id",
294
+ "external_id",
295
+ "",
296
+ "",
297
+ "",
298
+ "5"
299
+ ].join("\n") + "\n"
300
+
301
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
302
+
303
+ assert_equal 0, status
304
+ assert_includes output.string, "Reference CSV file path:"
305
+ assert_includes output.string, "Source key column name:"
306
+ assert_includes output.string, "Reference key column name:"
307
+ assert_includes output.string, "customer_id,name"
308
+ assert_includes output.string, "1,Alice"
309
+ assert_includes output.string, "3,Cara"
310
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
311
+ end
312
+
313
+ def test_dedupe_workflow_can_write_to_file
314
+ output = StringIO.new
315
+
316
+ Dir.mktmpdir do |dir|
317
+ output_path = File.join(dir, "deduped.csv")
318
+ input = [
319
+ "4",
320
+ fixture_path("dedupe_source.csv"),
321
+ "",
322
+ "",
323
+ fixture_path("dedupe_reference.csv"),
324
+ "",
325
+ "",
326
+ "customer_id",
327
+ "external_id",
328
+ "",
329
+ "",
330
+ "2",
331
+ output_path,
332
+ "5"
333
+ ].join("\n") + "\n"
334
+
335
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
336
+
337
+ assert_equal 0, status
338
+ assert_includes output.string, "Wrote output to #{output_path}"
339
+ assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
340
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
341
+ end
342
+ end
343
+
344
+ def test_dedupe_workflow_supports_tsv_separators
345
+ output = StringIO.new
346
+ input = [
347
+ "4",
348
+ fixture_path("dedupe_source.tsv"),
349
+ "2",
350
+ "",
351
+ fixture_path("dedupe_reference.tsv"),
352
+ "2",
353
+ "",
354
+ "customer_id",
355
+ "external_id",
356
+ "",
357
+ "",
358
+ "",
359
+ "5"
360
+ ].join("\n") + "\n"
361
+
362
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
363
+
364
+ assert_equal 0, status
365
+ assert_includes output.string, "customer_id\tname"
366
+ assert_includes output.string, "1\tAlice"
367
+ assert_includes output.string, "3\tCara"
368
+ end
369
+
370
+ def test_dedupe_workflow_headerless_mode_supports_index
371
+ output = StringIO.new
372
+ input = [
373
+ "4",
374
+ fixture_path("dedupe_source_no_headers.csv"),
375
+ "",
376
+ "n",
377
+ fixture_path("dedupe_reference_no_headers.csv"),
378
+ "",
379
+ "n",
380
+ "1",
381
+ "1",
382
+ "",
383
+ "",
384
+ "",
385
+ "5"
386
+ ].join("\n") + "\n"
387
+
388
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
389
+
390
+ assert_equal 0, status
391
+ refute_includes output.string, "customer_id,name"
392
+ assert_includes output.string, "1,Alice"
393
+ assert_includes output.string, "3,Cara"
394
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
395
+ end
396
+
178
397
  def test_end_to_end_file_output_writes_expected_csv
179
398
  output = StringIO.new
180
399
  output_path = nil
@@ -191,7 +410,7 @@ class TestCli < Minitest::Test
191
410
  "y",
192
411
  "2",
193
412
  output_path,
194
- "3"
413
+ "5"
195
414
  ].join("\n") + "\n"
196
415
 
197
416
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -211,7 +430,7 @@ class TestCli < Minitest::Test
211
430
  "1",
212
431
  "",
213
432
  "n",
214
- "3"
433
+ "5"
215
434
  ].join("\n") + "\n"
216
435
 
217
436
  output = StringIO.new
@@ -226,7 +445,7 @@ class TestCli < Minitest::Test
226
445
  output = StringIO.new
227
446
  status = Csvtool::CLI.start(
228
447
  ["menu"],
229
- stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n3\n"),
448
+ stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n"),
230
449
  stdout: output,
231
450
  stderr: StringIO.new
232
451
  )
@@ -247,7 +466,7 @@ class TestCli < Minitest::Test
247
466
  "y",
248
467
  "2",
249
468
  "/tmp/not-a-dir/out.csv",
250
- "3"
469
+ "5"
251
470
  ].join("\n") + "\n"
252
471
 
253
472
  output = StringIO.new
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
16
16
  end
17
17
 
18
18
  def test_menu_command_can_exit_zero
19
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("3\n"), stdout: StringIO.new, stderr: StringIO.new)
19
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: StringIO.new, stderr: StringIO.new)
20
20
  assert_equal 0, status
21
21
  end
22
22
 
@@ -28,11 +28,36 @@ class CliUnitTest < Minitest::Test
28
28
  def test_menu_routes_to_row_range_shell
29
29
  stdout = StringIO.new
30
30
  fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
31
- input = ["2", fixture, "", "2", "3", "", "3"].join("\n") + "\n"
31
+ input = ["2", fixture, "", "2", "3", "", "5"].join("\n") + "\n"
32
32
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
33
33
  assert_equal 0, status
34
34
  assert_includes stdout.string, "name,city"
35
35
  assert_includes stdout.string, "Bob,Paris"
36
36
  assert_includes stdout.string, "Cara,Berlin"
37
37
  end
38
+
39
+ def test_menu_routes_to_randomize_rows_shell
40
+ stdout = StringIO.new
41
+ fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
42
+ input = ["3", fixture, "", "", "", "", "5"].join("\n") + "\n"
43
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
44
+ assert_equal 0, status
45
+ assert_includes stdout.string, "name,city"
46
+ assert_includes stdout.string, "Alice,London"
47
+ assert_includes stdout.string, "Bob,Paris"
48
+ assert_includes stdout.string, "Cara,Berlin"
49
+ end
50
+
51
+ def test_menu_routes_to_dedupe_shell
52
+ stdout = StringIO.new
53
+ source_fixture = File.expand_path("../fixtures/dedupe_source.csv", __dir__)
54
+ reference_fixture = File.expand_path("../fixtures/dedupe_reference.csv", __dir__)
55
+ input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "5"].join("\n") + "\n"
56
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
57
+ assert_equal 0, status
58
+ assert_includes stdout.string, "customer_id,name"
59
+ assert_includes stdout.string, "1,Alice"
60
+ assert_includes stdout.string, "3,Cara"
61
+ assert_includes stdout.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
62
+ end
38
63
  end
@@ -8,7 +8,7 @@ require "csvtool/domain/column_session/column_selection"
8
8
  require "csvtool/domain/column_session/extraction_options"
9
9
  require "csvtool/domain/column_session/preview"
10
10
  require "csvtool/domain/column_session/extraction_value"
11
- require "csvtool/domain/column_session/output_destination"
11
+ require "csvtool/domain/shared/output_destination"
12
12
 
13
13
  class ColumnSessionTest < Minitest::Test
14
14
  def test_state_transitions
@@ -25,7 +25,7 @@ class ColumnSessionTest < Minitest::Test
25
25
  values: [Csvtool::Domain::ColumnSession::ExtractionValue.new("Alice")]
26
26
  )
27
27
  session = session.with_preview(preview).confirm!.with_output_destination(
28
- Csvtool::Domain::ColumnSession::OutputDestination.console
28
+ Csvtool::Domain::Shared::OutputDestination.console
29
29
  )
30
30
 
31
31
  assert_equal true, session.confirmed?
@@ -11,4 +11,14 @@ class CsvSourceTest < Minitest::Test
11
11
  assert_equal "/tmp/a.csv", source.path
12
12
  assert_equal separator, source.separator
13
13
  end
14
+
15
+ def test_rejects_empty_path
16
+ separator = Csvtool::Domain::ColumnSession::Separator.new(",")
17
+
18
+ error = assert_raises(ArgumentError) do
19
+ Csvtool::Domain::ColumnSession::CsvSource.new(path: "", separator: separator)
20
+ end
21
+
22
+ assert_equal "path cannot be empty", error.message
23
+ end
14
24
  end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
5
+
6
+ class CrossCsvDedupeColumnSelectorTest < Minitest::Test
7
+ def test_builds_header_selector_from_input
8
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
9
+
10
+ assert_equal "customer_id", selector.value
11
+ assert_equal true, selector.headers_present?
12
+ end
13
+
14
+ def test_builds_index_selector_from_input
15
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
16
+
17
+ assert_equal 2, selector.value
18
+ assert_equal true, selector.index?
19
+ end
20
+
21
+ def test_rejects_invalid_index_input
22
+ error = assert_raises(ArgumentError) do
23
+ Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "0")
24
+ end
25
+
26
+ assert_equal "column index must be a positive integer", error.message
27
+ end
28
+
29
+ def test_extracts_from_headered_row
30
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
31
+ row = { "customer_id" => "42" }
32
+
33
+ assert_equal "42", selector.extract_from(row)
34
+ end
35
+
36
+ def test_extracts_from_headerless_row_by_index
37
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
38
+ row = ["a", "b", "c"]
39
+
40
+ assert_equal "b", selector.extract_from(row)
41
+ end
42
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
5
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
6
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
7
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
8
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
9
+ require "csvtool/domain/shared/output_destination"
10
+
11
+ class CrossCsvDedupeSessionTest < Minitest::Test
12
+ def test_start_and_with_output_destination
13
+ source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
14
+ path: "/tmp/source.csv",
15
+ separator: ",",
16
+ headers_present: true
17
+ )
18
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
19
+ path: "/tmp/reference.csv",
20
+ separator: ",",
21
+ headers_present: true
22
+ )
23
+ key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
24
+ source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
25
+ reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
26
+ )
27
+ match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
28
+ trim_whitespace: true,
29
+ case_insensitive: false
30
+ )
31
+
32
+ session = Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
33
+ source: source,
34
+ reference: reference,
35
+ key_mapping: key_mapping,
36
+ match_options: match_options
37
+ )
38
+
39
+ destination = Csvtool::Domain::Shared::OutputDestination.console
40
+ updated = session.with_output_destination(destination)
41
+
42
+ assert_equal source, updated.source
43
+ assert_equal reference, updated.reference
44
+ assert_equal key_mapping, updated.key_mapping
45
+ assert_equal match_options, updated.match_options
46
+ assert_equal destination, updated.output_destination
47
+ end
48
+
49
+ def test_rejects_invalid_source_type
50
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
51
+ path: "/tmp/reference.csv",
52
+ separator: ",",
53
+ headers_present: true
54
+ )
55
+ key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
56
+ source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
57
+ reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
58
+ )
59
+ match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
60
+ trim_whitespace: true,
61
+ case_insensitive: false
62
+ )
63
+
64
+ error = assert_raises(ArgumentError) do
65
+ Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
66
+ source: "bad",
67
+ reference: reference,
68
+ key_mapping: key_mapping,
69
+ match_options: match_options
70
+ )
71
+ end
72
+
73
+ assert_equal "source must be CsvProfile", error.message
74
+ end
75
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
5
+
6
+ class CrossCsvDedupeCsvProfileTest < Minitest::Test
7
+ def test_initializes_with_expected_fields
8
+ profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
9
+ path: "/tmp/source.csv",
10
+ separator: ",",
11
+ headers_present: true
12
+ )
13
+
14
+ assert_equal "/tmp/source.csv", profile.path
15
+ assert_equal ",", profile.separator
16
+ assert_equal true, profile.headers_present?
17
+ end
18
+
19
+ def test_requires_path
20
+ error = assert_raises(ArgumentError) do
21
+ Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "", separator: ",", headers_present: true)
22
+ end
23
+
24
+ assert_equal "path cannot be empty", error.message
25
+ end
26
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
5
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
6
+
7
+ class CrossCsvDedupeKeyMappingTest < Minitest::Test
8
+ def test_holds_source_and_reference_selectors
9
+ source_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id")
10
+ reference_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "ref_id")
11
+
12
+ mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
13
+ source_selector: source_selector,
14
+ reference_selector: reference_selector
15
+ )
16
+
17
+ assert_equal source_selector, mapping.source_selector
18
+ assert_equal reference_selector, mapping.reference_selector
19
+ end
20
+
21
+ def test_rejects_non_selector_inputs
22
+ error = assert_raises(ArgumentError) do
23
+ Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
24
+ source_selector: "id",
25
+ reference_selector: "external_id"
26
+ )
27
+ end
28
+
29
+ assert_equal "selectors must be ColumnSelector", error.message
30
+ end
31
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
5
+
6
+ class CrossCsvDedupeMatchOptionsTest < Minitest::Test
7
+ def test_predicates_return_boolean_flags
8
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
9
+ trim_whitespace: true,
10
+ case_insensitive: false
11
+ )
12
+
13
+ assert_equal true, options.trim_whitespace?
14
+ assert_equal false, options.case_insensitive?
15
+ end
16
+
17
+ def test_normalize_trim_on_case_off
18
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
19
+ trim_whitespace: true,
20
+ case_insensitive: false
21
+ )
22
+
23
+ assert_equal "AbC", options.normalize(" AbC ")
24
+ end
25
+
26
+ def test_normalize_trim_on_case_on
27
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
28
+ trim_whitespace: true,
29
+ case_insensitive: true
30
+ )
31
+
32
+ assert_equal "abc", options.normalize(" AbC ")
33
+ end
34
+
35
+ def test_normalize_trim_off_case_on
36
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
37
+ trim_whitespace: false,
38
+ case_insensitive: true
39
+ )
40
+
41
+ assert_equal " abc ", options.normalize(" AbC ")
42
+ end
43
+
44
+ def test_normalize_trim_off_case_off
45
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
46
+ trim_whitespace: false,
47
+ case_insensitive: false
48
+ )
49
+
50
+ assert_equal " AbC ", options.normalize(" AbC ")
51
+ end
52
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/row_randomization_session/randomization_options"
5
+
6
+ class RandomizationOptionsTest < Minitest::Test
7
+ def test_accepts_nil_or_integer_seed
8
+ with_seed = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: 42)
9
+ without_seed = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: nil)
10
+
11
+ assert_equal 42, with_seed.seed
12
+ assert_nil without_seed.seed
13
+ end
14
+
15
+ def test_rejects_non_integer_seed
16
+ assert_raises(ArgumentError) do
17
+ Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: "abc")
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/row_randomization_session/randomization_session"
5
+ require "csvtool/domain/row_randomization_session/randomization_source"
6
+ require "csvtool/domain/row_randomization_session/randomization_options"
7
+ require "csvtool/domain/shared/output_destination"
8
+
9
+ class RandomizationSessionTest < Minitest::Test
10
+ def test_with_output_destination_returns_updated_session
11
+ source = Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
12
+ path: "/tmp/in.csv",
13
+ separator: ",",
14
+ headers_present: true
15
+ )
16
+ options = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: 7)
17
+ session = Csvtool::Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
18
+ destination = Csvtool::Domain::Shared::OutputDestination.console
19
+
20
+ updated = session.with_output_destination(destination)
21
+
22
+ assert_equal source, updated.source
23
+ assert_equal options, updated.options
24
+ assert_equal destination, updated.output_destination
25
+ end
26
+ end