csvops 0.3.0.alpha → 0.4.0.alpha

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/README.md +56 -142
  3. data/docs/architecture.md +266 -0
  4. data/docs/release-v0.4.0-alpha.md +87 -0
  5. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +93 -0
  6. data/lib/csvtool/application/use_cases/run_extraction.rb +3 -3
  7. data/lib/csvtool/application/use_cases/run_row_extraction.rb +3 -3
  8. data/lib/csvtool/application/use_cases/run_row_randomization.rb +3 -3
  9. data/lib/csvtool/cli.rb +5 -1
  10. data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
  11. data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
  12. data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
  13. data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
  14. data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
  15. data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
  16. data/lib/csvtool/domain/row_session/row_source.rb +3 -0
  17. data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
  18. data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
  19. data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
  20. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  21. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +163 -0
  22. data/lib/csvtool/version.rb +1 -1
  23. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +113 -0
  24. data/test/csvtool/cli_test.rb +130 -16
  25. data/test/csvtool/cli_unit_test.rb +16 -3
  26. data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
  27. data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
  28. data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
  29. data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
  30. data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
  31. data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
  32. data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
  33. data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
  34. data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
  35. data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
  36. data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
  37. data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
  38. data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
  39. data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
  40. data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
  41. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
  42. data/test/fixtures/dedupe_reference.csv +3 -0
  43. data/test/fixtures/dedupe_reference.tsv +3 -0
  44. data/test/fixtures/dedupe_reference_all.csv +5 -0
  45. data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
  46. data/test/fixtures/dedupe_reference_none.csv +2 -0
  47. data/test/fixtures/dedupe_reference_normalization.csv +3 -0
  48. data/test/fixtures/dedupe_source.csv +6 -0
  49. data/test/fixtures/dedupe_source.tsv +6 -0
  50. data/test/fixtures/dedupe_source_no_headers.csv +5 -0
  51. data/test/fixtures/dedupe_source_normalization.csv +4 -0
  52. metadata +34 -8
  53. data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
  54. data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
  55. data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
  56. data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
  57. data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/application/use_cases/run_cross_csv_dedupe"
5
+ require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
6
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
7
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
8
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
9
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
10
+ require "csvtool/domain/shared/output_destination"
11
+ require "tmpdir"
12
+
13
+ class RunCrossCsvDedupeTest < Minitest::Test
14
+ def fixture_path(name)
15
+ File.expand_path("../../../fixtures/#{name}", __dir__)
16
+ end
17
+
18
+ def test_streams_retained_rows_to_callbacks
19
+ use_case = Csvtool::Application::UseCases::RunCrossCsvDedupe.new
20
+ headers = nil
21
+ rows = []
22
+
23
+ result = use_case.call(
24
+ session: build_session(
25
+ source_path: fixture_path("dedupe_source.csv"),
26
+ reference_path: fixture_path("dedupe_reference.csv"),
27
+ source_selector_input: "customer_id",
28
+ reference_selector_input: "external_id",
29
+ output_destination: Csvtool::Domain::Shared::OutputDestination.console
30
+ ),
31
+ on_header: ->(value) { headers = value },
32
+ on_row: ->(fields) { rows << fields }
33
+ )
34
+
35
+ assert_equal true, result.ok?
36
+ assert_equal ["customer_id", "name"], headers
37
+ assert_equal [%w[1 Alice], %w[3 Cara]], rows
38
+ assert_equal 5, result.data[:stats][:source_rows]
39
+ assert_equal 3, result.data[:stats][:removed_rows]
40
+ assert_equal 2, result.data[:stats][:kept_rows_count]
41
+ end
42
+
43
+ def test_writes_to_file_output_destination
44
+ use_case = Csvtool::Application::UseCases::RunCrossCsvDedupe.new
45
+
46
+ Dir.mktmpdir do |dir|
47
+ output_path = File.join(dir, "deduped.csv")
48
+ result = use_case.call(
49
+ session: build_session(
50
+ source_path: fixture_path("dedupe_source.csv"),
51
+ reference_path: fixture_path("dedupe_reference.csv"),
52
+ source_selector_input: "customer_id",
53
+ reference_selector_input: "external_id",
54
+ output_destination: Csvtool::Domain::Shared::OutputDestination.file(path: output_path)
55
+ )
56
+ )
57
+
58
+ assert_equal true, result.ok?
59
+ assert_equal output_path, result.data[:output_path]
60
+ assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
61
+ end
62
+ end
63
+
64
+ def test_returns_column_not_found_when_selector_invalid
65
+ use_case = Csvtool::Application::UseCases::RunCrossCsvDedupe.new
66
+
67
+ result = use_case.call(
68
+ session: build_session(
69
+ source_path: fixture_path("dedupe_source.csv"),
70
+ reference_path: fixture_path("dedupe_reference.csv"),
71
+ source_selector_input: "missing",
72
+ reference_selector_input: "external_id",
73
+ output_destination: Csvtool::Domain::Shared::OutputDestination.console
74
+ )
75
+ )
76
+
77
+ assert_equal false, result.ok?
78
+ assert_equal :column_not_found, result.error
79
+ end
80
+
81
+ private
82
+
83
+ def build_session(source_path:, reference_path:, source_selector_input:, reference_selector_input:, output_destination:)
84
+ source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
85
+ path: source_path,
86
+ separator: ",",
87
+ headers_present: true
88
+ )
89
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
90
+ path: reference_path,
91
+ separator: ",",
92
+ headers_present: true
93
+ )
94
+ key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
95
+ source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
96
+ headers_present: true,
97
+ input: source_selector_input
98
+ ),
99
+ reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
100
+ headers_present: true,
101
+ input: reference_selector_input
102
+ )
103
+ )
104
+ match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
105
+ trim_whitespace: true,
106
+ case_insensitive: false
107
+ )
108
+
109
+ Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession
110
+ .start(source: source, reference: reference, key_mapping: key_mapping, match_options: match_options)
111
+ .with_output_destination(output_destination)
112
+ end
113
+ end
@@ -11,7 +11,7 @@ class TestCli < Minitest::Test
11
11
 
12
12
  def test_menu_can_exit_cleanly
13
13
  output = StringIO.new
14
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("4\n"), stdout: output, stderr: StringIO.new)
14
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: output, stderr: StringIO.new)
15
15
  assert_equal 0, status
16
16
  assert_includes output.string, "CSV Tool Menu"
17
17
  end
@@ -26,7 +26,7 @@ class TestCli < Minitest::Test
26
26
  "",
27
27
  "y",
28
28
  "",
29
- "4"
29
+ "5"
30
30
  ].join("\n") + "\n"
31
31
 
32
32
  output = StringIO.new
@@ -58,7 +58,7 @@ class TestCli < Minitest::Test
58
58
  "2",
59
59
  "3",
60
60
  "",
61
- "4"
61
+ "5"
62
62
  ].join("\n") + "\n"
63
63
 
64
64
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -79,7 +79,7 @@ class TestCli < Minitest::Test
79
79
  "0",
80
80
  "3",
81
81
  "",
82
- "4"
82
+ "5"
83
83
  ].join("\n") + "\n"
84
84
 
85
85
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -98,7 +98,7 @@ class TestCli < Minitest::Test
98
98
  "2",
99
99
  "3",
100
100
  "",
101
- "4"
101
+ "5"
102
102
  ].join("\n") + "\n"
103
103
 
104
104
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -119,7 +119,7 @@ class TestCli < Minitest::Test
119
119
  "2",
120
120
  "3",
121
121
  "",
122
- "4"
122
+ "5"
123
123
  ].join("\n") + "\n"
124
124
 
125
125
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -144,7 +144,7 @@ class TestCli < Minitest::Test
144
144
  "3",
145
145
  "2",
146
146
  output_path,
147
- "4"
147
+ "5"
148
148
  ].join("\n") + "\n"
149
149
 
150
150
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -164,7 +164,7 @@ class TestCli < Minitest::Test
164
164
  "1",
165
165
  "2",
166
166
  "",
167
- "4"
167
+ "5"
168
168
  ].join("\n") + "\n"
169
169
 
170
170
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -184,7 +184,7 @@ class TestCli < Minitest::Test
184
184
  "",
185
185
  "",
186
186
  "",
187
- "4"
187
+ "5"
188
188
  ].join("\n") + "\n"
189
189
 
190
190
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -209,7 +209,7 @@ class TestCli < Minitest::Test
209
209
  "",
210
210
  "2",
211
211
  output_path,
212
- "4"
212
+ "5"
213
213
  ].join("\n") + "\n"
214
214
 
215
215
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -231,7 +231,7 @@ class TestCli < Minitest::Test
231
231
  "",
232
232
  "",
233
233
  "",
234
- "4"
234
+ "5"
235
235
  ].join("\n") + "\n"
236
236
 
237
237
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -250,7 +250,7 @@ class TestCli < Minitest::Test
250
250
  "n",
251
251
  "",
252
252
  "",
253
- "4"
253
+ "5"
254
254
  ].join("\n") + "\n"
255
255
 
256
256
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -270,7 +270,7 @@ class TestCli < Minitest::Test
270
270
  "",
271
271
  "",
272
272
  "abc",
273
- "4"
273
+ "5"
274
274
  ].join("\n") + "\n"
275
275
 
276
276
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -280,6 +280,120 @@ class TestCli < Minitest::Test
280
280
  assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
281
281
  end
282
282
 
283
+ def test_dedupe_workflow_shell_prompts_and_returns_to_menu
284
+ output = StringIO.new
285
+ input = [
286
+ "4",
287
+ fixture_path("dedupe_source.csv"),
288
+ "",
289
+ "",
290
+ fixture_path("dedupe_reference.csv"),
291
+ "",
292
+ "",
293
+ "customer_id",
294
+ "external_id",
295
+ "",
296
+ "",
297
+ "",
298
+ "5"
299
+ ].join("\n") + "\n"
300
+
301
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
302
+
303
+ assert_equal 0, status
304
+ assert_includes output.string, "Reference CSV file path:"
305
+ assert_includes output.string, "Source key column name:"
306
+ assert_includes output.string, "Reference key column name:"
307
+ assert_includes output.string, "customer_id,name"
308
+ assert_includes output.string, "1,Alice"
309
+ assert_includes output.string, "3,Cara"
310
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
311
+ end
312
+
313
+ def test_dedupe_workflow_can_write_to_file
314
+ output = StringIO.new
315
+
316
+ Dir.mktmpdir do |dir|
317
+ output_path = File.join(dir, "deduped.csv")
318
+ input = [
319
+ "4",
320
+ fixture_path("dedupe_source.csv"),
321
+ "",
322
+ "",
323
+ fixture_path("dedupe_reference.csv"),
324
+ "",
325
+ "",
326
+ "customer_id",
327
+ "external_id",
328
+ "",
329
+ "",
330
+ "2",
331
+ output_path,
332
+ "5"
333
+ ].join("\n") + "\n"
334
+
335
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
336
+
337
+ assert_equal 0, status
338
+ assert_includes output.string, "Wrote output to #{output_path}"
339
+ assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
340
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
341
+ end
342
+ end
343
+
344
+ def test_dedupe_workflow_supports_tsv_separators
345
+ output = StringIO.new
346
+ input = [
347
+ "4",
348
+ fixture_path("dedupe_source.tsv"),
349
+ "2",
350
+ "",
351
+ fixture_path("dedupe_reference.tsv"),
352
+ "2",
353
+ "",
354
+ "customer_id",
355
+ "external_id",
356
+ "",
357
+ "",
358
+ "",
359
+ "5"
360
+ ].join("\n") + "\n"
361
+
362
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
363
+
364
+ assert_equal 0, status
365
+ assert_includes output.string, "customer_id\tname"
366
+ assert_includes output.string, "1\tAlice"
367
+ assert_includes output.string, "3\tCara"
368
+ end
369
+
370
+ def test_dedupe_workflow_headerless_mode_supports_index
371
+ output = StringIO.new
372
+ input = [
373
+ "4",
374
+ fixture_path("dedupe_source_no_headers.csv"),
375
+ "",
376
+ "n",
377
+ fixture_path("dedupe_reference_no_headers.csv"),
378
+ "",
379
+ "n",
380
+ "1",
381
+ "1",
382
+ "",
383
+ "",
384
+ "",
385
+ "5"
386
+ ].join("\n") + "\n"
387
+
388
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
389
+
390
+ assert_equal 0, status
391
+ refute_includes output.string, "customer_id,name"
392
+ assert_includes output.string, "1,Alice"
393
+ assert_includes output.string, "3,Cara"
394
+ assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
395
+ end
396
+
283
397
  def test_end_to_end_file_output_writes_expected_csv
284
398
  output = StringIO.new
285
399
  output_path = nil
@@ -296,7 +410,7 @@ class TestCli < Minitest::Test
296
410
  "y",
297
411
  "2",
298
412
  output_path,
299
- "4"
413
+ "5"
300
414
  ].join("\n") + "\n"
301
415
 
302
416
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -316,7 +430,7 @@ class TestCli < Minitest::Test
316
430
  "1",
317
431
  "",
318
432
  "n",
319
- "4"
433
+ "5"
320
434
  ].join("\n") + "\n"
321
435
 
322
436
  output = StringIO.new
@@ -352,7 +466,7 @@ class TestCli < Minitest::Test
352
466
  "y",
353
467
  "2",
354
468
  "/tmp/not-a-dir/out.csv",
355
- "4"
469
+ "5"
356
470
  ].join("\n") + "\n"
357
471
 
358
472
  output = StringIO.new
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
16
16
  end
17
17
 
18
18
  def test_menu_command_can_exit_zero
19
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("4\n"), stdout: StringIO.new, stderr: StringIO.new)
19
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: StringIO.new, stderr: StringIO.new)
20
20
  assert_equal 0, status
21
21
  end
22
22
 
@@ -28,7 +28,7 @@ class CliUnitTest < Minitest::Test
28
28
  def test_menu_routes_to_row_range_shell
29
29
  stdout = StringIO.new
30
30
  fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
31
- input = ["2", fixture, "", "2", "3", "", "4"].join("\n") + "\n"
31
+ input = ["2", fixture, "", "2", "3", "", "5"].join("\n") + "\n"
32
32
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
33
33
  assert_equal 0, status
34
34
  assert_includes stdout.string, "name,city"
@@ -39,7 +39,7 @@ class CliUnitTest < Minitest::Test
39
39
  def test_menu_routes_to_randomize_rows_shell
40
40
  stdout = StringIO.new
41
41
  fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
42
- input = ["3", fixture, "", "", "", "", "4"].join("\n") + "\n"
42
+ input = ["3", fixture, "", "", "", "", "5"].join("\n") + "\n"
43
43
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
44
44
  assert_equal 0, status
45
45
  assert_includes stdout.string, "name,city"
@@ -47,4 +47,17 @@ class CliUnitTest < Minitest::Test
47
47
  assert_includes stdout.string, "Bob,Paris"
48
48
  assert_includes stdout.string, "Cara,Berlin"
49
49
  end
50
+
51
+ def test_menu_routes_to_dedupe_shell
52
+ stdout = StringIO.new
53
+ source_fixture = File.expand_path("../fixtures/dedupe_source.csv", __dir__)
54
+ reference_fixture = File.expand_path("../fixtures/dedupe_reference.csv", __dir__)
55
+ input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "5"].join("\n") + "\n"
56
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
57
+ assert_equal 0, status
58
+ assert_includes stdout.string, "customer_id,name"
59
+ assert_includes stdout.string, "1,Alice"
60
+ assert_includes stdout.string, "3,Cara"
61
+ assert_includes stdout.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
62
+ end
50
63
  end
@@ -8,7 +8,7 @@ require "csvtool/domain/column_session/column_selection"
8
8
  require "csvtool/domain/column_session/extraction_options"
9
9
  require "csvtool/domain/column_session/preview"
10
10
  require "csvtool/domain/column_session/extraction_value"
11
- require "csvtool/domain/column_session/output_destination"
11
+ require "csvtool/domain/shared/output_destination"
12
12
 
13
13
  class ColumnSessionTest < Minitest::Test
14
14
  def test_state_transitions
@@ -25,7 +25,7 @@ class ColumnSessionTest < Minitest::Test
25
25
  values: [Csvtool::Domain::ColumnSession::ExtractionValue.new("Alice")]
26
26
  )
27
27
  session = session.with_preview(preview).confirm!.with_output_destination(
28
- Csvtool::Domain::ColumnSession::OutputDestination.console
28
+ Csvtool::Domain::Shared::OutputDestination.console
29
29
  )
30
30
 
31
31
  assert_equal true, session.confirmed?
@@ -11,4 +11,14 @@ class CsvSourceTest < Minitest::Test
11
11
  assert_equal "/tmp/a.csv", source.path
12
12
  assert_equal separator, source.separator
13
13
  end
14
+
15
+ def test_rejects_empty_path
16
+ separator = Csvtool::Domain::ColumnSession::Separator.new(",")
17
+
18
+ error = assert_raises(ArgumentError) do
19
+ Csvtool::Domain::ColumnSession::CsvSource.new(path: "", separator: separator)
20
+ end
21
+
22
+ assert_equal "path cannot be empty", error.message
23
+ end
14
24
  end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
5
+
6
+ class CrossCsvDedupeColumnSelectorTest < Minitest::Test
7
+ def test_builds_header_selector_from_input
8
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
9
+
10
+ assert_equal "customer_id", selector.value
11
+ assert_equal true, selector.headers_present?
12
+ end
13
+
14
+ def test_builds_index_selector_from_input
15
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
16
+
17
+ assert_equal 2, selector.value
18
+ assert_equal true, selector.index?
19
+ end
20
+
21
+ def test_rejects_invalid_index_input
22
+ error = assert_raises(ArgumentError) do
23
+ Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "0")
24
+ end
25
+
26
+ assert_equal "column index must be a positive integer", error.message
27
+ end
28
+
29
+ def test_extracts_from_headered_row
30
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
31
+ row = { "customer_id" => "42" }
32
+
33
+ assert_equal "42", selector.extract_from(row)
34
+ end
35
+
36
+ def test_extracts_from_headerless_row_by_index
37
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
38
+ row = ["a", "b", "c"]
39
+
40
+ assert_equal "b", selector.extract_from(row)
41
+ end
42
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
5
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
6
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
7
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
8
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
9
+ require "csvtool/domain/shared/output_destination"
10
+
11
+ class CrossCsvDedupeSessionTest < Minitest::Test
12
+ def test_start_and_with_output_destination
13
+ source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
14
+ path: "/tmp/source.csv",
15
+ separator: ",",
16
+ headers_present: true
17
+ )
18
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
19
+ path: "/tmp/reference.csv",
20
+ separator: ",",
21
+ headers_present: true
22
+ )
23
+ key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
24
+ source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
25
+ reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
26
+ )
27
+ match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
28
+ trim_whitespace: true,
29
+ case_insensitive: false
30
+ )
31
+
32
+ session = Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
33
+ source: source,
34
+ reference: reference,
35
+ key_mapping: key_mapping,
36
+ match_options: match_options
37
+ )
38
+
39
+ destination = Csvtool::Domain::Shared::OutputDestination.console
40
+ updated = session.with_output_destination(destination)
41
+
42
+ assert_equal source, updated.source
43
+ assert_equal reference, updated.reference
44
+ assert_equal key_mapping, updated.key_mapping
45
+ assert_equal match_options, updated.match_options
46
+ assert_equal destination, updated.output_destination
47
+ end
48
+
49
+ def test_rejects_invalid_source_type
50
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
51
+ path: "/tmp/reference.csv",
52
+ separator: ",",
53
+ headers_present: true
54
+ )
55
+ key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
56
+ source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
57
+ reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
58
+ )
59
+ match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
60
+ trim_whitespace: true,
61
+ case_insensitive: false
62
+ )
63
+
64
+ error = assert_raises(ArgumentError) do
65
+ Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
66
+ source: "bad",
67
+ reference: reference,
68
+ key_mapping: key_mapping,
69
+ match_options: match_options
70
+ )
71
+ end
72
+
73
+ assert_equal "source must be CsvProfile", error.message
74
+ end
75
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
5
+
6
+ class CrossCsvDedupeCsvProfileTest < Minitest::Test
7
+ def test_initializes_with_expected_fields
8
+ profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
9
+ path: "/tmp/source.csv",
10
+ separator: ",",
11
+ headers_present: true
12
+ )
13
+
14
+ assert_equal "/tmp/source.csv", profile.path
15
+ assert_equal ",", profile.separator
16
+ assert_equal true, profile.headers_present?
17
+ end
18
+
19
+ def test_requires_path
20
+ error = assert_raises(ArgumentError) do
21
+ Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "", separator: ",", headers_present: true)
22
+ end
23
+
24
+ assert_equal "path cannot be empty", error.message
25
+ end
26
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
5
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
6
+
7
+ class CrossCsvDedupeKeyMappingTest < Minitest::Test
8
+ def test_holds_source_and_reference_selectors
9
+ source_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id")
10
+ reference_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "ref_id")
11
+
12
+ mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
13
+ source_selector: source_selector,
14
+ reference_selector: reference_selector
15
+ )
16
+
17
+ assert_equal source_selector, mapping.source_selector
18
+ assert_equal reference_selector, mapping.reference_selector
19
+ end
20
+
21
+ def test_rejects_non_selector_inputs
22
+ error = assert_raises(ArgumentError) do
23
+ Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
24
+ source_selector: "id",
25
+ reference_selector: "external_id"
26
+ )
27
+ end
28
+
29
+ assert_equal "selectors must be ColumnSelector", error.message
30
+ end
31
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
5
+
6
+ class CrossCsvDedupeMatchOptionsTest < Minitest::Test
7
+ def test_predicates_return_boolean_flags
8
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
9
+ trim_whitespace: true,
10
+ case_insensitive: false
11
+ )
12
+
13
+ assert_equal true, options.trim_whitespace?
14
+ assert_equal false, options.case_insensitive?
15
+ end
16
+
17
+ def test_normalize_trim_on_case_off
18
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
19
+ trim_whitespace: true,
20
+ case_insensitive: false
21
+ )
22
+
23
+ assert_equal "AbC", options.normalize(" AbC ")
24
+ end
25
+
26
+ def test_normalize_trim_on_case_on
27
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
28
+ trim_whitespace: true,
29
+ case_insensitive: true
30
+ )
31
+
32
+ assert_equal "abc", options.normalize(" AbC ")
33
+ end
34
+
35
+ def test_normalize_trim_off_case_on
36
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
37
+ trim_whitespace: false,
38
+ case_insensitive: true
39
+ )
40
+
41
+ assert_equal " abc ", options.normalize(" AbC ")
42
+ end
43
+
44
+ def test_normalize_trim_off_case_off
45
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
46
+ trim_whitespace: false,
47
+ case_insensitive: false
48
+ )
49
+
50
+ assert_equal " AbC ", options.normalize(" AbC ")
51
+ end
52
+ end