csvops 0.3.0.alpha → 0.5.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/README.md +69 -149
  3. data/docs/architecture.md +396 -0
  4. data/docs/release-v0.4.0-alpha.md +87 -0
  5. data/docs/release-v0.5.0-alpha.md +89 -0
  6. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
  7. data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
  8. data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
  9. data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
  10. data/lib/csvtool/cli.rb +11 -7
  11. data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
  12. data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
  13. data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
  14. data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
  15. data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
  16. data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
  17. data/lib/csvtool/domain/row_session/row_source.rb +3 -0
  18. data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
  19. data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
  20. data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
  21. data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
  22. data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
  23. data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
  24. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
  25. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  26. data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
  27. data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
  28. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
  29. data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
  30. data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
  31. data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
  32. data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
  33. data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
  34. data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
  35. data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
  36. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
  37. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
  38. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
  39. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
  40. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
  41. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
  42. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
  43. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
  44. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
  45. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
  46. data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
  47. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
  48. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
  49. data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
  50. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
  51. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
  52. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
  53. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
  54. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
  55. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
  56. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
  57. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
  58. data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
  59. data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
  60. data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
  61. data/lib/csvtool/version.rb +1 -1
  62. data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
  63. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
  64. data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
  65. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
  66. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
  67. data/test/csvtool/cli_test.rb +130 -16
  68. data/test/csvtool/cli_unit_test.rb +16 -3
  69. data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
  70. data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
  71. data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
  72. data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
  73. data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
  74. data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
  75. data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
  76. data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
  77. data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
  78. data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
  79. data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
  80. data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
  81. data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
  82. data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
  83. data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
  84. data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
  85. data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
  86. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
  87. data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
  88. data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
  89. data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
  90. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
  91. data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
  92. data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
  93. data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
  94. data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
  95. data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
  96. data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
  97. data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
  98. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
  99. data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
  100. data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
  101. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
  102. data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
  103. data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
  104. data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
  105. data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
  106. data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
  107. data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
  108. data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
  109. data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
  110. data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
  111. data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
  112. data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
  113. data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
  114. data/test/fixtures/dedupe_reference.csv +3 -0
  115. data/test/fixtures/dedupe_reference.tsv +3 -0
  116. data/test/fixtures/dedupe_reference_all.csv +5 -0
  117. data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
  118. data/test/fixtures/dedupe_reference_none.csv +2 -0
  119. data/test/fixtures/dedupe_reference_normalization.csv +3 -0
  120. data/test/fixtures/dedupe_source.csv +6 -0
  121. data/test/fixtures/dedupe_source.tsv +6 -0
  122. data/test/fixtures/dedupe_source_no_headers.csv +5 -0
  123. data/test/fixtures/dedupe_source_normalization.csv +4 -0
  124. metadata +93 -8
  125. data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
  126. data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
  127. data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
  128. data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
  129. data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
5
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
6
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
7
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
8
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
9
+ require "csvtool/domain/shared/output_destination"
10
+
11
+ class CrossCsvDedupeSessionTest < Minitest::Test
12
+ def test_start_and_with_output_destination
13
+ source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
14
+ path: "/tmp/source.csv",
15
+ separator: ",",
16
+ headers_present: true
17
+ )
18
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
19
+ path: "/tmp/reference.csv",
20
+ separator: ",",
21
+ headers_present: true
22
+ )
23
+ key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
24
+ source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
25
+ reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
26
+ )
27
+ match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
28
+ trim_whitespace: true,
29
+ case_insensitive: false
30
+ )
31
+
32
+ session = Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
33
+ source: source,
34
+ reference: reference,
35
+ key_mapping: key_mapping,
36
+ match_options: match_options
37
+ )
38
+
39
+ destination = Csvtool::Domain::Shared::OutputDestination.console
40
+ updated = session.with_output_destination(destination)
41
+
42
+ assert_equal source, updated.source
43
+ assert_equal reference, updated.reference
44
+ assert_equal key_mapping, updated.key_mapping
45
+ assert_equal match_options, updated.match_options
46
+ assert_equal destination, updated.output_destination
47
+ end
48
+
49
+ def test_rejects_invalid_source_type
50
+ reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
51
+ path: "/tmp/reference.csv",
52
+ separator: ",",
53
+ headers_present: true
54
+ )
55
+ key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
56
+ source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
57
+ reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
58
+ )
59
+ match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
60
+ trim_whitespace: true,
61
+ case_insensitive: false
62
+ )
63
+
64
+ error = assert_raises(ArgumentError) do
65
+ Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
66
+ source: "bad",
67
+ reference: reference,
68
+ key_mapping: key_mapping,
69
+ match_options: match_options
70
+ )
71
+ end
72
+
73
+ assert_equal "source must be CsvProfile", error.message
74
+ end
75
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
5
+
6
+ class CrossCsvDedupeCsvProfileTest < Minitest::Test
7
+ def test_initializes_with_expected_fields
8
+ profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
9
+ path: "/tmp/source.csv",
10
+ separator: ",",
11
+ headers_present: true
12
+ )
13
+
14
+ assert_equal "/tmp/source.csv", profile.path
15
+ assert_equal ",", profile.separator
16
+ assert_equal true, profile.headers_present?
17
+ end
18
+
19
+ def test_requires_path
20
+ error = assert_raises(ArgumentError) do
21
+ Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "", separator: ",", headers_present: true)
22
+ end
23
+
24
+ assert_equal "path cannot be empty", error.message
25
+ end
26
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
5
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
6
+
7
+ class CrossCsvDedupeKeyMappingTest < Minitest::Test
8
+ def test_holds_source_and_reference_selectors
9
+ source_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id")
10
+ reference_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "ref_id")
11
+
12
+ mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
13
+ source_selector: source_selector,
14
+ reference_selector: reference_selector
15
+ )
16
+
17
+ assert_equal source_selector, mapping.source_selector
18
+ assert_equal reference_selector, mapping.reference_selector
19
+ end
20
+
21
+ def test_rejects_non_selector_inputs
22
+ error = assert_raises(ArgumentError) do
23
+ Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
24
+ source_selector: "id",
25
+ reference_selector: "external_id"
26
+ )
27
+ end
28
+
29
+ assert_equal "selectors must be ColumnSelector", error.message
30
+ end
31
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
5
+
6
+ class CrossCsvDedupeMatchOptionsTest < Minitest::Test
7
+ def test_predicates_return_boolean_flags
8
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
9
+ trim_whitespace: true,
10
+ case_insensitive: false
11
+ )
12
+
13
+ assert_equal true, options.trim_whitespace?
14
+ assert_equal false, options.case_insensitive?
15
+ end
16
+
17
+ def test_normalize_trim_on_case_off
18
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
19
+ trim_whitespace: true,
20
+ case_insensitive: false
21
+ )
22
+
23
+ assert_equal "AbC", options.normalize(" AbC ")
24
+ end
25
+
26
+ def test_normalize_trim_on_case_on
27
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
28
+ trim_whitespace: true,
29
+ case_insensitive: true
30
+ )
31
+
32
+ assert_equal "abc", options.normalize(" AbC ")
33
+ end
34
+
35
+ def test_normalize_trim_off_case_on
36
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
37
+ trim_whitespace: false,
38
+ case_insensitive: true
39
+ )
40
+
41
+ assert_equal " abc ", options.normalize(" AbC ")
42
+ end
43
+
44
+ def test_normalize_trim_off_case_off
45
+ options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
46
+ trim_whitespace: false,
47
+ case_insensitive: false
48
+ )
49
+
50
+ assert_equal " AbC ", options.normalize(" AbC ")
51
+ end
52
+ end
@@ -4,7 +4,7 @@ require_relative "../../../test_helper"
4
4
  require "csvtool/domain/row_randomization_session/randomization_session"
5
5
  require "csvtool/domain/row_randomization_session/randomization_source"
6
6
  require "csvtool/domain/row_randomization_session/randomization_options"
7
- require "csvtool/domain/row_randomization_session/randomization_output_destination"
7
+ require "csvtool/domain/shared/output_destination"
8
8
 
9
9
  class RandomizationSessionTest < Minitest::Test
10
10
  def test_with_output_destination_returns_updated_session
@@ -15,7 +15,7 @@ class RandomizationSessionTest < Minitest::Test
15
15
  )
16
16
  options = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: 7)
17
17
  session = Csvtool::Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
18
- destination = Csvtool::Domain::RowRandomizationSession::RandomizationOutputDestination.console
18
+ destination = Csvtool::Domain::Shared::OutputDestination.console
19
19
 
20
20
  updated = session.with_output_destination(destination)
21
21
 
@@ -17,12 +17,26 @@ class RandomizationSourceTest < Minitest::Test
17
17
  end
18
18
 
19
19
  def test_rejects_empty_separator
20
- assert_raises(ArgumentError) do
20
+ error = assert_raises(ArgumentError) do
21
21
  Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
22
22
  path: "/tmp/a.csv",
23
23
  separator: "",
24
24
  headers_present: true
25
25
  )
26
26
  end
27
+
28
+ assert_equal "separator cannot be empty", error.message
29
+ end
30
+
31
+ def test_rejects_empty_path
32
+ error = assert_raises(ArgumentError) do
33
+ Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
34
+ path: "",
35
+ separator: ",",
36
+ headers_present: true
37
+ )
38
+ end
39
+
40
+ assert_equal "path cannot be empty", error.message
27
41
  end
28
42
  end
@@ -4,7 +4,7 @@ require_relative "../../../test_helper"
4
4
  require "csvtool/domain/row_session/row_session"
5
5
  require "csvtool/domain/row_session/row_source"
6
6
  require "csvtool/domain/row_session/row_range"
7
- require "csvtool/domain/row_session/row_output_destination"
7
+ require "csvtool/domain/shared/output_destination"
8
8
 
9
9
  class RowSessionTest < Minitest::Test
10
10
  def test_starts_and_sets_output_destination
@@ -12,7 +12,7 @@ class RowSessionTest < Minitest::Test
12
12
  row_range = Csvtool::Domain::RowSession::RowRange.new(start_row: 1, end_row: 2)
13
13
 
14
14
  session = Csvtool::Domain::RowSession::RowSession.start(source: source, row_range: row_range)
15
- destination = Csvtool::Domain::RowSession::RowOutputDestination.console
15
+ destination = Csvtool::Domain::Shared::OutputDestination.console
16
16
  updated = session.with_output_destination(destination)
17
17
 
18
18
  assert_equal source, updated.source
@@ -9,4 +9,20 @@ class RowSourceTest < Minitest::Test
9
9
  assert_equal "/tmp/a.csv", source.path
10
10
  assert_equal "\t", source.separator
11
11
  end
12
+
13
+ def test_rejects_empty_path
14
+ error = assert_raises(ArgumentError) do
15
+ Csvtool::Domain::RowSession::RowSource.new(path: "", separator: ",")
16
+ end
17
+
18
+ assert_equal "path cannot be empty", error.message
19
+ end
20
+
21
+ def test_rejects_empty_separator
22
+ error = assert_raises(ArgumentError) do
23
+ Csvtool::Domain::RowSession::RowSource.new(path: "/tmp/a.csv", separator: "")
24
+ end
25
+
26
+ assert_equal "separator cannot be empty", error.message
27
+ end
12
28
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/domain/shared/output_destination"
5
+
6
+ class SharedOutputDestinationTest < Minitest::Test
7
+ def test_builds_console_and_file_destinations
8
+ console = Csvtool::Domain::Shared::OutputDestination.console
9
+ file = Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv")
10
+
11
+ assert_equal true, console.console?
12
+ assert_equal false, console.file?
13
+ assert_equal true, file.file?
14
+ assert_equal "/tmp/out.csv", file.path
15
+ end
16
+
17
+ def test_rejects_empty_file_path
18
+ error = assert_raises(ArgumentError) do
19
+ Csvtool::Domain::Shared::OutputDestination.file(path: "")
20
+ end
21
+
22
+ assert_equal "file output path cannot be empty", error.message
23
+ end
24
+ end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/csv/cross_csv_deduper"
5
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
6
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
7
+ require "tmpdir"
8
+
9
+ class InfrastructureCrossCsvDeduperTest < Minitest::Test
10
+ def fixture_path(name)
11
+ File.expand_path("../../../fixtures/#{name}", __dir__)
12
+ end
13
+
14
+ def test_filters_source_rows_by_reference_column_values
15
+ deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
16
+
17
+ result = deduper.call(
18
+ source_path: fixture_path("dedupe_source.csv"),
19
+ reference_path: fixture_path("dedupe_reference.csv"),
20
+ source_selector: header_selector("customer_id"),
21
+ reference_selector: header_selector("external_id"),
22
+ source_col_sep: ",",
23
+ reference_col_sep: ","
24
+ )
25
+
26
+ assert_equal ["customer_id", "name"], result[:headers]
27
+ assert_equal 5, result[:source_rows]
28
+ assert_equal 3, result[:removed_rows]
29
+ assert_equal 2, result[:kept_rows_count]
30
+ assert_equal [%w[1 Alice], %w[3 Cara]], result[:kept_rows]
31
+ end
32
+
33
+ def test_normalization_trim_on_case_off
34
+ deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
35
+
36
+ result = deduper.call(
37
+ source_path: fixture_path("dedupe_source_normalization.csv"),
38
+ reference_path: fixture_path("dedupe_reference_normalization.csv"),
39
+ source_selector: header_selector("customer_id"),
40
+ reference_selector: header_selector("external_id"),
41
+ match_options: match_options(trim_whitespace: true, case_insensitive: false)
42
+ )
43
+
44
+ assert_equal 3, result[:kept_rows_count]
45
+ end
46
+
47
+ def test_normalization_trim_on_case_on
48
+ deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
49
+
50
+ result = deduper.call(
51
+ source_path: fixture_path("dedupe_source_normalization.csv"),
52
+ reference_path: fixture_path("dedupe_reference_normalization.csv"),
53
+ source_selector: header_selector("customer_id"),
54
+ reference_selector: header_selector("external_id"),
55
+ match_options: match_options(trim_whitespace: true, case_insensitive: true)
56
+ )
57
+
58
+ assert_equal 1, result[:kept_rows_count]
59
+ assert_equal [%w[B2 Bob]], result[:kept_rows]
60
+ end
61
+
62
+ def test_normalization_trim_off_case_on
63
+ deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
64
+
65
+ result = deduper.call(
66
+ source_path: fixture_path("dedupe_source_normalization.csv"),
67
+ reference_path: fixture_path("dedupe_reference_normalization.csv"),
68
+ source_selector: header_selector("customer_id"),
69
+ reference_selector: header_selector("external_id"),
70
+ match_options: match_options(trim_whitespace: false, case_insensitive: true)
71
+ )
72
+
73
+ assert_equal 2, result[:kept_rows_count]
74
+ assert_equal [[" A1 ", "Alice"], %w[B2 Bob]], result[:kept_rows]
75
+ end
76
+
77
+ def test_normalization_trim_off_case_off
78
+ deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
79
+
80
+ result = deduper.call(
81
+ source_path: fixture_path("dedupe_source_normalization.csv"),
82
+ reference_path: fixture_path("dedupe_reference_normalization.csv"),
83
+ source_selector: header_selector("customer_id"),
84
+ reference_selector: header_selector("external_id"),
85
+ match_options: match_options(trim_whitespace: false, case_insensitive: false)
86
+ )
87
+
88
+ assert_equal 3, result[:kept_rows_count]
89
+ end
90
+
91
+ def test_each_retained_streams_rows_and_reports_stats
92
+ deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
93
+ yielded_rows = []
94
+
95
+ result = deduper.each_retained(
96
+ source_path: fixture_path("dedupe_source.csv"),
97
+ reference_path: fixture_path("dedupe_reference.csv"),
98
+ source_selector: header_selector("customer_id"),
99
+ reference_selector: header_selector("external_id")
100
+ ) { |fields| yielded_rows << fields }
101
+
102
+ assert_equal [%w[1 Alice], %w[3 Cara]], yielded_rows
103
+ assert_equal 5, result[:source_rows]
104
+ assert_equal 3, result[:removed_rows]
105
+ assert_equal 2, result[:kept_rows_count]
106
+ refute_includes result.keys, :kept_rows
107
+ end
108
+
109
+ def test_each_retained_supports_large_inputs_with_streaming
110
+ deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
111
+
112
+ Dir.mktmpdir do |dir|
113
+ source_path = File.join(dir, "source.csv")
114
+ reference_path = File.join(dir, "reference.csv")
115
+
116
+ File.open(source_path, "w") do |file|
117
+ file.puts "id,name"
118
+ 10_000.times { |index| file.puts "#{index},name#{index}" }
119
+ end
120
+
121
+ File.open(reference_path, "w") do |file|
122
+ file.puts "external_id"
123
+ 10_000.times do |index|
124
+ file.puts index.to_s if (index % 2).zero?
125
+ end
126
+ end
127
+
128
+ yielded_count = 0
129
+ result = deduper.each_retained(
130
+ source_path: source_path,
131
+ reference_path: reference_path,
132
+ source_selector: header_selector("id"),
133
+ reference_selector: header_selector("external_id")
134
+ ) { |_fields| yielded_count += 1 }
135
+
136
+ assert_equal 10_000, result[:source_rows]
137
+ assert_equal 5_000, result[:removed_rows]
138
+ assert_equal 5_000, result[:kept_rows_count]
139
+ assert_equal 5_000, yielded_count
140
+ end
141
+ end
142
+
143
+ private
144
+
145
+ def header_selector(name)
146
+ Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: name)
147
+ end
148
+
149
+ def match_options(trim_whitespace:, case_insensitive:)
150
+ Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
151
+ trim_whitespace: trim_whitespace,
152
+ case_insensitive: case_insensitive
153
+ )
154
+ end
155
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/csv/selector_validator"
5
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
6
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
7
+
8
+ class InfrastructureSelectorValidatorTest < Minitest::Test
9
+ def fixture_path(name)
10
+ File.expand_path("../../../fixtures/#{name}", __dir__)
11
+ end
12
+
13
+ def test_accepts_header_selector_when_column_exists
14
+ validator = Csvtool::Infrastructure::CSV::SelectorValidator.new
15
+ profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
16
+ path: fixture_path("dedupe_source.csv"),
17
+ separator: ",",
18
+ headers_present: true
19
+ )
20
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
21
+ headers_present: true,
22
+ input: "customer_id"
23
+ )
24
+
25
+ assert_equal true, validator.valid?(profile: profile, selector: selector)
26
+ end
27
+
28
+ def test_rejects_header_selector_when_column_missing
29
+ validator = Csvtool::Infrastructure::CSV::SelectorValidator.new
30
+ profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
31
+ path: fixture_path("dedupe_source.csv"),
32
+ separator: ",",
33
+ headers_present: true
34
+ )
35
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
36
+ headers_present: true,
37
+ input: "missing"
38
+ )
39
+
40
+ assert_equal false, validator.valid?(profile: profile, selector: selector)
41
+ end
42
+
43
+ def test_accepts_index_selector_when_in_range
44
+ validator = Csvtool::Infrastructure::CSV::SelectorValidator.new
45
+ profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
46
+ path: fixture_path("dedupe_source_no_headers.csv"),
47
+ separator: ",",
48
+ headers_present: false
49
+ )
50
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
51
+ headers_present: false,
52
+ input: "2"
53
+ )
54
+
55
+ assert_equal true, validator.valid?(profile: profile, selector: selector)
56
+ end
57
+
58
+ def test_rejects_index_selector_when_out_of_range
59
+ validator = Csvtool::Infrastructure::CSV::SelectorValidator.new
60
+ profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
61
+ path: fixture_path("dedupe_source_no_headers.csv"),
62
+ separator: ",",
63
+ headers_present: false
64
+ )
65
+ selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
66
+ headers_present: false,
67
+ input: "9"
68
+ )
69
+
70
+ assert_equal false, validator.valid?(profile: profile, selector: selector)
71
+ end
72
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer"
5
+ require "tmpdir"
6
+
7
+ class InfrastructureCsvCrossCsvDedupeFileWriterTest < Minitest::Test
8
+ class FakeDeduper
9
+ def each_retained(**_kwargs)
10
+ yield %w[1 Alice]
11
+ yield %w[3 Cara]
12
+ { source_rows: 5, removed_rows: 3, kept_rows_count: 2 }
13
+ end
14
+ end
15
+
16
+ def test_writes_retained_rows_and_returns_stats
17
+ writer = Csvtool::Infrastructure::Output::CsvCrossCsvDedupeFileWriter.new(deduper: FakeDeduper.new)
18
+
19
+ Dir.mktmpdir do |dir|
20
+ output_path = File.join(dir, "deduped.csv")
21
+ stats = writer.call(
22
+ path: output_path,
23
+ headers: ["customer_id", "name"],
24
+ col_sep: ",",
25
+ dedupe_options: { source_path: "source.csv", reference_path: "reference.csv" }
26
+ )
27
+
28
+ assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
29
+ assert_equal 2, stats[:kept_rows_count]
30
+ end
31
+ end
32
+ end
@@ -2,7 +2,6 @@
2
2
 
3
3
  require_relative "../../../test_helper"
4
4
  require "csvtool/infrastructure/output/csv_file_writer"
5
- require "csvtool/interface/cli/errors/presenter"
6
5
  require "tmpdir"
7
6
 
8
7
  class InfrastructureCsvFileWriterTest < Minitest::Test
@@ -13,10 +12,7 @@ class InfrastructureCsvFileWriterTest < Minitest::Test
13
12
  end
14
13
 
15
14
  def test_writes_header_and_values
16
- stdout = StringIO.new
17
15
  writer = Csvtool::Infrastructure::Output::CsvFileWriter.new(
18
- stdout: stdout,
19
- errors: Csvtool::Interface::CLI::Errors::Presenter.new(stdout: stdout),
20
16
  value_streamer: FakeStreamer.new
21
17
  )
22
18
 
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/output/csv_randomized_row_file_writer"
5
+ require "tmpdir"
6
+
7
+ class InfrastructureCsvRandomizedRowFileWriterTest < Minitest::Test
8
+ class FakeRandomizer
9
+ def each(file_path:, col_sep:, headers:, seed:)
10
+ yield ["Bob", "Paris"]
11
+ yield ["Cara", "Berlin"]
12
+ end
13
+ end
14
+
15
+ def test_writes_randomized_rows_with_headers
16
+ writer = Csvtool::Infrastructure::Output::CsvRandomizedRowFileWriter.new(row_randomizer: FakeRandomizer.new)
17
+
18
+ Dir.mktmpdir do |dir|
19
+ output_path = File.join(dir, "randomized.csv")
20
+ writer.call(
21
+ path: output_path,
22
+ headers: ["name", "city"],
23
+ file_path: "ignored.csv",
24
+ col_sep: ",",
25
+ headers_present: true,
26
+ seed: 123
27
+ )
28
+
29
+ assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
30
+ end
31
+ end
32
+ end
@@ -2,7 +2,6 @@
2
2
 
3
3
  require_relative "../../../test_helper"
4
4
  require "csvtool/infrastructure/output/csv_row_file_writer"
5
- require "csvtool/interface/cli/errors/presenter"
6
5
  require "tmpdir"
7
6
 
8
7
  class InfrastructureCsvRowFileWriterTest < Minitest::Test
@@ -15,10 +14,7 @@ class InfrastructureCsvRowFileWriterTest < Minitest::Test
15
14
  end
16
15
 
17
16
  def test_writes_header_and_rows_to_file
18
- stdout = StringIO.new
19
17
  writer = Csvtool::Infrastructure::Output::CsvRowFileWriter.new(
20
- stdout: stdout,
21
- errors: Csvtool::Interface::CLI::Errors::Presenter.new(stdout: stdout),
22
18
  row_streamer: FakeRowStreamer.new
23
19
  )
24
20
 
@@ -35,6 +31,7 @@ class InfrastructureCsvRowFileWriterTest < Minitest::Test
35
31
 
36
32
  assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
37
33
  assert_equal true, stats[:matched]
34
+ assert_equal true, stats[:wrote_rows]
38
35
  end
39
36
  end
40
37
  end