csvops 0.3.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -149
- data/docs/architecture.md +396 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +130 -16
- data/test/csvtool/cli_unit_test.rb +16 -3
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- metadata +93 -8
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
6
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
7
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
8
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
9
|
+
require "csvtool/domain/shared/output_destination"
|
|
10
|
+
|
|
11
|
+
class CrossCsvDedupeSessionTest < Minitest::Test
|
|
12
|
+
def test_start_and_with_output_destination
|
|
13
|
+
source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
14
|
+
path: "/tmp/source.csv",
|
|
15
|
+
separator: ",",
|
|
16
|
+
headers_present: true
|
|
17
|
+
)
|
|
18
|
+
reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
19
|
+
path: "/tmp/reference.csv",
|
|
20
|
+
separator: ",",
|
|
21
|
+
headers_present: true
|
|
22
|
+
)
|
|
23
|
+
key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
24
|
+
source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
|
|
25
|
+
reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
|
|
26
|
+
)
|
|
27
|
+
match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
28
|
+
trim_whitespace: true,
|
|
29
|
+
case_insensitive: false
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
session = Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
|
|
33
|
+
source: source,
|
|
34
|
+
reference: reference,
|
|
35
|
+
key_mapping: key_mapping,
|
|
36
|
+
match_options: match_options
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
destination = Csvtool::Domain::Shared::OutputDestination.console
|
|
40
|
+
updated = session.with_output_destination(destination)
|
|
41
|
+
|
|
42
|
+
assert_equal source, updated.source
|
|
43
|
+
assert_equal reference, updated.reference
|
|
44
|
+
assert_equal key_mapping, updated.key_mapping
|
|
45
|
+
assert_equal match_options, updated.match_options
|
|
46
|
+
assert_equal destination, updated.output_destination
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_rejects_invalid_source_type
|
|
50
|
+
reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
51
|
+
path: "/tmp/reference.csv",
|
|
52
|
+
separator: ",",
|
|
53
|
+
headers_present: true
|
|
54
|
+
)
|
|
55
|
+
key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
56
|
+
source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id"),
|
|
57
|
+
reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "reference_id")
|
|
58
|
+
)
|
|
59
|
+
match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
60
|
+
trim_whitespace: true,
|
|
61
|
+
case_insensitive: false
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
error = assert_raises(ArgumentError) do
|
|
65
|
+
Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
|
|
66
|
+
source: "bad",
|
|
67
|
+
reference: reference,
|
|
68
|
+
key_mapping: key_mapping,
|
|
69
|
+
match_options: match_options
|
|
70
|
+
)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
assert_equal "source must be CsvProfile", error.message
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
5
|
+
|
|
6
|
+
class CrossCsvDedupeCsvProfileTest < Minitest::Test
|
|
7
|
+
def test_initializes_with_expected_fields
|
|
8
|
+
profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
9
|
+
path: "/tmp/source.csv",
|
|
10
|
+
separator: ",",
|
|
11
|
+
headers_present: true
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
assert_equal "/tmp/source.csv", profile.path
|
|
15
|
+
assert_equal ",", profile.separator
|
|
16
|
+
assert_equal true, profile.headers_present?
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_requires_path
|
|
20
|
+
error = assert_raises(ArgumentError) do
|
|
21
|
+
Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "", separator: ",", headers_present: true)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
assert_equal "path cannot be empty", error.message
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
6
|
+
|
|
7
|
+
class CrossCsvDedupeKeyMappingTest < Minitest::Test
|
|
8
|
+
def test_holds_source_and_reference_selectors
|
|
9
|
+
source_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "source_id")
|
|
10
|
+
reference_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "ref_id")
|
|
11
|
+
|
|
12
|
+
mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
13
|
+
source_selector: source_selector,
|
|
14
|
+
reference_selector: reference_selector
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
assert_equal source_selector, mapping.source_selector
|
|
18
|
+
assert_equal reference_selector, mapping.reference_selector
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def test_rejects_non_selector_inputs
|
|
22
|
+
error = assert_raises(ArgumentError) do
|
|
23
|
+
Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
24
|
+
source_selector: "id",
|
|
25
|
+
reference_selector: "external_id"
|
|
26
|
+
)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
assert_equal "selectors must be ColumnSelector", error.message
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
5
|
+
|
|
6
|
+
class CrossCsvDedupeMatchOptionsTest < Minitest::Test
|
|
7
|
+
def test_predicates_return_boolean_flags
|
|
8
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
9
|
+
trim_whitespace: true,
|
|
10
|
+
case_insensitive: false
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
assert_equal true, options.trim_whitespace?
|
|
14
|
+
assert_equal false, options.case_insensitive?
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def test_normalize_trim_on_case_off
|
|
18
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
19
|
+
trim_whitespace: true,
|
|
20
|
+
case_insensitive: false
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
assert_equal "AbC", options.normalize(" AbC ")
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def test_normalize_trim_on_case_on
|
|
27
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
28
|
+
trim_whitespace: true,
|
|
29
|
+
case_insensitive: true
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
assert_equal "abc", options.normalize(" AbC ")
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def test_normalize_trim_off_case_on
|
|
36
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
37
|
+
trim_whitespace: false,
|
|
38
|
+
case_insensitive: true
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
assert_equal " abc ", options.normalize(" AbC ")
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def test_normalize_trim_off_case_off
|
|
45
|
+
options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
46
|
+
trim_whitespace: false,
|
|
47
|
+
case_insensitive: false
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
assert_equal " AbC ", options.normalize(" AbC ")
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -4,7 +4,7 @@ require_relative "../../../test_helper"
|
|
|
4
4
|
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
5
5
|
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
6
6
|
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
7
|
-
require "csvtool/domain/row_randomization_session/randomization_output_destination"
|
|
7
|
+
require "csvtool/domain/shared/output_destination"
|
|
8
8
|
|
|
9
9
|
class RandomizationSessionTest < Minitest::Test
|
|
10
10
|
def test_with_output_destination_returns_updated_session
|
|
@@ -15,7 +15,7 @@ class RandomizationSessionTest < Minitest::Test
|
|
|
15
15
|
)
|
|
16
16
|
options = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: 7)
|
|
17
17
|
session = Csvtool::Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
|
|
18
|
-
destination = Csvtool::Domain::RowRandomizationSession::RandomizationOutputDestination.console
|
|
18
|
+
destination = Csvtool::Domain::Shared::OutputDestination.console
|
|
19
19
|
|
|
20
20
|
updated = session.with_output_destination(destination)
|
|
21
21
|
|
|
@@ -17,12 +17,26 @@ class RandomizationSourceTest < Minitest::Test
|
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
def test_rejects_empty_separator
|
|
20
|
-
assert_raises(ArgumentError) do
|
|
20
|
+
error = assert_raises(ArgumentError) do
|
|
21
21
|
Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
|
|
22
22
|
path: "/tmp/a.csv",
|
|
23
23
|
separator: "",
|
|
24
24
|
headers_present: true
|
|
25
25
|
)
|
|
26
26
|
end
|
|
27
|
+
|
|
28
|
+
assert_equal "separator cannot be empty", error.message
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def test_rejects_empty_path
|
|
32
|
+
error = assert_raises(ArgumentError) do
|
|
33
|
+
Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
|
|
34
|
+
path: "",
|
|
35
|
+
separator: ",",
|
|
36
|
+
headers_present: true
|
|
37
|
+
)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
assert_equal "path cannot be empty", error.message
|
|
27
41
|
end
|
|
28
42
|
end
|
|
@@ -4,7 +4,7 @@ require_relative "../../../test_helper"
|
|
|
4
4
|
require "csvtool/domain/row_session/row_session"
|
|
5
5
|
require "csvtool/domain/row_session/row_source"
|
|
6
6
|
require "csvtool/domain/row_session/row_range"
|
|
7
|
-
require "csvtool/domain/row_session/row_output_destination"
|
|
7
|
+
require "csvtool/domain/shared/output_destination"
|
|
8
8
|
|
|
9
9
|
class RowSessionTest < Minitest::Test
|
|
10
10
|
def test_starts_and_sets_output_destination
|
|
@@ -12,7 +12,7 @@ class RowSessionTest < Minitest::Test
|
|
|
12
12
|
row_range = Csvtool::Domain::RowSession::RowRange.new(start_row: 1, end_row: 2)
|
|
13
13
|
|
|
14
14
|
session = Csvtool::Domain::RowSession::RowSession.start(source: source, row_range: row_range)
|
|
15
|
-
destination = Csvtool::Domain::RowSession::RowOutputDestination.console
|
|
15
|
+
destination = Csvtool::Domain::Shared::OutputDestination.console
|
|
16
16
|
updated = session.with_output_destination(destination)
|
|
17
17
|
|
|
18
18
|
assert_equal source, updated.source
|
|
@@ -9,4 +9,20 @@ class RowSourceTest < Minitest::Test
|
|
|
9
9
|
assert_equal "/tmp/a.csv", source.path
|
|
10
10
|
assert_equal "\t", source.separator
|
|
11
11
|
end
|
|
12
|
+
|
|
13
|
+
def test_rejects_empty_path
|
|
14
|
+
error = assert_raises(ArgumentError) do
|
|
15
|
+
Csvtool::Domain::RowSession::RowSource.new(path: "", separator: ",")
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
assert_equal "path cannot be empty", error.message
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def test_rejects_empty_separator
|
|
22
|
+
error = assert_raises(ArgumentError) do
|
|
23
|
+
Csvtool::Domain::RowSession::RowSource.new(path: "/tmp/a.csv", separator: "")
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
assert_equal "separator cannot be empty", error.message
|
|
27
|
+
end
|
|
12
28
|
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/shared/output_destination"
|
|
5
|
+
|
|
6
|
+
class SharedOutputDestinationTest < Minitest::Test
|
|
7
|
+
def test_builds_console_and_file_destinations
|
|
8
|
+
console = Csvtool::Domain::Shared::OutputDestination.console
|
|
9
|
+
file = Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv")
|
|
10
|
+
|
|
11
|
+
assert_equal true, console.console?
|
|
12
|
+
assert_equal false, console.file?
|
|
13
|
+
assert_equal true, file.file?
|
|
14
|
+
assert_equal "/tmp/out.csv", file.path
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def test_rejects_empty_file_path
|
|
18
|
+
error = assert_raises(ArgumentError) do
|
|
19
|
+
Csvtool::Domain::Shared::OutputDestination.file(path: "")
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
assert_equal "file output path cannot be empty", error.message
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/csv/cross_csv_deduper"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
6
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
7
|
+
require "tmpdir"
|
|
8
|
+
|
|
9
|
+
class InfrastructureCrossCsvDeduperTest < Minitest::Test
|
|
10
|
+
def fixture_path(name)
|
|
11
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def test_filters_source_rows_by_reference_column_values
|
|
15
|
+
deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
|
|
16
|
+
|
|
17
|
+
result = deduper.call(
|
|
18
|
+
source_path: fixture_path("dedupe_source.csv"),
|
|
19
|
+
reference_path: fixture_path("dedupe_reference.csv"),
|
|
20
|
+
source_selector: header_selector("customer_id"),
|
|
21
|
+
reference_selector: header_selector("external_id"),
|
|
22
|
+
source_col_sep: ",",
|
|
23
|
+
reference_col_sep: ","
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
assert_equal ["customer_id", "name"], result[:headers]
|
|
27
|
+
assert_equal 5, result[:source_rows]
|
|
28
|
+
assert_equal 3, result[:removed_rows]
|
|
29
|
+
assert_equal 2, result[:kept_rows_count]
|
|
30
|
+
assert_equal [%w[1 Alice], %w[3 Cara]], result[:kept_rows]
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def test_normalization_trim_on_case_off
|
|
34
|
+
deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
|
|
35
|
+
|
|
36
|
+
result = deduper.call(
|
|
37
|
+
source_path: fixture_path("dedupe_source_normalization.csv"),
|
|
38
|
+
reference_path: fixture_path("dedupe_reference_normalization.csv"),
|
|
39
|
+
source_selector: header_selector("customer_id"),
|
|
40
|
+
reference_selector: header_selector("external_id"),
|
|
41
|
+
match_options: match_options(trim_whitespace: true, case_insensitive: false)
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
assert_equal 3, result[:kept_rows_count]
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def test_normalization_trim_on_case_on
|
|
48
|
+
deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
|
|
49
|
+
|
|
50
|
+
result = deduper.call(
|
|
51
|
+
source_path: fixture_path("dedupe_source_normalization.csv"),
|
|
52
|
+
reference_path: fixture_path("dedupe_reference_normalization.csv"),
|
|
53
|
+
source_selector: header_selector("customer_id"),
|
|
54
|
+
reference_selector: header_selector("external_id"),
|
|
55
|
+
match_options: match_options(trim_whitespace: true, case_insensitive: true)
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
assert_equal 1, result[:kept_rows_count]
|
|
59
|
+
assert_equal [%w[B2 Bob]], result[:kept_rows]
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def test_normalization_trim_off_case_on
|
|
63
|
+
deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
|
|
64
|
+
|
|
65
|
+
result = deduper.call(
|
|
66
|
+
source_path: fixture_path("dedupe_source_normalization.csv"),
|
|
67
|
+
reference_path: fixture_path("dedupe_reference_normalization.csv"),
|
|
68
|
+
source_selector: header_selector("customer_id"),
|
|
69
|
+
reference_selector: header_selector("external_id"),
|
|
70
|
+
match_options: match_options(trim_whitespace: false, case_insensitive: true)
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
assert_equal 2, result[:kept_rows_count]
|
|
74
|
+
assert_equal [[" A1 ", "Alice"], %w[B2 Bob]], result[:kept_rows]
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def test_normalization_trim_off_case_off
|
|
78
|
+
deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
|
|
79
|
+
|
|
80
|
+
result = deduper.call(
|
|
81
|
+
source_path: fixture_path("dedupe_source_normalization.csv"),
|
|
82
|
+
reference_path: fixture_path("dedupe_reference_normalization.csv"),
|
|
83
|
+
source_selector: header_selector("customer_id"),
|
|
84
|
+
reference_selector: header_selector("external_id"),
|
|
85
|
+
match_options: match_options(trim_whitespace: false, case_insensitive: false)
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
assert_equal 3, result[:kept_rows_count]
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def test_each_retained_streams_rows_and_reports_stats
|
|
92
|
+
deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
|
|
93
|
+
yielded_rows = []
|
|
94
|
+
|
|
95
|
+
result = deduper.each_retained(
|
|
96
|
+
source_path: fixture_path("dedupe_source.csv"),
|
|
97
|
+
reference_path: fixture_path("dedupe_reference.csv"),
|
|
98
|
+
source_selector: header_selector("customer_id"),
|
|
99
|
+
reference_selector: header_selector("external_id")
|
|
100
|
+
) { |fields| yielded_rows << fields }
|
|
101
|
+
|
|
102
|
+
assert_equal [%w[1 Alice], %w[3 Cara]], yielded_rows
|
|
103
|
+
assert_equal 5, result[:source_rows]
|
|
104
|
+
assert_equal 3, result[:removed_rows]
|
|
105
|
+
assert_equal 2, result[:kept_rows_count]
|
|
106
|
+
refute_includes result.keys, :kept_rows
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
def test_each_retained_supports_large_inputs_with_streaming
|
|
110
|
+
deduper = Csvtool::Infrastructure::CSV::CrossCsvDeduper.new
|
|
111
|
+
|
|
112
|
+
Dir.mktmpdir do |dir|
|
|
113
|
+
source_path = File.join(dir, "source.csv")
|
|
114
|
+
reference_path = File.join(dir, "reference.csv")
|
|
115
|
+
|
|
116
|
+
File.open(source_path, "w") do |file|
|
|
117
|
+
file.puts "id,name"
|
|
118
|
+
10_000.times { |index| file.puts "#{index},name#{index}" }
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
File.open(reference_path, "w") do |file|
|
|
122
|
+
file.puts "external_id"
|
|
123
|
+
10_000.times do |index|
|
|
124
|
+
file.puts index.to_s if (index % 2).zero?
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
yielded_count = 0
|
|
129
|
+
result = deduper.each_retained(
|
|
130
|
+
source_path: source_path,
|
|
131
|
+
reference_path: reference_path,
|
|
132
|
+
source_selector: header_selector("id"),
|
|
133
|
+
reference_selector: header_selector("external_id")
|
|
134
|
+
) { |_fields| yielded_count += 1 }
|
|
135
|
+
|
|
136
|
+
assert_equal 10_000, result[:source_rows]
|
|
137
|
+
assert_equal 5_000, result[:removed_rows]
|
|
138
|
+
assert_equal 5_000, result[:kept_rows_count]
|
|
139
|
+
assert_equal 5_000, yielded_count
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
private
|
|
144
|
+
|
|
145
|
+
def header_selector(name)
|
|
146
|
+
Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: name)
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def match_options(trim_whitespace:, case_insensitive:)
|
|
150
|
+
Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
151
|
+
trim_whitespace: trim_whitespace,
|
|
152
|
+
case_insensitive: case_insensitive
|
|
153
|
+
)
|
|
154
|
+
end
|
|
155
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/csv/selector_validator"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
6
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
7
|
+
|
|
8
|
+
class InfrastructureSelectorValidatorTest < Minitest::Test
|
|
9
|
+
def fixture_path(name)
|
|
10
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def test_accepts_header_selector_when_column_exists
|
|
14
|
+
validator = Csvtool::Infrastructure::CSV::SelectorValidator.new
|
|
15
|
+
profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
16
|
+
path: fixture_path("dedupe_source.csv"),
|
|
17
|
+
separator: ",",
|
|
18
|
+
headers_present: true
|
|
19
|
+
)
|
|
20
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
|
|
21
|
+
headers_present: true,
|
|
22
|
+
input: "customer_id"
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
assert_equal true, validator.valid?(profile: profile, selector: selector)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def test_rejects_header_selector_when_column_missing
|
|
29
|
+
validator = Csvtool::Infrastructure::CSV::SelectorValidator.new
|
|
30
|
+
profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
31
|
+
path: fixture_path("dedupe_source.csv"),
|
|
32
|
+
separator: ",",
|
|
33
|
+
headers_present: true
|
|
34
|
+
)
|
|
35
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
|
|
36
|
+
headers_present: true,
|
|
37
|
+
input: "missing"
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
assert_equal false, validator.valid?(profile: profile, selector: selector)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def test_accepts_index_selector_when_in_range
|
|
44
|
+
validator = Csvtool::Infrastructure::CSV::SelectorValidator.new
|
|
45
|
+
profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
46
|
+
path: fixture_path("dedupe_source_no_headers.csv"),
|
|
47
|
+
separator: ",",
|
|
48
|
+
headers_present: false
|
|
49
|
+
)
|
|
50
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
|
|
51
|
+
headers_present: false,
|
|
52
|
+
input: "2"
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
assert_equal true, validator.valid?(profile: profile, selector: selector)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def test_rejects_index_selector_when_out_of_range
|
|
59
|
+
validator = Csvtool::Infrastructure::CSV::SelectorValidator.new
|
|
60
|
+
profile = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
61
|
+
path: fixture_path("dedupe_source_no_headers.csv"),
|
|
62
|
+
separator: ",",
|
|
63
|
+
headers_present: false
|
|
64
|
+
)
|
|
65
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
|
|
66
|
+
headers_present: false,
|
|
67
|
+
input: "9"
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
assert_equal false, validator.valid?(profile: profile, selector: selector)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
class InfrastructureCsvCrossCsvDedupeFileWriterTest < Minitest::Test
|
|
8
|
+
class FakeDeduper
|
|
9
|
+
def each_retained(**_kwargs)
|
|
10
|
+
yield %w[1 Alice]
|
|
11
|
+
yield %w[3 Cara]
|
|
12
|
+
{ source_rows: 5, removed_rows: 3, kept_rows_count: 2 }
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_writes_retained_rows_and_returns_stats
|
|
17
|
+
writer = Csvtool::Infrastructure::Output::CsvCrossCsvDedupeFileWriter.new(deduper: FakeDeduper.new)
|
|
18
|
+
|
|
19
|
+
Dir.mktmpdir do |dir|
|
|
20
|
+
output_path = File.join(dir, "deduped.csv")
|
|
21
|
+
stats = writer.call(
|
|
22
|
+
path: output_path,
|
|
23
|
+
headers: ["customer_id", "name"],
|
|
24
|
+
col_sep: ",",
|
|
25
|
+
dedupe_options: { source_path: "source.csv", reference_path: "reference.csv" }
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
|
|
29
|
+
assert_equal 2, stats[:kept_rows_count]
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/infrastructure/output/csv_file_writer"
|
|
5
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
6
5
|
require "tmpdir"
|
|
7
6
|
|
|
8
7
|
class InfrastructureCsvFileWriterTest < Minitest::Test
|
|
@@ -13,10 +12,7 @@ class InfrastructureCsvFileWriterTest < Minitest::Test
|
|
|
13
12
|
end
|
|
14
13
|
|
|
15
14
|
def test_writes_header_and_values
|
|
16
|
-
stdout = StringIO.new
|
|
17
15
|
writer = Csvtool::Infrastructure::Output::CsvFileWriter.new(
|
|
18
|
-
stdout: stdout,
|
|
19
|
-
errors: Csvtool::Interface::CLI::Errors::Presenter.new(stdout: stdout),
|
|
20
16
|
value_streamer: FakeStreamer.new
|
|
21
17
|
)
|
|
22
18
|
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/output/csv_randomized_row_file_writer"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
class InfrastructureCsvRandomizedRowFileWriterTest < Minitest::Test
|
|
8
|
+
class FakeRandomizer
|
|
9
|
+
def each(file_path:, col_sep:, headers:, seed:)
|
|
10
|
+
yield ["Bob", "Paris"]
|
|
11
|
+
yield ["Cara", "Berlin"]
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def test_writes_randomized_rows_with_headers
|
|
16
|
+
writer = Csvtool::Infrastructure::Output::CsvRandomizedRowFileWriter.new(row_randomizer: FakeRandomizer.new)
|
|
17
|
+
|
|
18
|
+
Dir.mktmpdir do |dir|
|
|
19
|
+
output_path = File.join(dir, "randomized.csv")
|
|
20
|
+
writer.call(
|
|
21
|
+
path: output_path,
|
|
22
|
+
headers: ["name", "city"],
|
|
23
|
+
file_path: "ignored.csv",
|
|
24
|
+
col_sep: ",",
|
|
25
|
+
headers_present: true,
|
|
26
|
+
seed: 123
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/infrastructure/output/csv_row_file_writer"
|
|
5
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
6
5
|
require "tmpdir"
|
|
7
6
|
|
|
8
7
|
class InfrastructureCsvRowFileWriterTest < Minitest::Test
|
|
@@ -15,10 +14,7 @@ class InfrastructureCsvRowFileWriterTest < Minitest::Test
|
|
|
15
14
|
end
|
|
16
15
|
|
|
17
16
|
def test_writes_header_and_rows_to_file
|
|
18
|
-
stdout = StringIO.new
|
|
19
17
|
writer = Csvtool::Infrastructure::Output::CsvRowFileWriter.new(
|
|
20
|
-
stdout: stdout,
|
|
21
|
-
errors: Csvtool::Interface::CLI::Errors::Presenter.new(stdout: stdout),
|
|
22
18
|
row_streamer: FakeRowStreamer.new
|
|
23
19
|
)
|
|
24
20
|
|
|
@@ -35,6 +31,7 @@ class InfrastructureCsvRowFileWriterTest < Minitest::Test
|
|
|
35
31
|
|
|
36
32
|
assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
|
|
37
33
|
assert_equal true, stats[:matched]
|
|
34
|
+
assert_equal true, stats[:wrote_rows]
|
|
38
35
|
end
|
|
39
36
|
end
|
|
40
37
|
end
|