csvops 0.4.0.alpha → 0.6.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -12
- data/docs/architecture.md +208 -21
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/docs/release-v0.6.0-alpha.md +84 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
- data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
- data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
- data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
- data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/errors/presenter.rb +4 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
- data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
- data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +175 -21
- data/test/csvtool/cli_unit_test.rb +4 -4
- data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
- data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
- data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
- data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +59 -16
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/parity_duplicates_left.csv +4 -0
- data/test/fixtures/parity_duplicates_right.csv +3 -0
- data/test/fixtures/parity_people_header_mismatch.csv +4 -0
- data/test/fixtures/parity_people_many_reordered.csv +13 -0
- data/test/fixtures/parity_people_mismatch.csv +4 -0
- data/test/fixtures/parity_people_reordered.csv +4 -0
- data/test/fixtures/parity_people_reordered.tsv +4 -0
- metadata +90 -1
|
@@ -2,123 +2,133 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/application/use_cases/run_row_randomization"
|
|
5
|
+
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
6
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
7
|
+
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
8
|
+
require "csvtool/domain/shared/output_destination"
|
|
5
9
|
require "tmpdir"
|
|
6
10
|
|
|
7
11
|
class RunRowRandomizationTest < Minitest::Test
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
input = StringIO.new("#{fixture}\n\n\n\n\n")
|
|
12
|
-
|
|
13
|
-
Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
|
|
14
|
-
|
|
15
|
-
assert_includes output.string, "CSV file path:"
|
|
16
|
-
header_index = output.string.index("name,city")
|
|
17
|
-
assert header_index
|
|
18
|
-
%w[Alice,London Bob,Paris Cara,Berlin].each do |row|
|
|
19
|
-
row_index = output.string.index(row)
|
|
20
|
-
assert row_index
|
|
21
|
-
assert_operator header_index, :<, row_index
|
|
12
|
+
class RaisingWriter
|
|
13
|
+
def call(**_kwargs)
|
|
14
|
+
raise Errno::ENOENT
|
|
22
15
|
end
|
|
23
16
|
end
|
|
24
17
|
|
|
25
|
-
def
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
|
|
18
|
+
def fixture_path(name)
|
|
19
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
20
|
+
end
|
|
30
21
|
|
|
31
|
-
|
|
22
|
+
def build_session(file_path:, separator: ",", headers_present: true, seed: nil, output: :console, output_path: nil)
|
|
23
|
+
source = Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
|
|
24
|
+
path: file_path,
|
|
25
|
+
separator: separator,
|
|
26
|
+
headers_present: headers_present
|
|
27
|
+
)
|
|
28
|
+
options = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: seed)
|
|
29
|
+
session = Csvtool::Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
|
|
30
|
+
|
|
31
|
+
session.with_output_destination(
|
|
32
|
+
if output == :file
|
|
33
|
+
Csvtool::Domain::Shared::OutputDestination.file(path: output_path)
|
|
34
|
+
else
|
|
35
|
+
Csvtool::Domain::Shared::OutputDestination.console
|
|
36
|
+
end
|
|
37
|
+
)
|
|
32
38
|
end
|
|
33
39
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
output = StringIO.new
|
|
40
|
+
def test_read_headers_returns_headers_when_enabled
|
|
41
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
37
42
|
|
|
38
|
-
|
|
39
|
-
output_path = File.join(dir, "randomized.csv")
|
|
40
|
-
input = StringIO.new("#{fixture}\n\n\n\n2\n#{output_path}\n")
|
|
43
|
+
result = use_case.read_headers(file_path: fixture_path("sample_people.csv"), col_sep: ",", headers_present: true)
|
|
41
44
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
written = File.read(output_path).lines.map(&:strip)
|
|
45
|
-
assert_equal "name,city", written.first
|
|
46
|
-
assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, written[1..].sort
|
|
47
|
-
assert_includes output.string, "Wrote output to #{output_path}"
|
|
48
|
-
end
|
|
45
|
+
assert result.ok?
|
|
46
|
+
assert_equal ["name", "city"], result.data[:headers]
|
|
49
47
|
end
|
|
50
48
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
output = StringIO.new
|
|
54
|
-
input = StringIO.new("#{fixture}\n2\n\n\n\n")
|
|
49
|
+
def test_read_headers_returns_nil_when_headers_disabled
|
|
50
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
55
51
|
|
|
56
|
-
|
|
52
|
+
result = use_case.read_headers(file_path: fixture_path("sample_people_no_headers.csv"), col_sep: ",", headers_present: false)
|
|
57
53
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
assert_includes output.string, "Bob\tParis"
|
|
61
|
-
assert_includes output.string, "Cara\tBerlin"
|
|
54
|
+
assert result.ok?
|
|
55
|
+
assert_nil result.data[:headers]
|
|
62
56
|
end
|
|
63
57
|
|
|
64
|
-
def
|
|
65
|
-
|
|
66
|
-
output = StringIO.new
|
|
67
|
-
input = StringIO.new("#{fixture}\n5\n:\n\n\n\n")
|
|
58
|
+
def test_read_headers_fails_for_missing_file
|
|
59
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
68
60
|
|
|
69
|
-
|
|
61
|
+
result = use_case.read_headers(file_path: "/tmp/not-present.csv", col_sep: ",", headers_present: true)
|
|
70
62
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
assert_includes output.string, "Bob:Paris"
|
|
74
|
-
assert_includes output.string, "Cara:Berlin"
|
|
63
|
+
refute result.ok?
|
|
64
|
+
assert_equal :file_not_found, result.error
|
|
75
65
|
end
|
|
76
66
|
|
|
77
|
-
def
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
67
|
+
def test_randomize_streams_rows_for_console_mode
|
|
68
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
69
|
+
session = build_session(file_path: fixture_path("sample_people.csv"), seed: 123)
|
|
70
|
+
rows = []
|
|
81
71
|
|
|
82
|
-
|
|
72
|
+
result = use_case.randomize(session: session, headers: ["name", "city"], on_row: ->(fields) { rows << fields })
|
|
83
73
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
assert_includes output.string, "Cara,Berlin"
|
|
74
|
+
assert result.ok?
|
|
75
|
+
assert_equal 3, rows.length
|
|
76
|
+
assert_equal [["Alice", "London"], ["Bob", "Paris"], ["Cara", "Berlin"]].sort, rows.sort
|
|
88
77
|
end
|
|
89
78
|
|
|
90
|
-
def
|
|
91
|
-
|
|
92
|
-
input_data = "#{fixture}\n\n\n123\n\n"
|
|
93
|
-
|
|
94
|
-
out1 = StringIO.new
|
|
95
|
-
out2 = StringIO.new
|
|
79
|
+
def test_randomize_writes_rows_to_file
|
|
80
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
96
81
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
82
|
+
Dir.mktmpdir do |dir|
|
|
83
|
+
output_path = File.join(dir, "randomized.csv")
|
|
84
|
+
session = build_session(
|
|
85
|
+
file_path: fixture_path("sample_people.csv"),
|
|
86
|
+
seed: 123,
|
|
87
|
+
output: :file,
|
|
88
|
+
output_path: output_path
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
result = use_case.randomize(session: session, headers: ["name", "city"])
|
|
92
|
+
|
|
93
|
+
assert result.ok?
|
|
94
|
+
assert_equal output_path, result.data[:output_path]
|
|
95
|
+
lines = File.read(output_path).lines.map(&:strip)
|
|
96
|
+
assert_equal "name,city", lines.first
|
|
97
|
+
assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, lines[1..].sort
|
|
98
|
+
end
|
|
103
99
|
end
|
|
104
100
|
|
|
105
|
-
def
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
101
|
+
def test_same_seed_produces_stable_order
|
|
102
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
103
|
+
session_1 = build_session(file_path: fixture_path("sample_people_many.csv"), seed: 123)
|
|
104
|
+
session_2 = build_session(file_path: fixture_path("sample_people_many.csv"), seed: 123)
|
|
105
|
+
rows_1 = []
|
|
106
|
+
rows_2 = []
|
|
109
107
|
|
|
110
|
-
|
|
108
|
+
result_1 = use_case.randomize(session: session_1, headers: ["name", "city"], on_row: ->(fields) { rows_1 << fields })
|
|
109
|
+
result_2 = use_case.randomize(session: session_2, headers: ["name", "city"], on_row: ->(fields) { rows_2 << fields })
|
|
111
110
|
|
|
112
|
-
|
|
111
|
+
assert result_1.ok?
|
|
112
|
+
assert result_2.ok?
|
|
113
|
+
assert_equal rows_1, rows_2
|
|
113
114
|
end
|
|
114
115
|
|
|
115
|
-
def
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
116
|
+
def test_randomize_returns_cannot_write_output_file_when_writer_fails
|
|
117
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new(
|
|
118
|
+
csv_randomized_row_file_writer: RaisingWriter.new
|
|
119
|
+
)
|
|
120
|
+
session = build_session(
|
|
121
|
+
file_path: fixture_path("sample_people.csv"),
|
|
122
|
+
seed: 123,
|
|
123
|
+
output: :file,
|
|
124
|
+
output_path: "/tmp/randomized.csv"
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
result = use_case.randomize(session: session, headers: ["name", "city"])
|
|
128
|
+
|
|
129
|
+
refute result.ok?
|
|
130
|
+
assert_equal :cannot_write_output_file, result.error
|
|
131
|
+
assert_equal "/tmp/randomized.csv", result.data[:path]
|
|
132
|
+
assert_equal Errno::ENOENT, result.data[:error_class]
|
|
123
133
|
end
|
|
124
134
|
end
|
data/test/csvtool/cli_test.rb
CHANGED
|
@@ -11,7 +11,7 @@ class TestCli < Minitest::Test
|
|
|
11
11
|
|
|
12
12
|
def test_menu_can_exit_cleanly
|
|
13
13
|
output = StringIO.new
|
|
14
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
14
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("6\n"), stdout: output, stderr: StringIO.new)
|
|
15
15
|
assert_equal 0, status
|
|
16
16
|
assert_includes output.string, "CSV Tool Menu"
|
|
17
17
|
end
|
|
@@ -26,7 +26,7 @@ class TestCli < Minitest::Test
|
|
|
26
26
|
"",
|
|
27
27
|
"y",
|
|
28
28
|
"",
|
|
29
|
-
"
|
|
29
|
+
"6"
|
|
30
30
|
].join("\n") + "\n"
|
|
31
31
|
|
|
32
32
|
output = StringIO.new
|
|
@@ -58,7 +58,7 @@ class TestCli < Minitest::Test
|
|
|
58
58
|
"2",
|
|
59
59
|
"3",
|
|
60
60
|
"",
|
|
61
|
-
"
|
|
61
|
+
"6"
|
|
62
62
|
].join("\n") + "\n"
|
|
63
63
|
|
|
64
64
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -79,7 +79,7 @@ class TestCli < Minitest::Test
|
|
|
79
79
|
"0",
|
|
80
80
|
"3",
|
|
81
81
|
"",
|
|
82
|
-
"
|
|
82
|
+
"6"
|
|
83
83
|
].join("\n") + "\n"
|
|
84
84
|
|
|
85
85
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -98,7 +98,7 @@ class TestCli < Minitest::Test
|
|
|
98
98
|
"2",
|
|
99
99
|
"3",
|
|
100
100
|
"",
|
|
101
|
-
"
|
|
101
|
+
"6"
|
|
102
102
|
].join("\n") + "\n"
|
|
103
103
|
|
|
104
104
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -119,7 +119,7 @@ class TestCli < Minitest::Test
|
|
|
119
119
|
"2",
|
|
120
120
|
"3",
|
|
121
121
|
"",
|
|
122
|
-
"
|
|
122
|
+
"6"
|
|
123
123
|
].join("\n") + "\n"
|
|
124
124
|
|
|
125
125
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -144,7 +144,7 @@ class TestCli < Minitest::Test
|
|
|
144
144
|
"3",
|
|
145
145
|
"2",
|
|
146
146
|
output_path,
|
|
147
|
-
"
|
|
147
|
+
"6"
|
|
148
148
|
].join("\n") + "\n"
|
|
149
149
|
|
|
150
150
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -164,7 +164,7 @@ class TestCli < Minitest::Test
|
|
|
164
164
|
"1",
|
|
165
165
|
"2",
|
|
166
166
|
"",
|
|
167
|
-
"
|
|
167
|
+
"6"
|
|
168
168
|
].join("\n") + "\n"
|
|
169
169
|
|
|
170
170
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -184,7 +184,7 @@ class TestCli < Minitest::Test
|
|
|
184
184
|
"",
|
|
185
185
|
"",
|
|
186
186
|
"",
|
|
187
|
-
"
|
|
187
|
+
"6"
|
|
188
188
|
].join("\n") + "\n"
|
|
189
189
|
|
|
190
190
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -209,7 +209,7 @@ class TestCli < Minitest::Test
|
|
|
209
209
|
"",
|
|
210
210
|
"2",
|
|
211
211
|
output_path,
|
|
212
|
-
"
|
|
212
|
+
"6"
|
|
213
213
|
].join("\n") + "\n"
|
|
214
214
|
|
|
215
215
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -231,7 +231,7 @@ class TestCli < Minitest::Test
|
|
|
231
231
|
"",
|
|
232
232
|
"",
|
|
233
233
|
"",
|
|
234
|
-
"
|
|
234
|
+
"6"
|
|
235
235
|
].join("\n") + "\n"
|
|
236
236
|
|
|
237
237
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -250,7 +250,7 @@ class TestCli < Minitest::Test
|
|
|
250
250
|
"n",
|
|
251
251
|
"",
|
|
252
252
|
"",
|
|
253
|
-
"
|
|
253
|
+
"6"
|
|
254
254
|
].join("\n") + "\n"
|
|
255
255
|
|
|
256
256
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -270,7 +270,7 @@ class TestCli < Minitest::Test
|
|
|
270
270
|
"",
|
|
271
271
|
"",
|
|
272
272
|
"abc",
|
|
273
|
-
"
|
|
273
|
+
"6"
|
|
274
274
|
].join("\n") + "\n"
|
|
275
275
|
|
|
276
276
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -295,7 +295,7 @@ class TestCli < Minitest::Test
|
|
|
295
295
|
"",
|
|
296
296
|
"",
|
|
297
297
|
"",
|
|
298
|
-
"
|
|
298
|
+
"6"
|
|
299
299
|
].join("\n") + "\n"
|
|
300
300
|
|
|
301
301
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -329,7 +329,7 @@ class TestCli < Minitest::Test
|
|
|
329
329
|
"",
|
|
330
330
|
"2",
|
|
331
331
|
output_path,
|
|
332
|
-
"
|
|
332
|
+
"6"
|
|
333
333
|
].join("\n") + "\n"
|
|
334
334
|
|
|
335
335
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -356,7 +356,7 @@ class TestCli < Minitest::Test
|
|
|
356
356
|
"",
|
|
357
357
|
"",
|
|
358
358
|
"",
|
|
359
|
-
"
|
|
359
|
+
"6"
|
|
360
360
|
].join("\n") + "\n"
|
|
361
361
|
|
|
362
362
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -382,7 +382,7 @@ class TestCli < Minitest::Test
|
|
|
382
382
|
"",
|
|
383
383
|
"",
|
|
384
384
|
"",
|
|
385
|
-
"
|
|
385
|
+
"6"
|
|
386
386
|
].join("\n") + "\n"
|
|
387
387
|
|
|
388
388
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -394,6 +394,160 @@ class TestCli < Minitest::Test
|
|
|
394
394
|
assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
395
395
|
end
|
|
396
396
|
|
|
397
|
+
def test_parity_workflow_reports_match_and_returns_to_menu
|
|
398
|
+
output = StringIO.new
|
|
399
|
+
input = [
|
|
400
|
+
"5",
|
|
401
|
+
fixture_path("sample_people.csv"),
|
|
402
|
+
fixture_path("sample_people.csv"),
|
|
403
|
+
"",
|
|
404
|
+
"",
|
|
405
|
+
"6"
|
|
406
|
+
].join("\n") + "\n"
|
|
407
|
+
|
|
408
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
409
|
+
|
|
410
|
+
assert_equal 0, status
|
|
411
|
+
assert_includes output.string, "Left CSV file path:"
|
|
412
|
+
assert_includes output.string, "Right CSV file path:"
|
|
413
|
+
assert_includes output.string, "MATCH"
|
|
414
|
+
assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
|
|
415
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
def test_parity_workflow_supports_tsv_separator
|
|
419
|
+
output = StringIO.new
|
|
420
|
+
input = [
|
|
421
|
+
"5",
|
|
422
|
+
fixture_path("sample_people.tsv"),
|
|
423
|
+
fixture_path("parity_people_reordered.tsv"),
|
|
424
|
+
"2",
|
|
425
|
+
"",
|
|
426
|
+
"6"
|
|
427
|
+
].join("\n") + "\n"
|
|
428
|
+
|
|
429
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
430
|
+
|
|
431
|
+
assert_equal 0, status
|
|
432
|
+
assert_includes output.string, "MATCH"
|
|
433
|
+
assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
|
|
434
|
+
end
|
|
435
|
+
|
|
436
|
+
def test_parity_workflow_headerless_mode_compares_all_rows
|
|
437
|
+
output = StringIO.new
|
|
438
|
+
input = [
|
|
439
|
+
"5",
|
|
440
|
+
fixture_path("sample_people_no_headers.csv"),
|
|
441
|
+
fixture_path("sample_people_no_headers.csv"),
|
|
442
|
+
"",
|
|
443
|
+
"n",
|
|
444
|
+
"6"
|
|
445
|
+
].join("\n") + "\n"
|
|
446
|
+
|
|
447
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
448
|
+
|
|
449
|
+
assert_equal 0, status
|
|
450
|
+
assert_includes output.string, "MATCH"
|
|
451
|
+
assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=0 right_only=0"
|
|
452
|
+
end
|
|
453
|
+
|
|
454
|
+
def test_parity_workflow_reports_header_mismatch_in_headered_mode
|
|
455
|
+
output = StringIO.new
|
|
456
|
+
input = [
|
|
457
|
+
"5",
|
|
458
|
+
fixture_path("sample_people.csv"),
|
|
459
|
+
fixture_path("parity_people_header_mismatch.csv"),
|
|
460
|
+
"",
|
|
461
|
+
"",
|
|
462
|
+
"6"
|
|
463
|
+
].join("\n") + "\n"
|
|
464
|
+
|
|
465
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
466
|
+
|
|
467
|
+
assert_equal 0, status
|
|
468
|
+
assert_includes output.string, "CSV headers do not match."
|
|
469
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
470
|
+
end
|
|
471
|
+
|
|
472
|
+
def test_parity_workflow_prints_mismatch_examples_and_counts
|
|
473
|
+
output = StringIO.new
|
|
474
|
+
input = [
|
|
475
|
+
"5",
|
|
476
|
+
fixture_path("sample_people.csv"),
|
|
477
|
+
fixture_path("parity_people_mismatch.csv"),
|
|
478
|
+
"",
|
|
479
|
+
"",
|
|
480
|
+
"6"
|
|
481
|
+
].join("\n") + "\n"
|
|
482
|
+
|
|
483
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
484
|
+
|
|
485
|
+
assert_equal 0, status
|
|
486
|
+
assert_includes output.string, "MISMATCH"
|
|
487
|
+
assert_includes output.string, "Summary: left_rows=3 right_rows=3 left_only=1 right_only=1"
|
|
488
|
+
assert_includes output.string, "Left-only examples:"
|
|
489
|
+
assert_includes output.string, "Cara,Berlin (count +1)"
|
|
490
|
+
assert_includes output.string, "Right-only examples:"
|
|
491
|
+
assert_includes output.string, "Dina,Rome (count +1)"
|
|
492
|
+
end
|
|
493
|
+
|
|
494
|
+
def test_parity_workflow_missing_left_file_returns_to_menu
|
|
495
|
+
output = StringIO.new
|
|
496
|
+
input = [
|
|
497
|
+
"5",
|
|
498
|
+
"/tmp/not-there-left.csv",
|
|
499
|
+
fixture_path("sample_people.csv"),
|
|
500
|
+
"",
|
|
501
|
+
"",
|
|
502
|
+
"6"
|
|
503
|
+
].join("\n") + "\n"
|
|
504
|
+
|
|
505
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
506
|
+
|
|
507
|
+
assert_equal 0, status
|
|
508
|
+
assert_includes output.string, "File not found: /tmp/not-there-left.csv"
|
|
509
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
510
|
+
refute_includes output.string, "Traceback"
|
|
511
|
+
end
|
|
512
|
+
|
|
513
|
+
def test_parity_workflow_missing_right_file_returns_to_menu
|
|
514
|
+
output = StringIO.new
|
|
515
|
+
input = [
|
|
516
|
+
"5",
|
|
517
|
+
fixture_path("sample_people.csv"),
|
|
518
|
+
"/tmp/not-there-right.csv",
|
|
519
|
+
"",
|
|
520
|
+
"",
|
|
521
|
+
"6"
|
|
522
|
+
].join("\n") + "\n"
|
|
523
|
+
|
|
524
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
525
|
+
|
|
526
|
+
assert_equal 0, status
|
|
527
|
+
assert_includes output.string, "File not found: /tmp/not-there-right.csv"
|
|
528
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
529
|
+
refute_includes output.string, "Traceback"
|
|
530
|
+
end
|
|
531
|
+
|
|
532
|
+
def test_parity_workflow_malformed_csv_returns_to_menu
|
|
533
|
+
output = StringIO.new
|
|
534
|
+
input = [
|
|
535
|
+
"5",
|
|
536
|
+
fixture_path("sample_people.csv"),
|
|
537
|
+
fixture_path("sample_people_bad_tail.csv"),
|
|
538
|
+
"",
|
|
539
|
+
"",
|
|
540
|
+
"6"
|
|
541
|
+
].join("\n") + "\n"
|
|
542
|
+
|
|
543
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
544
|
+
|
|
545
|
+
assert_equal 0, status
|
|
546
|
+
assert_includes output.string, "Could not parse CSV file."
|
|
547
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
548
|
+
refute_includes output.string, "Traceback"
|
|
549
|
+
end
|
|
550
|
+
|
|
397
551
|
def test_end_to_end_file_output_writes_expected_csv
|
|
398
552
|
output = StringIO.new
|
|
399
553
|
output_path = nil
|
|
@@ -410,7 +564,7 @@ class TestCli < Minitest::Test
|
|
|
410
564
|
"y",
|
|
411
565
|
"2",
|
|
412
566
|
output_path,
|
|
413
|
-
"
|
|
567
|
+
"6"
|
|
414
568
|
].join("\n") + "\n"
|
|
415
569
|
|
|
416
570
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -430,7 +584,7 @@ class TestCli < Minitest::Test
|
|
|
430
584
|
"1",
|
|
431
585
|
"",
|
|
432
586
|
"n",
|
|
433
|
-
"
|
|
587
|
+
"6"
|
|
434
588
|
].join("\n") + "\n"
|
|
435
589
|
|
|
436
590
|
output = StringIO.new
|
|
@@ -445,7 +599,7 @@ class TestCli < Minitest::Test
|
|
|
445
599
|
output = StringIO.new
|
|
446
600
|
status = Csvtool::CLI.start(
|
|
447
601
|
["menu"],
|
|
448
|
-
stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n"),
|
|
602
|
+
stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n6\n"),
|
|
449
603
|
stdout: output,
|
|
450
604
|
stderr: StringIO.new
|
|
451
605
|
)
|
|
@@ -466,7 +620,7 @@ class TestCli < Minitest::Test
|
|
|
466
620
|
"y",
|
|
467
621
|
"2",
|
|
468
622
|
"/tmp/not-a-dir/out.csv",
|
|
469
|
-
"
|
|
623
|
+
"6"
|
|
470
624
|
].join("\n") + "\n"
|
|
471
625
|
|
|
472
626
|
output = StringIO.new
|
|
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def test_menu_command_can_exit_zero
|
|
19
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
19
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("6\n"), stdout: StringIO.new, stderr: StringIO.new)
|
|
20
20
|
assert_equal 0, status
|
|
21
21
|
end
|
|
22
22
|
|
|
@@ -28,7 +28,7 @@ class CliUnitTest < Minitest::Test
|
|
|
28
28
|
def test_menu_routes_to_row_range_shell
|
|
29
29
|
stdout = StringIO.new
|
|
30
30
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
31
|
-
input = ["2", fixture, "", "2", "3", "", "
|
|
31
|
+
input = ["2", fixture, "", "2", "3", "", "6"].join("\n") + "\n"
|
|
32
32
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
33
33
|
assert_equal 0, status
|
|
34
34
|
assert_includes stdout.string, "name,city"
|
|
@@ -39,7 +39,7 @@ class CliUnitTest < Minitest::Test
|
|
|
39
39
|
def test_menu_routes_to_randomize_rows_shell
|
|
40
40
|
stdout = StringIO.new
|
|
41
41
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
42
|
-
input = ["3", fixture, "", "", "", "", "
|
|
42
|
+
input = ["3", fixture, "", "", "", "", "6"].join("\n") + "\n"
|
|
43
43
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
44
44
|
assert_equal 0, status
|
|
45
45
|
assert_includes stdout.string, "name,city"
|
|
@@ -52,7 +52,7 @@ class CliUnitTest < Minitest::Test
|
|
|
52
52
|
stdout = StringIO.new
|
|
53
53
|
source_fixture = File.expand_path("../fixtures/dedupe_source.csv", __dir__)
|
|
54
54
|
reference_fixture = File.expand_path("../fixtures/dedupe_reference.csv", __dir__)
|
|
55
|
-
input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "
|
|
55
|
+
input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "6"].join("\n") + "\n"
|
|
56
56
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
57
57
|
assert_equal 0, status
|
|
58
58
|
assert_includes stdout.string, "customer_id,name"
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/csv_parity_session/parity_options"
|
|
5
|
+
|
|
6
|
+
class ParityOptionsTest < Minitest::Test
|
|
7
|
+
def test_requires_separator
|
|
8
|
+
assert_raises(ArgumentError) do
|
|
9
|
+
Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: "", headers_present: true)
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def test_exposes_headers_present
|
|
14
|
+
options = Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: ",", headers_present: false)
|
|
15
|
+
assert_equal false, options.headers_present?
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/csv_parity_session/source_pair"
|
|
5
|
+
require "csvtool/domain/csv_parity_session/parity_options"
|
|
6
|
+
require "csvtool/domain/csv_parity_session/parity_session"
|
|
7
|
+
|
|
8
|
+
class ParitySessionTest < Minitest::Test
|
|
9
|
+
def test_stores_source_pair_and_options
|
|
10
|
+
source_pair = Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "/tmp/l.csv", right_path: "/tmp/r.csv")
|
|
11
|
+
options = Csvtool::Domain::CsvParitySession::ParityOptions.new(separator: ",", headers_present: true)
|
|
12
|
+
|
|
13
|
+
session = Csvtool::Domain::CsvParitySession::ParitySession.start(source_pair: source_pair, options: options)
|
|
14
|
+
|
|
15
|
+
assert_equal source_pair, session.source_pair
|
|
16
|
+
assert_equal options, session.options
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/csv_parity_session/source_pair"
|
|
5
|
+
|
|
6
|
+
class SourcePairTest < Minitest::Test
|
|
7
|
+
def test_requires_paths
|
|
8
|
+
assert_raises(ArgumentError) { Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "", right_path: "/tmp/r.csv") }
|
|
9
|
+
assert_raises(ArgumentError) { Csvtool::Domain::CsvParitySession::SourcePair.new(left_path: "/tmp/l.csv", right_path: "") }
|
|
10
|
+
end
|
|
11
|
+
end
|