csvops 0.3.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -149
- data/docs/architecture.md +396 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +130 -16
- data/test/csvtool/cli_unit_test.rb +16 -3
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- metadata +93 -8
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
|
@@ -2,123 +2,133 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/application/use_cases/run_row_randomization"
|
|
5
|
+
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
6
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
7
|
+
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
8
|
+
require "csvtool/domain/shared/output_destination"
|
|
5
9
|
require "tmpdir"
|
|
6
10
|
|
|
7
11
|
class RunRowRandomizationTest < Minitest::Test
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
input = StringIO.new("#{fixture}\n\n\n\n\n")
|
|
12
|
-
|
|
13
|
-
Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
|
|
14
|
-
|
|
15
|
-
assert_includes output.string, "CSV file path:"
|
|
16
|
-
header_index = output.string.index("name,city")
|
|
17
|
-
assert header_index
|
|
18
|
-
%w[Alice,London Bob,Paris Cara,Berlin].each do |row|
|
|
19
|
-
row_index = output.string.index(row)
|
|
20
|
-
assert row_index
|
|
21
|
-
assert_operator header_index, :<, row_index
|
|
12
|
+
class RaisingWriter
|
|
13
|
+
def call(**_kwargs)
|
|
14
|
+
raise Errno::ENOENT
|
|
22
15
|
end
|
|
23
16
|
end
|
|
24
17
|
|
|
25
|
-
def
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
|
|
18
|
+
def fixture_path(name)
|
|
19
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
20
|
+
end
|
|
30
21
|
|
|
31
|
-
|
|
22
|
+
def build_session(file_path:, separator: ",", headers_present: true, seed: nil, output: :console, output_path: nil)
|
|
23
|
+
source = Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
|
|
24
|
+
path: file_path,
|
|
25
|
+
separator: separator,
|
|
26
|
+
headers_present: headers_present
|
|
27
|
+
)
|
|
28
|
+
options = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: seed)
|
|
29
|
+
session = Csvtool::Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
|
|
30
|
+
|
|
31
|
+
session.with_output_destination(
|
|
32
|
+
if output == :file
|
|
33
|
+
Csvtool::Domain::Shared::OutputDestination.file(path: output_path)
|
|
34
|
+
else
|
|
35
|
+
Csvtool::Domain::Shared::OutputDestination.console
|
|
36
|
+
end
|
|
37
|
+
)
|
|
32
38
|
end
|
|
33
39
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
output = StringIO.new
|
|
40
|
+
def test_read_headers_returns_headers_when_enabled
|
|
41
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
37
42
|
|
|
38
|
-
|
|
39
|
-
output_path = File.join(dir, "randomized.csv")
|
|
40
|
-
input = StringIO.new("#{fixture}\n\n\n\n2\n#{output_path}\n")
|
|
43
|
+
result = use_case.read_headers(file_path: fixture_path("sample_people.csv"), col_sep: ",", headers_present: true)
|
|
41
44
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
written = File.read(output_path).lines.map(&:strip)
|
|
45
|
-
assert_equal "name,city", written.first
|
|
46
|
-
assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, written[1..].sort
|
|
47
|
-
assert_includes output.string, "Wrote output to #{output_path}"
|
|
48
|
-
end
|
|
45
|
+
assert result.ok?
|
|
46
|
+
assert_equal ["name", "city"], result.data[:headers]
|
|
49
47
|
end
|
|
50
48
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
output = StringIO.new
|
|
54
|
-
input = StringIO.new("#{fixture}\n2\n\n\n\n")
|
|
49
|
+
def test_read_headers_returns_nil_when_headers_disabled
|
|
50
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
55
51
|
|
|
56
|
-
|
|
52
|
+
result = use_case.read_headers(file_path: fixture_path("sample_people_no_headers.csv"), col_sep: ",", headers_present: false)
|
|
57
53
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
assert_includes output.string, "Bob\tParis"
|
|
61
|
-
assert_includes output.string, "Cara\tBerlin"
|
|
54
|
+
assert result.ok?
|
|
55
|
+
assert_nil result.data[:headers]
|
|
62
56
|
end
|
|
63
57
|
|
|
64
|
-
def
|
|
65
|
-
|
|
66
|
-
output = StringIO.new
|
|
67
|
-
input = StringIO.new("#{fixture}\n5\n:\n\n\n\n")
|
|
58
|
+
def test_read_headers_fails_for_missing_file
|
|
59
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
68
60
|
|
|
69
|
-
|
|
61
|
+
result = use_case.read_headers(file_path: "/tmp/not-present.csv", col_sep: ",", headers_present: true)
|
|
70
62
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
assert_includes output.string, "Bob:Paris"
|
|
74
|
-
assert_includes output.string, "Cara:Berlin"
|
|
63
|
+
refute result.ok?
|
|
64
|
+
assert_equal :file_not_found, result.error
|
|
75
65
|
end
|
|
76
66
|
|
|
77
|
-
def
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
67
|
+
def test_randomize_streams_rows_for_console_mode
|
|
68
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
69
|
+
session = build_session(file_path: fixture_path("sample_people.csv"), seed: 123)
|
|
70
|
+
rows = []
|
|
81
71
|
|
|
82
|
-
|
|
72
|
+
result = use_case.randomize(session: session, headers: ["name", "city"], on_row: ->(fields) { rows << fields })
|
|
83
73
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
assert_includes output.string, "Cara,Berlin"
|
|
74
|
+
assert result.ok?
|
|
75
|
+
assert_equal 3, rows.length
|
|
76
|
+
assert_equal [["Alice", "London"], ["Bob", "Paris"], ["Cara", "Berlin"]].sort, rows.sort
|
|
88
77
|
end
|
|
89
78
|
|
|
90
|
-
def
|
|
91
|
-
|
|
92
|
-
input_data = "#{fixture}\n\n\n123\n\n"
|
|
93
|
-
|
|
94
|
-
out1 = StringIO.new
|
|
95
|
-
out2 = StringIO.new
|
|
79
|
+
def test_randomize_writes_rows_to_file
|
|
80
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
96
81
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
82
|
+
Dir.mktmpdir do |dir|
|
|
83
|
+
output_path = File.join(dir, "randomized.csv")
|
|
84
|
+
session = build_session(
|
|
85
|
+
file_path: fixture_path("sample_people.csv"),
|
|
86
|
+
seed: 123,
|
|
87
|
+
output: :file,
|
|
88
|
+
output_path: output_path
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
result = use_case.randomize(session: session, headers: ["name", "city"])
|
|
92
|
+
|
|
93
|
+
assert result.ok?
|
|
94
|
+
assert_equal output_path, result.data[:output_path]
|
|
95
|
+
lines = File.read(output_path).lines.map(&:strip)
|
|
96
|
+
assert_equal "name,city", lines.first
|
|
97
|
+
assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, lines[1..].sort
|
|
98
|
+
end
|
|
103
99
|
end
|
|
104
100
|
|
|
105
|
-
def
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
101
|
+
def test_same_seed_produces_stable_order
|
|
102
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
103
|
+
session_1 = build_session(file_path: fixture_path("sample_people_many.csv"), seed: 123)
|
|
104
|
+
session_2 = build_session(file_path: fixture_path("sample_people_many.csv"), seed: 123)
|
|
105
|
+
rows_1 = []
|
|
106
|
+
rows_2 = []
|
|
109
107
|
|
|
110
|
-
|
|
108
|
+
result_1 = use_case.randomize(session: session_1, headers: ["name", "city"], on_row: ->(fields) { rows_1 << fields })
|
|
109
|
+
result_2 = use_case.randomize(session: session_2, headers: ["name", "city"], on_row: ->(fields) { rows_2 << fields })
|
|
111
110
|
|
|
112
|
-
|
|
111
|
+
assert result_1.ok?
|
|
112
|
+
assert result_2.ok?
|
|
113
|
+
assert_equal rows_1, rows_2
|
|
113
114
|
end
|
|
114
115
|
|
|
115
|
-
def
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
116
|
+
def test_randomize_returns_cannot_write_output_file_when_writer_fails
|
|
117
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new(
|
|
118
|
+
csv_randomized_row_file_writer: RaisingWriter.new
|
|
119
|
+
)
|
|
120
|
+
session = build_session(
|
|
121
|
+
file_path: fixture_path("sample_people.csv"),
|
|
122
|
+
seed: 123,
|
|
123
|
+
output: :file,
|
|
124
|
+
output_path: "/tmp/randomized.csv"
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
result = use_case.randomize(session: session, headers: ["name", "city"])
|
|
128
|
+
|
|
129
|
+
refute result.ok?
|
|
130
|
+
assert_equal :cannot_write_output_file, result.error
|
|
131
|
+
assert_equal "/tmp/randomized.csv", result.data[:path]
|
|
132
|
+
assert_equal Errno::ENOENT, result.data[:error_class]
|
|
123
133
|
end
|
|
124
134
|
end
|
data/test/csvtool/cli_test.rb
CHANGED
|
@@ -11,7 +11,7 @@ class TestCli < Minitest::Test
|
|
|
11
11
|
|
|
12
12
|
def test_menu_can_exit_cleanly
|
|
13
13
|
output = StringIO.new
|
|
14
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
14
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: output, stderr: StringIO.new)
|
|
15
15
|
assert_equal 0, status
|
|
16
16
|
assert_includes output.string, "CSV Tool Menu"
|
|
17
17
|
end
|
|
@@ -26,7 +26,7 @@ class TestCli < Minitest::Test
|
|
|
26
26
|
"",
|
|
27
27
|
"y",
|
|
28
28
|
"",
|
|
29
|
-
"
|
|
29
|
+
"5"
|
|
30
30
|
].join("\n") + "\n"
|
|
31
31
|
|
|
32
32
|
output = StringIO.new
|
|
@@ -58,7 +58,7 @@ class TestCli < Minitest::Test
|
|
|
58
58
|
"2",
|
|
59
59
|
"3",
|
|
60
60
|
"",
|
|
61
|
-
"
|
|
61
|
+
"5"
|
|
62
62
|
].join("\n") + "\n"
|
|
63
63
|
|
|
64
64
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -79,7 +79,7 @@ class TestCli < Minitest::Test
|
|
|
79
79
|
"0",
|
|
80
80
|
"3",
|
|
81
81
|
"",
|
|
82
|
-
"
|
|
82
|
+
"5"
|
|
83
83
|
].join("\n") + "\n"
|
|
84
84
|
|
|
85
85
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -98,7 +98,7 @@ class TestCli < Minitest::Test
|
|
|
98
98
|
"2",
|
|
99
99
|
"3",
|
|
100
100
|
"",
|
|
101
|
-
"
|
|
101
|
+
"5"
|
|
102
102
|
].join("\n") + "\n"
|
|
103
103
|
|
|
104
104
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -119,7 +119,7 @@ class TestCli < Minitest::Test
|
|
|
119
119
|
"2",
|
|
120
120
|
"3",
|
|
121
121
|
"",
|
|
122
|
-
"
|
|
122
|
+
"5"
|
|
123
123
|
].join("\n") + "\n"
|
|
124
124
|
|
|
125
125
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -144,7 +144,7 @@ class TestCli < Minitest::Test
|
|
|
144
144
|
"3",
|
|
145
145
|
"2",
|
|
146
146
|
output_path,
|
|
147
|
-
"
|
|
147
|
+
"5"
|
|
148
148
|
].join("\n") + "\n"
|
|
149
149
|
|
|
150
150
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -164,7 +164,7 @@ class TestCli < Minitest::Test
|
|
|
164
164
|
"1",
|
|
165
165
|
"2",
|
|
166
166
|
"",
|
|
167
|
-
"
|
|
167
|
+
"5"
|
|
168
168
|
].join("\n") + "\n"
|
|
169
169
|
|
|
170
170
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -184,7 +184,7 @@ class TestCli < Minitest::Test
|
|
|
184
184
|
"",
|
|
185
185
|
"",
|
|
186
186
|
"",
|
|
187
|
-
"
|
|
187
|
+
"5"
|
|
188
188
|
].join("\n") + "\n"
|
|
189
189
|
|
|
190
190
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -209,7 +209,7 @@ class TestCli < Minitest::Test
|
|
|
209
209
|
"",
|
|
210
210
|
"2",
|
|
211
211
|
output_path,
|
|
212
|
-
"
|
|
212
|
+
"5"
|
|
213
213
|
].join("\n") + "\n"
|
|
214
214
|
|
|
215
215
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -231,7 +231,7 @@ class TestCli < Minitest::Test
|
|
|
231
231
|
"",
|
|
232
232
|
"",
|
|
233
233
|
"",
|
|
234
|
-
"
|
|
234
|
+
"5"
|
|
235
235
|
].join("\n") + "\n"
|
|
236
236
|
|
|
237
237
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -250,7 +250,7 @@ class TestCli < Minitest::Test
|
|
|
250
250
|
"n",
|
|
251
251
|
"",
|
|
252
252
|
"",
|
|
253
|
-
"
|
|
253
|
+
"5"
|
|
254
254
|
].join("\n") + "\n"
|
|
255
255
|
|
|
256
256
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -270,7 +270,7 @@ class TestCli < Minitest::Test
|
|
|
270
270
|
"",
|
|
271
271
|
"",
|
|
272
272
|
"abc",
|
|
273
|
-
"
|
|
273
|
+
"5"
|
|
274
274
|
].join("\n") + "\n"
|
|
275
275
|
|
|
276
276
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -280,6 +280,120 @@ class TestCli < Minitest::Test
|
|
|
280
280
|
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
281
281
|
end
|
|
282
282
|
|
|
283
|
+
def test_dedupe_workflow_shell_prompts_and_returns_to_menu
|
|
284
|
+
output = StringIO.new
|
|
285
|
+
input = [
|
|
286
|
+
"4",
|
|
287
|
+
fixture_path("dedupe_source.csv"),
|
|
288
|
+
"",
|
|
289
|
+
"",
|
|
290
|
+
fixture_path("dedupe_reference.csv"),
|
|
291
|
+
"",
|
|
292
|
+
"",
|
|
293
|
+
"customer_id",
|
|
294
|
+
"external_id",
|
|
295
|
+
"",
|
|
296
|
+
"",
|
|
297
|
+
"",
|
|
298
|
+
"5"
|
|
299
|
+
].join("\n") + "\n"
|
|
300
|
+
|
|
301
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
302
|
+
|
|
303
|
+
assert_equal 0, status
|
|
304
|
+
assert_includes output.string, "Reference CSV file path:"
|
|
305
|
+
assert_includes output.string, "Source key column name:"
|
|
306
|
+
assert_includes output.string, "Reference key column name:"
|
|
307
|
+
assert_includes output.string, "customer_id,name"
|
|
308
|
+
assert_includes output.string, "1,Alice"
|
|
309
|
+
assert_includes output.string, "3,Cara"
|
|
310
|
+
assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
def test_dedupe_workflow_can_write_to_file
|
|
314
|
+
output = StringIO.new
|
|
315
|
+
|
|
316
|
+
Dir.mktmpdir do |dir|
|
|
317
|
+
output_path = File.join(dir, "deduped.csv")
|
|
318
|
+
input = [
|
|
319
|
+
"4",
|
|
320
|
+
fixture_path("dedupe_source.csv"),
|
|
321
|
+
"",
|
|
322
|
+
"",
|
|
323
|
+
fixture_path("dedupe_reference.csv"),
|
|
324
|
+
"",
|
|
325
|
+
"",
|
|
326
|
+
"customer_id",
|
|
327
|
+
"external_id",
|
|
328
|
+
"",
|
|
329
|
+
"",
|
|
330
|
+
"2",
|
|
331
|
+
output_path,
|
|
332
|
+
"5"
|
|
333
|
+
].join("\n") + "\n"
|
|
334
|
+
|
|
335
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
336
|
+
|
|
337
|
+
assert_equal 0, status
|
|
338
|
+
assert_includes output.string, "Wrote output to #{output_path}"
|
|
339
|
+
assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
|
|
340
|
+
assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
def test_dedupe_workflow_supports_tsv_separators
|
|
345
|
+
output = StringIO.new
|
|
346
|
+
input = [
|
|
347
|
+
"4",
|
|
348
|
+
fixture_path("dedupe_source.tsv"),
|
|
349
|
+
"2",
|
|
350
|
+
"",
|
|
351
|
+
fixture_path("dedupe_reference.tsv"),
|
|
352
|
+
"2",
|
|
353
|
+
"",
|
|
354
|
+
"customer_id",
|
|
355
|
+
"external_id",
|
|
356
|
+
"",
|
|
357
|
+
"",
|
|
358
|
+
"",
|
|
359
|
+
"5"
|
|
360
|
+
].join("\n") + "\n"
|
|
361
|
+
|
|
362
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
363
|
+
|
|
364
|
+
assert_equal 0, status
|
|
365
|
+
assert_includes output.string, "customer_id\tname"
|
|
366
|
+
assert_includes output.string, "1\tAlice"
|
|
367
|
+
assert_includes output.string, "3\tCara"
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
def test_dedupe_workflow_headerless_mode_supports_index
|
|
371
|
+
output = StringIO.new
|
|
372
|
+
input = [
|
|
373
|
+
"4",
|
|
374
|
+
fixture_path("dedupe_source_no_headers.csv"),
|
|
375
|
+
"",
|
|
376
|
+
"n",
|
|
377
|
+
fixture_path("dedupe_reference_no_headers.csv"),
|
|
378
|
+
"",
|
|
379
|
+
"n",
|
|
380
|
+
"1",
|
|
381
|
+
"1",
|
|
382
|
+
"",
|
|
383
|
+
"",
|
|
384
|
+
"",
|
|
385
|
+
"5"
|
|
386
|
+
].join("\n") + "\n"
|
|
387
|
+
|
|
388
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
389
|
+
|
|
390
|
+
assert_equal 0, status
|
|
391
|
+
refute_includes output.string, "customer_id,name"
|
|
392
|
+
assert_includes output.string, "1,Alice"
|
|
393
|
+
assert_includes output.string, "3,Cara"
|
|
394
|
+
assert_includes output.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
395
|
+
end
|
|
396
|
+
|
|
283
397
|
def test_end_to_end_file_output_writes_expected_csv
|
|
284
398
|
output = StringIO.new
|
|
285
399
|
output_path = nil
|
|
@@ -296,7 +410,7 @@ class TestCli < Minitest::Test
|
|
|
296
410
|
"y",
|
|
297
411
|
"2",
|
|
298
412
|
output_path,
|
|
299
|
-
"
|
|
413
|
+
"5"
|
|
300
414
|
].join("\n") + "\n"
|
|
301
415
|
|
|
302
416
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -316,7 +430,7 @@ class TestCli < Minitest::Test
|
|
|
316
430
|
"1",
|
|
317
431
|
"",
|
|
318
432
|
"n",
|
|
319
|
-
"
|
|
433
|
+
"5"
|
|
320
434
|
].join("\n") + "\n"
|
|
321
435
|
|
|
322
436
|
output = StringIO.new
|
|
@@ -352,7 +466,7 @@ class TestCli < Minitest::Test
|
|
|
352
466
|
"y",
|
|
353
467
|
"2",
|
|
354
468
|
"/tmp/not-a-dir/out.csv",
|
|
355
|
-
"
|
|
469
|
+
"5"
|
|
356
470
|
].join("\n") + "\n"
|
|
357
471
|
|
|
358
472
|
output = StringIO.new
|
|
@@ -16,7 +16,7 @@ class CliUnitTest < Minitest::Test
|
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
def test_menu_command_can_exit_zero
|
|
19
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
19
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("5\n"), stdout: StringIO.new, stderr: StringIO.new)
|
|
20
20
|
assert_equal 0, status
|
|
21
21
|
end
|
|
22
22
|
|
|
@@ -28,7 +28,7 @@ class CliUnitTest < Minitest::Test
|
|
|
28
28
|
def test_menu_routes_to_row_range_shell
|
|
29
29
|
stdout = StringIO.new
|
|
30
30
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
31
|
-
input = ["2", fixture, "", "2", "3", "", "
|
|
31
|
+
input = ["2", fixture, "", "2", "3", "", "5"].join("\n") + "\n"
|
|
32
32
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
33
33
|
assert_equal 0, status
|
|
34
34
|
assert_includes stdout.string, "name,city"
|
|
@@ -39,7 +39,7 @@ class CliUnitTest < Minitest::Test
|
|
|
39
39
|
def test_menu_routes_to_randomize_rows_shell
|
|
40
40
|
stdout = StringIO.new
|
|
41
41
|
fixture = File.expand_path("../fixtures/sample_people.csv", __dir__)
|
|
42
|
-
input = ["3", fixture, "", "", "", "", "
|
|
42
|
+
input = ["3", fixture, "", "", "", "", "5"].join("\n") + "\n"
|
|
43
43
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
44
44
|
assert_equal 0, status
|
|
45
45
|
assert_includes stdout.string, "name,city"
|
|
@@ -47,4 +47,17 @@ class CliUnitTest < Minitest::Test
|
|
|
47
47
|
assert_includes stdout.string, "Bob,Paris"
|
|
48
48
|
assert_includes stdout.string, "Cara,Berlin"
|
|
49
49
|
end
|
|
50
|
+
|
|
51
|
+
def test_menu_routes_to_dedupe_shell
|
|
52
|
+
stdout = StringIO.new
|
|
53
|
+
source_fixture = File.expand_path("../fixtures/dedupe_source.csv", __dir__)
|
|
54
|
+
reference_fixture = File.expand_path("../fixtures/dedupe_reference.csv", __dir__)
|
|
55
|
+
input = ["4", source_fixture, "", "", reference_fixture, "", "", "customer_id", "external_id", "", "", "", "5"].join("\n") + "\n"
|
|
56
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: stdout, stderr: StringIO.new)
|
|
57
|
+
assert_equal 0, status
|
|
58
|
+
assert_includes stdout.string, "customer_id,name"
|
|
59
|
+
assert_includes stdout.string, "1,Alice"
|
|
60
|
+
assert_includes stdout.string, "3,Cara"
|
|
61
|
+
assert_includes stdout.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
62
|
+
end
|
|
50
63
|
end
|
|
@@ -8,7 +8,7 @@ require "csvtool/domain/column_session/column_selection"
|
|
|
8
8
|
require "csvtool/domain/column_session/extraction_options"
|
|
9
9
|
require "csvtool/domain/column_session/preview"
|
|
10
10
|
require "csvtool/domain/column_session/extraction_value"
|
|
11
|
-
require "csvtool/domain/
|
|
11
|
+
require "csvtool/domain/shared/output_destination"
|
|
12
12
|
|
|
13
13
|
class ColumnSessionTest < Minitest::Test
|
|
14
14
|
def test_state_transitions
|
|
@@ -25,7 +25,7 @@ class ColumnSessionTest < Minitest::Test
|
|
|
25
25
|
values: [Csvtool::Domain::ColumnSession::ExtractionValue.new("Alice")]
|
|
26
26
|
)
|
|
27
27
|
session = session.with_preview(preview).confirm!.with_output_destination(
|
|
28
|
-
Csvtool::Domain::
|
|
28
|
+
Csvtool::Domain::Shared::OutputDestination.console
|
|
29
29
|
)
|
|
30
30
|
|
|
31
31
|
assert_equal true, session.confirmed?
|
|
@@ -11,4 +11,14 @@ class CsvSourceTest < Minitest::Test
|
|
|
11
11
|
assert_equal "/tmp/a.csv", source.path
|
|
12
12
|
assert_equal separator, source.separator
|
|
13
13
|
end
|
|
14
|
+
|
|
15
|
+
def test_rejects_empty_path
|
|
16
|
+
separator = Csvtool::Domain::ColumnSession::Separator.new(",")
|
|
17
|
+
|
|
18
|
+
error = assert_raises(ArgumentError) do
|
|
19
|
+
Csvtool::Domain::ColumnSession::CsvSource.new(path: "", separator: separator)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
assert_equal "path cannot be empty", error.message
|
|
23
|
+
end
|
|
14
24
|
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
5
|
+
|
|
6
|
+
class CrossCsvDedupeColumnSelectorTest < Minitest::Test
|
|
7
|
+
def test_builds_header_selector_from_input
|
|
8
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
|
|
9
|
+
|
|
10
|
+
assert_equal "customer_id", selector.value
|
|
11
|
+
assert_equal true, selector.headers_present?
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def test_builds_index_selector_from_input
|
|
15
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
|
|
16
|
+
|
|
17
|
+
assert_equal 2, selector.value
|
|
18
|
+
assert_equal true, selector.index?
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def test_rejects_invalid_index_input
|
|
22
|
+
error = assert_raises(ArgumentError) do
|
|
23
|
+
Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "0")
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
assert_equal "column index must be a positive integer", error.message
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def test_extracts_from_headered_row
|
|
30
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "customer_id")
|
|
31
|
+
row = { "customer_id" => "42" }
|
|
32
|
+
|
|
33
|
+
assert_equal "42", selector.extract_from(row)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def test_extracts_from_headerless_row_by_index
|
|
37
|
+
selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: false, input: "2")
|
|
38
|
+
row = ["a", "b", "c"]
|
|
39
|
+
|
|
40
|
+
assert_equal "b", selector.extract_from(row)
|
|
41
|
+
end
|
|
42
|
+
end
|