csvops 0.4.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -9
- data/docs/architecture.md +148 -18
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +6 -6
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- metadata +60 -1
|
@@ -2,123 +2,133 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/application/use_cases/run_row_randomization"
|
|
5
|
+
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
6
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
7
|
+
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
8
|
+
require "csvtool/domain/shared/output_destination"
|
|
5
9
|
require "tmpdir"
|
|
6
10
|
|
|
7
11
|
class RunRowRandomizationTest < Minitest::Test
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
input = StringIO.new("#{fixture}\n\n\n\n\n")
|
|
12
|
-
|
|
13
|
-
Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
|
|
14
|
-
|
|
15
|
-
assert_includes output.string, "CSV file path:"
|
|
16
|
-
header_index = output.string.index("name,city")
|
|
17
|
-
assert header_index
|
|
18
|
-
%w[Alice,London Bob,Paris Cara,Berlin].each do |row|
|
|
19
|
-
row_index = output.string.index(row)
|
|
20
|
-
assert row_index
|
|
21
|
-
assert_operator header_index, :<, row_index
|
|
12
|
+
class RaisingWriter
|
|
13
|
+
def call(**_kwargs)
|
|
14
|
+
raise Errno::ENOENT
|
|
22
15
|
end
|
|
23
16
|
end
|
|
24
17
|
|
|
25
|
-
def
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
Csvtool::Application::UseCases::RunRowRandomization.new(stdin: input, stdout: output).call
|
|
18
|
+
def fixture_path(name)
|
|
19
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
20
|
+
end
|
|
30
21
|
|
|
31
|
-
|
|
22
|
+
def build_session(file_path:, separator: ",", headers_present: true, seed: nil, output: :console, output_path: nil)
|
|
23
|
+
source = Csvtool::Domain::RowRandomizationSession::RandomizationSource.new(
|
|
24
|
+
path: file_path,
|
|
25
|
+
separator: separator,
|
|
26
|
+
headers_present: headers_present
|
|
27
|
+
)
|
|
28
|
+
options = Csvtool::Domain::RowRandomizationSession::RandomizationOptions.new(seed: seed)
|
|
29
|
+
session = Csvtool::Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
|
|
30
|
+
|
|
31
|
+
session.with_output_destination(
|
|
32
|
+
if output == :file
|
|
33
|
+
Csvtool::Domain::Shared::OutputDestination.file(path: output_path)
|
|
34
|
+
else
|
|
35
|
+
Csvtool::Domain::Shared::OutputDestination.console
|
|
36
|
+
end
|
|
37
|
+
)
|
|
32
38
|
end
|
|
33
39
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
output = StringIO.new
|
|
40
|
+
def test_read_headers_returns_headers_when_enabled
|
|
41
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
37
42
|
|
|
38
|
-
|
|
39
|
-
output_path = File.join(dir, "randomized.csv")
|
|
40
|
-
input = StringIO.new("#{fixture}\n\n\n\n2\n#{output_path}\n")
|
|
43
|
+
result = use_case.read_headers(file_path: fixture_path("sample_people.csv"), col_sep: ",", headers_present: true)
|
|
41
44
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
written = File.read(output_path).lines.map(&:strip)
|
|
45
|
-
assert_equal "name,city", written.first
|
|
46
|
-
assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, written[1..].sort
|
|
47
|
-
assert_includes output.string, "Wrote output to #{output_path}"
|
|
48
|
-
end
|
|
45
|
+
assert result.ok?
|
|
46
|
+
assert_equal ["name", "city"], result.data[:headers]
|
|
49
47
|
end
|
|
50
48
|
|
|
51
|
-
def
|
|
52
|
-
|
|
53
|
-
output = StringIO.new
|
|
54
|
-
input = StringIO.new("#{fixture}\n2\n\n\n\n")
|
|
49
|
+
def test_read_headers_returns_nil_when_headers_disabled
|
|
50
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
55
51
|
|
|
56
|
-
|
|
52
|
+
result = use_case.read_headers(file_path: fixture_path("sample_people_no_headers.csv"), col_sep: ",", headers_present: false)
|
|
57
53
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
assert_includes output.string, "Bob\tParis"
|
|
61
|
-
assert_includes output.string, "Cara\tBerlin"
|
|
54
|
+
assert result.ok?
|
|
55
|
+
assert_nil result.data[:headers]
|
|
62
56
|
end
|
|
63
57
|
|
|
64
|
-
def
|
|
65
|
-
|
|
66
|
-
output = StringIO.new
|
|
67
|
-
input = StringIO.new("#{fixture}\n5\n:\n\n\n\n")
|
|
58
|
+
def test_read_headers_fails_for_missing_file
|
|
59
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
68
60
|
|
|
69
|
-
|
|
61
|
+
result = use_case.read_headers(file_path: "/tmp/not-present.csv", col_sep: ",", headers_present: true)
|
|
70
62
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
assert_includes output.string, "Bob:Paris"
|
|
74
|
-
assert_includes output.string, "Cara:Berlin"
|
|
63
|
+
refute result.ok?
|
|
64
|
+
assert_equal :file_not_found, result.error
|
|
75
65
|
end
|
|
76
66
|
|
|
77
|
-
def
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
67
|
+
def test_randomize_streams_rows_for_console_mode
|
|
68
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
69
|
+
session = build_session(file_path: fixture_path("sample_people.csv"), seed: 123)
|
|
70
|
+
rows = []
|
|
81
71
|
|
|
82
|
-
|
|
72
|
+
result = use_case.randomize(session: session, headers: ["name", "city"], on_row: ->(fields) { rows << fields })
|
|
83
73
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
assert_includes output.string, "Cara,Berlin"
|
|
74
|
+
assert result.ok?
|
|
75
|
+
assert_equal 3, rows.length
|
|
76
|
+
assert_equal [["Alice", "London"], ["Bob", "Paris"], ["Cara", "Berlin"]].sort, rows.sort
|
|
88
77
|
end
|
|
89
78
|
|
|
90
|
-
def
|
|
91
|
-
|
|
92
|
-
input_data = "#{fixture}\n\n\n123\n\n"
|
|
93
|
-
|
|
94
|
-
out1 = StringIO.new
|
|
95
|
-
out2 = StringIO.new
|
|
79
|
+
def test_randomize_writes_rows_to_file
|
|
80
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
96
81
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
82
|
+
Dir.mktmpdir do |dir|
|
|
83
|
+
output_path = File.join(dir, "randomized.csv")
|
|
84
|
+
session = build_session(
|
|
85
|
+
file_path: fixture_path("sample_people.csv"),
|
|
86
|
+
seed: 123,
|
|
87
|
+
output: :file,
|
|
88
|
+
output_path: output_path
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
result = use_case.randomize(session: session, headers: ["name", "city"])
|
|
92
|
+
|
|
93
|
+
assert result.ok?
|
|
94
|
+
assert_equal output_path, result.data[:output_path]
|
|
95
|
+
lines = File.read(output_path).lines.map(&:strip)
|
|
96
|
+
assert_equal "name,city", lines.first
|
|
97
|
+
assert_equal ["Alice,London", "Bob,Paris", "Cara,Berlin"].sort, lines[1..].sort
|
|
98
|
+
end
|
|
103
99
|
end
|
|
104
100
|
|
|
105
|
-
def
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
101
|
+
def test_same_seed_produces_stable_order
|
|
102
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new
|
|
103
|
+
session_1 = build_session(file_path: fixture_path("sample_people_many.csv"), seed: 123)
|
|
104
|
+
session_2 = build_session(file_path: fixture_path("sample_people_many.csv"), seed: 123)
|
|
105
|
+
rows_1 = []
|
|
106
|
+
rows_2 = []
|
|
109
107
|
|
|
110
|
-
|
|
108
|
+
result_1 = use_case.randomize(session: session_1, headers: ["name", "city"], on_row: ->(fields) { rows_1 << fields })
|
|
109
|
+
result_2 = use_case.randomize(session: session_2, headers: ["name", "city"], on_row: ->(fields) { rows_2 << fields })
|
|
111
110
|
|
|
112
|
-
|
|
111
|
+
assert result_1.ok?
|
|
112
|
+
assert result_2.ok?
|
|
113
|
+
assert_equal rows_1, rows_2
|
|
113
114
|
end
|
|
114
115
|
|
|
115
|
-
def
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
116
|
+
def test_randomize_returns_cannot_write_output_file_when_writer_fails
|
|
117
|
+
use_case = Csvtool::Application::UseCases::RunRowRandomization.new(
|
|
118
|
+
csv_randomized_row_file_writer: RaisingWriter.new
|
|
119
|
+
)
|
|
120
|
+
session = build_session(
|
|
121
|
+
file_path: fixture_path("sample_people.csv"),
|
|
122
|
+
seed: 123,
|
|
123
|
+
output: :file,
|
|
124
|
+
output_path: "/tmp/randomized.csv"
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
result = use_case.randomize(session: session, headers: ["name", "city"])
|
|
128
|
+
|
|
129
|
+
refute result.ok?
|
|
130
|
+
assert_equal :cannot_write_output_file, result.error
|
|
131
|
+
assert_equal "/tmp/randomized.csv", result.data[:path]
|
|
132
|
+
assert_equal Errno::ENOENT, result.data[:error_class]
|
|
123
133
|
end
|
|
124
134
|
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
class InfrastructureCsvCrossCsvDedupeFileWriterTest < Minitest::Test
|
|
8
|
+
class FakeDeduper
|
|
9
|
+
def each_retained(**_kwargs)
|
|
10
|
+
yield %w[1 Alice]
|
|
11
|
+
yield %w[3 Cara]
|
|
12
|
+
{ source_rows: 5, removed_rows: 3, kept_rows_count: 2 }
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_writes_retained_rows_and_returns_stats
|
|
17
|
+
writer = Csvtool::Infrastructure::Output::CsvCrossCsvDedupeFileWriter.new(deduper: FakeDeduper.new)
|
|
18
|
+
|
|
19
|
+
Dir.mktmpdir do |dir|
|
|
20
|
+
output_path = File.join(dir, "deduped.csv")
|
|
21
|
+
stats = writer.call(
|
|
22
|
+
path: output_path,
|
|
23
|
+
headers: ["customer_id", "name"],
|
|
24
|
+
col_sep: ",",
|
|
25
|
+
dedupe_options: { source_path: "source.csv", reference_path: "reference.csv" }
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
|
|
29
|
+
assert_equal 2, stats[:kept_rows_count]
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/infrastructure/output/csv_file_writer"
|
|
5
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
6
5
|
require "tmpdir"
|
|
7
6
|
|
|
8
7
|
class InfrastructureCsvFileWriterTest < Minitest::Test
|
|
@@ -13,10 +12,7 @@ class InfrastructureCsvFileWriterTest < Minitest::Test
|
|
|
13
12
|
end
|
|
14
13
|
|
|
15
14
|
def test_writes_header_and_values
|
|
16
|
-
stdout = StringIO.new
|
|
17
15
|
writer = Csvtool::Infrastructure::Output::CsvFileWriter.new(
|
|
18
|
-
stdout: stdout,
|
|
19
|
-
errors: Csvtool::Interface::CLI::Errors::Presenter.new(stdout: stdout),
|
|
20
16
|
value_streamer: FakeStreamer.new
|
|
21
17
|
)
|
|
22
18
|
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/output/csv_randomized_row_file_writer"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
class InfrastructureCsvRandomizedRowFileWriterTest < Minitest::Test
|
|
8
|
+
class FakeRandomizer
|
|
9
|
+
def each(file_path:, col_sep:, headers:, seed:)
|
|
10
|
+
yield ["Bob", "Paris"]
|
|
11
|
+
yield ["Cara", "Berlin"]
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def test_writes_randomized_rows_with_headers
|
|
16
|
+
writer = Csvtool::Infrastructure::Output::CsvRandomizedRowFileWriter.new(row_randomizer: FakeRandomizer.new)
|
|
17
|
+
|
|
18
|
+
Dir.mktmpdir do |dir|
|
|
19
|
+
output_path = File.join(dir, "randomized.csv")
|
|
20
|
+
writer.call(
|
|
21
|
+
path: output_path,
|
|
22
|
+
headers: ["name", "city"],
|
|
23
|
+
file_path: "ignored.csv",
|
|
24
|
+
col_sep: ",",
|
|
25
|
+
headers_present: true,
|
|
26
|
+
seed: 123
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/infrastructure/output/csv_row_file_writer"
|
|
5
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
6
5
|
require "tmpdir"
|
|
7
6
|
|
|
8
7
|
class InfrastructureCsvRowFileWriterTest < Minitest::Test
|
|
@@ -15,10 +14,7 @@ class InfrastructureCsvRowFileWriterTest < Minitest::Test
|
|
|
15
14
|
end
|
|
16
15
|
|
|
17
16
|
def test_writes_header_and_rows_to_file
|
|
18
|
-
stdout = StringIO.new
|
|
19
17
|
writer = Csvtool::Infrastructure::Output::CsvRowFileWriter.new(
|
|
20
|
-
stdout: stdout,
|
|
21
|
-
errors: Csvtool::Interface::CLI::Errors::Presenter.new(stdout: stdout),
|
|
22
18
|
row_streamer: FakeRowStreamer.new
|
|
23
19
|
)
|
|
24
20
|
|
|
@@ -35,6 +31,7 @@ class InfrastructureCsvRowFileWriterTest < Minitest::Test
|
|
|
35
31
|
|
|
36
32
|
assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
|
|
37
33
|
assert_equal true, stats[:matched]
|
|
34
|
+
assert_equal true, stats[:wrote_rows]
|
|
38
35
|
end
|
|
39
36
|
end
|
|
40
37
|
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/prompts/dedupe_key_selector_prompt"
|
|
5
|
+
|
|
6
|
+
class DedupeKeySelectorPromptTest < Minitest::Test
|
|
7
|
+
def test_builds_name_selector_in_header_mode
|
|
8
|
+
prompt = Csvtool::Interface::CLI::Prompts::DedupeKeySelectorPrompt.new(stdin: StringIO.new("customer_id\n"), stdout: StringIO.new)
|
|
9
|
+
|
|
10
|
+
selector = prompt.call(label: "Source", headers_present: true)
|
|
11
|
+
|
|
12
|
+
assert_equal true, selector.headers_present?
|
|
13
|
+
assert_equal "customer_id", selector.value
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_builds_index_selector_in_headerless_mode
|
|
17
|
+
prompt = Csvtool::Interface::CLI::Prompts::DedupeKeySelectorPrompt.new(stdin: StringIO.new("2\n"), stdout: StringIO.new)
|
|
18
|
+
|
|
19
|
+
selector = prompt.call(label: "Reference", headers_present: false)
|
|
20
|
+
|
|
21
|
+
assert_equal true, selector.index?
|
|
22
|
+
assert_equal 2, selector.value
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_returns_nil_for_invalid_selector
|
|
26
|
+
prompt = Csvtool::Interface::CLI::Prompts::DedupeKeySelectorPrompt.new(stdin: StringIO.new("\n"), stdout: StringIO.new)
|
|
27
|
+
|
|
28
|
+
assert_nil prompt.call(label: "Source", headers_present: true)
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -8,4 +8,13 @@ class FilePathPromptTest < Minitest::Test
|
|
|
8
8
|
prompt = Csvtool::Interface::CLI::Prompts::FilePathPrompt.new(stdin: StringIO.new(" /tmp/a.csv \n"), stdout: StringIO.new)
|
|
9
9
|
assert_equal "/tmp/a.csv", prompt.call
|
|
10
10
|
end
|
|
11
|
+
|
|
12
|
+
def test_supports_custom_label
|
|
13
|
+
out = StringIO.new
|
|
14
|
+
prompt = Csvtool::Interface::CLI::Prompts::FilePathPrompt.new(stdin: StringIO.new("/tmp/a.csv\n"), stdout: out)
|
|
15
|
+
|
|
16
|
+
prompt.call(label: "Reference CSV file path: ")
|
|
17
|
+
|
|
18
|
+
assert_includes out.string, "Reference CSV file path: "
|
|
19
|
+
end
|
|
11
20
|
end
|
|
@@ -11,4 +11,14 @@ class HeadersPresentPromptTest < Minitest::Test
|
|
|
11
11
|
assert_equal true, yes_prompt.call
|
|
12
12
|
assert_equal false, no_prompt.call
|
|
13
13
|
end
|
|
14
|
+
|
|
15
|
+
def test_supports_custom_label
|
|
16
|
+
out = StringIO.new
|
|
17
|
+
prompt = Csvtool::Interface::CLI::Prompts::HeadersPresentPrompt.new(stdin: StringIO.new("yes\n"), stdout: out)
|
|
18
|
+
|
|
19
|
+
result = prompt.call(label: "Source headers present? [Y/n]: ")
|
|
20
|
+
|
|
21
|
+
assert_equal true, result
|
|
22
|
+
assert_includes out.string, "Source headers present? [Y/n]: "
|
|
23
|
+
end
|
|
14
24
|
end
|
|
@@ -28,4 +28,14 @@ class SeparatorPromptTest < Minitest::Test
|
|
|
28
28
|
assert_nil prompt.call
|
|
29
29
|
assert_includes errors.calls, :empty_custom_separator
|
|
30
30
|
end
|
|
31
|
+
|
|
32
|
+
def test_supports_custom_label
|
|
33
|
+
errors = FakeErrors.new
|
|
34
|
+
out = StringIO.new
|
|
35
|
+
prompt = Csvtool::Interface::CLI::Prompts::SeparatorPrompt.new(stdin: StringIO.new("\n"), stdout: out, errors: errors)
|
|
36
|
+
|
|
37
|
+
prompt.call(label: "Reference CSV separator:")
|
|
38
|
+
|
|
39
|
+
assert_includes out.string, "Reference CSV separator:"
|
|
40
|
+
end
|
|
31
41
|
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/prompts/yes_no_prompt"
|
|
5
|
+
|
|
6
|
+
class YesNoPromptTest < Minitest::Test
|
|
7
|
+
def test_uses_default_for_blank_or_invalid
|
|
8
|
+
prompt_blank = Csvtool::Interface::CLI::Prompts::YesNoPrompt.new(stdin: StringIO.new("\n"), stdout: StringIO.new)
|
|
9
|
+
prompt_invalid = Csvtool::Interface::CLI::Prompts::YesNoPrompt.new(stdin: StringIO.new("maybe\n"), stdout: StringIO.new)
|
|
10
|
+
|
|
11
|
+
assert_equal true, prompt_blank.call(label: "Q? ", default: true)
|
|
12
|
+
assert_equal false, prompt_invalid.call(label: "Q? ", default: false)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def test_accepts_yes_and_no_inputs
|
|
16
|
+
prompt_yes = Csvtool::Interface::CLI::Prompts::YesNoPrompt.new(stdin: StringIO.new("y\n"), stdout: StringIO.new)
|
|
17
|
+
prompt_no = Csvtool::Interface::CLI::Prompts::YesNoPrompt.new(stdin: StringIO.new("no\n"), stdout: StringIO.new)
|
|
18
|
+
|
|
19
|
+
assert_equal true, prompt_yes.call(label: "Q? ", default: false)
|
|
20
|
+
assert_equal false, prompt_no.call(label: "Q? ", default: true)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/builders/column_session_builder"
|
|
5
|
+
|
|
6
|
+
class ColumnSessionBuilderTest < Minitest::Test
|
|
7
|
+
def test_builds_column_session
|
|
8
|
+
builder = Csvtool::Interface::CLI::Workflows::Builders::ColumnSessionBuilder.new
|
|
9
|
+
|
|
10
|
+
session = builder.call(file_path: "/tmp/data.csv", col_sep: ",", column_name: "name", skip_blanks: true)
|
|
11
|
+
|
|
12
|
+
assert_equal "/tmp/data.csv", session.source.path
|
|
13
|
+
assert_equal ",", session.source.separator.value
|
|
14
|
+
assert_equal "name", session.column_selection.name
|
|
15
|
+
assert_equal true, session.options.skip_blanks?
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
6
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
7
|
+
require "csvtool/domain/shared/output_destination"
|
|
8
|
+
|
|
9
|
+
class CrossCsvDedupeSessionBuilderTest < Minitest::Test
|
|
10
|
+
def test_builds_cross_csv_dedupe_session
|
|
11
|
+
builder = Csvtool::Interface::CLI::Workflows::Builders::CrossCsvDedupeSessionBuilder.new
|
|
12
|
+
source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "/tmp/source.csv", separator: ",", headers_present: true)
|
|
13
|
+
reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(path: "/tmp/reference.csv", separator: ",", headers_present: true)
|
|
14
|
+
source_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "id")
|
|
15
|
+
reference_selector = Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: true, input: "rid")
|
|
16
|
+
destination = Csvtool::Domain::Shared::OutputDestination.console
|
|
17
|
+
|
|
18
|
+
session = builder.call(
|
|
19
|
+
source: source,
|
|
20
|
+
reference: reference,
|
|
21
|
+
source_selector: source_selector,
|
|
22
|
+
reference_selector: reference_selector,
|
|
23
|
+
trim_whitespace: true,
|
|
24
|
+
case_insensitive: false,
|
|
25
|
+
destination: destination
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
assert_equal "/tmp/source.csv", session.source.path
|
|
29
|
+
assert_equal "/tmp/reference.csv", session.reference.path
|
|
30
|
+
assert_equal "id", session.key_mapping.source_selector.value
|
|
31
|
+
assert_equal "rid", session.key_mapping.reference_selector.value
|
|
32
|
+
assert_equal true, session.match_options.trim_whitespace?
|
|
33
|
+
assert_equal false, session.match_options.case_insensitive?
|
|
34
|
+
assert_equal true, session.output_destination.console?
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/builders/row_extraction_session_builder"
|
|
5
|
+
require "csvtool/domain/row_session/row_range"
|
|
6
|
+
require "csvtool/domain/shared/output_destination"
|
|
7
|
+
|
|
8
|
+
class RowExtractionSessionBuilderTest < Minitest::Test
|
|
9
|
+
def test_builds_row_extraction_session
|
|
10
|
+
builder = Csvtool::Interface::CLI::Workflows::Builders::RowExtractionSessionBuilder.new
|
|
11
|
+
row_range = Csvtool::Domain::RowSession::RowRange.new(start_row: 2, end_row: 4)
|
|
12
|
+
destination = Csvtool::Domain::Shared::OutputDestination.console
|
|
13
|
+
|
|
14
|
+
session = builder.call(file_path: "/tmp/data.csv", col_sep: ";", row_range: row_range, destination: destination)
|
|
15
|
+
|
|
16
|
+
assert_equal "/tmp/data.csv", session.source.path
|
|
17
|
+
assert_equal ";", session.source.separator
|
|
18
|
+
assert_equal 2, session.row_range.start_row
|
|
19
|
+
assert_equal true, session.output_destination.console?
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/builders/row_randomization_session_builder"
|
|
5
|
+
require "csvtool/domain/shared/output_destination"
|
|
6
|
+
|
|
7
|
+
class RowRandomizationSessionBuilderTest < Minitest::Test
|
|
8
|
+
def test_builds_row_randomization_session
|
|
9
|
+
builder = Csvtool::Interface::CLI::Workflows::Builders::RowRandomizationSessionBuilder.new
|
|
10
|
+
destination = Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv")
|
|
11
|
+
|
|
12
|
+
session = builder.call(
|
|
13
|
+
file_path: "/tmp/data.csv",
|
|
14
|
+
col_sep: "\t",
|
|
15
|
+
headers_present: false,
|
|
16
|
+
seed: 12,
|
|
17
|
+
destination: destination
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
assert_equal "/tmp/data.csv", session.source.path
|
|
21
|
+
assert_equal "\t", session.source.separator
|
|
22
|
+
assert_equal false, session.source.headers_present?
|
|
23
|
+
assert_equal 12, session.options.seed
|
|
24
|
+
assert_equal true, session.output_destination.file?
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/presenters/column_extraction_presenter"
|
|
5
|
+
|
|
6
|
+
class ColumnExtractionPresenterTest < Minitest::Test
|
|
7
|
+
def test_prints_value
|
|
8
|
+
out = StringIO.new
|
|
9
|
+
presenter = Csvtool::Interface::CLI::Workflows::Presenters::ColumnExtractionPresenter.new(stdout: out)
|
|
10
|
+
|
|
11
|
+
presenter.print_value("Alice")
|
|
12
|
+
|
|
13
|
+
assert_equal "Alice\n", out.string
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_prints_file_written_message
|
|
17
|
+
out = StringIO.new
|
|
18
|
+
presenter = Csvtool::Interface::CLI::Workflows::Presenters::ColumnExtractionPresenter.new(stdout: out)
|
|
19
|
+
|
|
20
|
+
presenter.print_file_written("/tmp/names.csv")
|
|
21
|
+
|
|
22
|
+
assert_includes out.string, "Wrote output to /tmp/names.csv"
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter"
|
|
5
|
+
|
|
6
|
+
class CrossCsvDedupePresenterTest < Minitest::Test
|
|
7
|
+
def test_prints_header_row_and_summary
|
|
8
|
+
out = StringIO.new
|
|
9
|
+
presenter = Csvtool::Interface::CLI::Workflows::Presenters::CrossCsvDedupePresenter.new(stdout: out, col_sep: ",")
|
|
10
|
+
|
|
11
|
+
presenter.print_header(["id", "name"])
|
|
12
|
+
presenter.print_row(["1", "Alice"])
|
|
13
|
+
presenter.print_summary(source_rows: 5, removed_rows: 3, kept_rows_count: 2)
|
|
14
|
+
|
|
15
|
+
assert_includes out.string, "\nid,name\n"
|
|
16
|
+
assert_includes out.string, "1,Alice"
|
|
17
|
+
assert_includes out.string, "Summary: source_rows=5 removed_rows=3 kept_rows=2"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def test_prints_zero_and_all_removed_messages
|
|
21
|
+
out = StringIO.new
|
|
22
|
+
presenter = Csvtool::Interface::CLI::Workflows::Presenters::CrossCsvDedupePresenter.new(stdout: out, col_sep: ",")
|
|
23
|
+
|
|
24
|
+
presenter.print_summary(source_rows: 5, removed_rows: 0, kept_rows_count: 5)
|
|
25
|
+
presenter.print_summary(source_rows: 5, removed_rows: 5, kept_rows_count: 0)
|
|
26
|
+
|
|
27
|
+
assert_includes out.string, "No rows removed; no matching keys found."
|
|
28
|
+
assert_includes out.string, "All source rows were removed by dedupe."
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../../../test_helper"
|
|
4
|
+
require "csvtool/interface/cli/workflows/presenters/row_extraction_presenter"
|
|
5
|
+
|
|
6
|
+
class RowExtractionPresenterTest < Minitest::Test
|
|
7
|
+
def test_prints_header_once_then_rows
|
|
8
|
+
out = StringIO.new
|
|
9
|
+
presenter = Csvtool::Interface::CLI::Workflows::Presenters::RowExtractionPresenter.new(
|
|
10
|
+
stdout: out,
|
|
11
|
+
headers: ["name", "city"],
|
|
12
|
+
col_sep: ","
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
presenter.print_row(["Alice", "London"])
|
|
16
|
+
presenter.print_row(["Bob", "Paris"])
|
|
17
|
+
|
|
18
|
+
assert_equal "name,city\nAlice,London\nBob,Paris\n", out.string
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def test_prints_file_written_message
|
|
22
|
+
out = StringIO.new
|
|
23
|
+
presenter = Csvtool::Interface::CLI::Workflows::Presenters::RowExtractionPresenter.new(
|
|
24
|
+
stdout: out,
|
|
25
|
+
headers: ["name"],
|
|
26
|
+
col_sep: ","
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
presenter.print_file_written("/tmp/out.csv")
|
|
30
|
+
|
|
31
|
+
assert_includes out.string, "Wrote output to /tmp/out.csv"
|
|
32
|
+
end
|
|
33
|
+
end
|