csvops 0.3.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -149
- data/docs/architecture.md +396 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +130 -16
- data/test/csvtool/cli_unit_test.rb +16 -3
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- metadata +93 -8
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Workflows
|
|
7
|
+
module Steps
|
|
8
|
+
class WorkflowStepPipeline
|
|
9
|
+
def initialize(steps:)
|
|
10
|
+
@steps = steps
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def call(context)
|
|
14
|
+
@steps.each do |step|
|
|
15
|
+
return false if step.call(context) == :halt
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
true
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/shared/output_destination"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Interface
|
|
7
|
+
module CLI
|
|
8
|
+
module Workflows
|
|
9
|
+
module Support
|
|
10
|
+
class OutputDestinationMapper
|
|
11
|
+
def call(output_destination)
|
|
12
|
+
if output_destination[:mode] == :file
|
|
13
|
+
Domain::Shared::OutputDestination.file(path: output_destination[:path])
|
|
14
|
+
else
|
|
15
|
+
Domain::Shared::OutputDestination.console
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Workflows
|
|
7
|
+
module Support
|
|
8
|
+
class ResultErrorHandler
|
|
9
|
+
def initialize(errors:)
|
|
10
|
+
@errors = errors
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def call(result, mapping)
|
|
14
|
+
action = mapping[result.error]
|
|
15
|
+
action&.call(result, @errors)
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
data/lib/csvtool/version.rb
CHANGED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
|
|
5
|
+
class UseCaseIoBoundaryTest < Minitest::Test
|
|
6
|
+
USE_CASE_GLOB = File.expand_path("../../../../lib/csvtool/application/use_cases/*.rb", __dir__)
|
|
7
|
+
FORBIDDEN_PATTERNS = [
|
|
8
|
+
/CSV\.open/,
|
|
9
|
+
/File\.open\([^)]*,\s*["']w/,
|
|
10
|
+
/File\.write\(/,
|
|
11
|
+
/IO\.write\(/
|
|
12
|
+
].freeze
|
|
13
|
+
|
|
14
|
+
def test_use_cases_do_not_perform_direct_file_writes
|
|
15
|
+
violations = []
|
|
16
|
+
|
|
17
|
+
Dir.glob(USE_CASE_GLOB).sort.each do |file_path|
|
|
18
|
+
content = File.read(file_path)
|
|
19
|
+
FORBIDDEN_PATTERNS.each do |pattern|
|
|
20
|
+
violations << "#{file_path}: #{pattern.inspect}" if content.match?(pattern)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
assert_equal [], violations, "Found forbidden direct write APIs in use cases:\n#{violations.join("\n")}"
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/application/use_cases/run_cross_csv_dedupe"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
6
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
7
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
8
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
9
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
10
|
+
require "csvtool/domain/shared/output_destination"
|
|
11
|
+
require "tmpdir"
|
|
12
|
+
|
|
13
|
+
class RunCrossCsvDedupeTest < Minitest::Test
|
|
14
|
+
class RaisingWriter
|
|
15
|
+
def call(**_kwargs)
|
|
16
|
+
raise Errno::ENOENT
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def fixture_path(name)
|
|
21
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def test_streams_retained_rows_to_callbacks
|
|
25
|
+
use_case = Csvtool::Application::UseCases::RunCrossCsvDedupe.new
|
|
26
|
+
headers = nil
|
|
27
|
+
rows = []
|
|
28
|
+
|
|
29
|
+
result = use_case.call(
|
|
30
|
+
session: build_session(
|
|
31
|
+
source_path: fixture_path("dedupe_source.csv"),
|
|
32
|
+
reference_path: fixture_path("dedupe_reference.csv"),
|
|
33
|
+
source_selector_input: "customer_id",
|
|
34
|
+
reference_selector_input: "external_id",
|
|
35
|
+
output_destination: Csvtool::Domain::Shared::OutputDestination.console
|
|
36
|
+
),
|
|
37
|
+
on_header: ->(value) { headers = value },
|
|
38
|
+
on_row: ->(fields) { rows << fields }
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
assert_equal true, result.ok?
|
|
42
|
+
assert_equal ["customer_id", "name"], headers
|
|
43
|
+
assert_equal [%w[1 Alice], %w[3 Cara]], rows
|
|
44
|
+
assert_equal 5, result.data[:stats][:source_rows]
|
|
45
|
+
assert_equal 3, result.data[:stats][:removed_rows]
|
|
46
|
+
assert_equal 2, result.data[:stats][:kept_rows_count]
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def test_writes_to_file_output_destination
|
|
50
|
+
use_case = Csvtool::Application::UseCases::RunCrossCsvDedupe.new
|
|
51
|
+
|
|
52
|
+
Dir.mktmpdir do |dir|
|
|
53
|
+
output_path = File.join(dir, "deduped.csv")
|
|
54
|
+
result = use_case.call(
|
|
55
|
+
session: build_session(
|
|
56
|
+
source_path: fixture_path("dedupe_source.csv"),
|
|
57
|
+
reference_path: fixture_path("dedupe_reference.csv"),
|
|
58
|
+
source_selector_input: "customer_id",
|
|
59
|
+
reference_selector_input: "external_id",
|
|
60
|
+
output_destination: Csvtool::Domain::Shared::OutputDestination.file(path: output_path)
|
|
61
|
+
)
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
assert_equal true, result.ok?
|
|
65
|
+
assert_equal output_path, result.data[:output_path]
|
|
66
|
+
assert_equal "customer_id,name\n1,Alice\n3,Cara\n", File.read(output_path)
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def test_returns_column_not_found_when_selector_invalid
|
|
71
|
+
use_case = Csvtool::Application::UseCases::RunCrossCsvDedupe.new
|
|
72
|
+
|
|
73
|
+
result = use_case.call(
|
|
74
|
+
session: build_session(
|
|
75
|
+
source_path: fixture_path("dedupe_source.csv"),
|
|
76
|
+
reference_path: fixture_path("dedupe_reference.csv"),
|
|
77
|
+
source_selector_input: "missing",
|
|
78
|
+
reference_selector_input: "external_id",
|
|
79
|
+
output_destination: Csvtool::Domain::Shared::OutputDestination.console
|
|
80
|
+
)
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
assert_equal false, result.ok?
|
|
84
|
+
assert_equal :column_not_found, result.error
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def test_returns_cannot_write_output_file_when_writer_fails
|
|
88
|
+
use_case = Csvtool::Application::UseCases::RunCrossCsvDedupe.new(
|
|
89
|
+
csv_cross_csv_dedupe_file_writer: RaisingWriter.new
|
|
90
|
+
)
|
|
91
|
+
output_path = "/tmp/deduped.csv"
|
|
92
|
+
|
|
93
|
+
result = use_case.call(
|
|
94
|
+
session: build_session(
|
|
95
|
+
source_path: fixture_path("dedupe_source.csv"),
|
|
96
|
+
reference_path: fixture_path("dedupe_reference.csv"),
|
|
97
|
+
source_selector_input: "customer_id",
|
|
98
|
+
reference_selector_input: "external_id",
|
|
99
|
+
output_destination: Csvtool::Domain::Shared::OutputDestination.file(path: output_path)
|
|
100
|
+
)
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
refute result.ok?
|
|
104
|
+
assert_equal :cannot_write_output_file, result.error
|
|
105
|
+
assert_equal output_path, result.data[:path]
|
|
106
|
+
assert_equal Errno::ENOENT, result.data[:error_class]
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
private
|
|
110
|
+
|
|
111
|
+
def build_session(source_path:, reference_path:, source_selector_input:, reference_selector_input:, output_destination:)
|
|
112
|
+
source = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
113
|
+
path: source_path,
|
|
114
|
+
separator: ",",
|
|
115
|
+
headers_present: true
|
|
116
|
+
)
|
|
117
|
+
reference = Csvtool::Domain::CrossCsvDedupeSession::CsvProfile.new(
|
|
118
|
+
path: reference_path,
|
|
119
|
+
separator: ",",
|
|
120
|
+
headers_present: true
|
|
121
|
+
)
|
|
122
|
+
key_mapping = Csvtool::Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
123
|
+
source_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
|
|
124
|
+
headers_present: true,
|
|
125
|
+
input: source_selector_input
|
|
126
|
+
),
|
|
127
|
+
reference_selector: Csvtool::Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
|
|
128
|
+
headers_present: true,
|
|
129
|
+
input: reference_selector_input
|
|
130
|
+
)
|
|
131
|
+
)
|
|
132
|
+
match_options = Csvtool::Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
133
|
+
trim_whitespace: true,
|
|
134
|
+
case_insensitive: false
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
Csvtool::Domain::CrossCsvDedupeSession::CrossCsvDedupeSession
|
|
138
|
+
.start(source: source, reference: reference, key_mapping: key_mapping, match_options: match_options)
|
|
139
|
+
.with_output_destination(output_destination)
|
|
140
|
+
end
|
|
141
|
+
end
|
|
@@ -2,30 +2,86 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/application/use_cases/run_extraction"
|
|
5
|
+
require "csvtool/domain/column_session/column_session"
|
|
6
|
+
require "csvtool/domain/column_session/csv_source"
|
|
7
|
+
require "csvtool/domain/column_session/separator"
|
|
8
|
+
require "csvtool/domain/column_session/column_selection"
|
|
9
|
+
require "csvtool/domain/column_session/extraction_options"
|
|
10
|
+
require "csvtool/domain/shared/output_destination"
|
|
11
|
+
require "tmpdir"
|
|
5
12
|
|
|
6
13
|
class RunExtractionTest < Minitest::Test
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
14
|
+
class RaisingWriter
|
|
15
|
+
def call(**_kwargs)
|
|
16
|
+
raise Errno::ENOENT
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def fixture_path(name)
|
|
21
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def test_read_headers_missing_file_returns_failure
|
|
25
|
+
result = Csvtool::Application::UseCases::RunExtraction.new.read_headers(
|
|
26
|
+
file_path: "/tmp/not-present.csv",
|
|
27
|
+
col_sep: ","
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
assert_equal false, result.ok?
|
|
31
|
+
assert_equal :file_not_found, result.error
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def test_preview_returns_expected_values
|
|
35
|
+
use_case = Csvtool::Application::UseCases::RunExtraction.new
|
|
36
|
+
|
|
37
|
+
result = use_case.preview(
|
|
38
|
+
session: build_session(output_destination: Csvtool::Domain::Shared::OutputDestination.console)
|
|
12
39
|
)
|
|
13
40
|
|
|
14
|
-
|
|
41
|
+
assert_equal true, result.ok?
|
|
42
|
+
assert_equal %w[Alice Bob Cara], result.data[:preview_values]
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def test_extract_writes_values_to_file
|
|
46
|
+
use_case = Csvtool::Application::UseCases::RunExtraction.new
|
|
47
|
+
|
|
48
|
+
Dir.mktmpdir do |dir|
|
|
49
|
+
output_path = File.join(dir, "names.csv")
|
|
50
|
+
result = use_case.extract(
|
|
51
|
+
session: build_session(output_destination: Csvtool::Domain::Shared::OutputDestination.file(path: output_path))
|
|
52
|
+
)
|
|
15
53
|
|
|
16
|
-
|
|
54
|
+
assert_equal true, result.ok?
|
|
55
|
+
assert_equal output_path, result.data[:output_path]
|
|
56
|
+
assert_equal "name\nAlice\nBob\nCara\n", File.read(output_path)
|
|
57
|
+
end
|
|
17
58
|
end
|
|
18
59
|
|
|
19
|
-
def
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
60
|
+
def test_extract_returns_cannot_write_output_file_when_writer_fails
|
|
61
|
+
use_case = Csvtool::Application::UseCases::RunExtraction.new(csv_file_writer: RaisingWriter.new)
|
|
62
|
+
|
|
63
|
+
result = use_case.extract(
|
|
64
|
+
session: build_session(output_destination: Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/names.csv"))
|
|
65
|
+
)
|
|
23
66
|
|
|
24
|
-
|
|
25
|
-
|
|
67
|
+
assert_equal false, result.ok?
|
|
68
|
+
assert_equal :cannot_write_output_file, result.error
|
|
69
|
+
assert_equal "/tmp/names.csv", result.data[:path]
|
|
70
|
+
assert_equal Errno::ENOENT, result.data[:error_class]
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
private
|
|
74
|
+
|
|
75
|
+
def build_session(output_destination:)
|
|
76
|
+
session = Csvtool::Domain::ColumnSession::ColumnSession.start(
|
|
77
|
+
source: Csvtool::Domain::ColumnSession::CsvSource.new(
|
|
78
|
+
path: fixture_path("sample_people.csv"),
|
|
79
|
+
separator: Csvtool::Domain::ColumnSession::Separator.new(",")
|
|
80
|
+
),
|
|
81
|
+
column_selection: Csvtool::Domain::ColumnSession::ColumnSelection.new(name: "name"),
|
|
82
|
+
options: Csvtool::Domain::ColumnSession::ExtractionOptions.new(skip_blanks: true, preview_limit: 10)
|
|
83
|
+
)
|
|
26
84
|
|
|
27
|
-
|
|
28
|
-
assert_includes out.string, "Bob"
|
|
29
|
-
assert_includes out.string, "Cara"
|
|
85
|
+
session.with_output_destination(output_destination)
|
|
30
86
|
end
|
|
31
87
|
end
|
|
@@ -2,139 +2,119 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "../../../test_helper"
|
|
4
4
|
require "csvtool/application/use_cases/run_row_extraction"
|
|
5
|
+
require "csvtool/domain/row_session/row_source"
|
|
6
|
+
require "csvtool/domain/row_session/row_range"
|
|
7
|
+
require "csvtool/domain/row_session/row_session"
|
|
8
|
+
require "csvtool/domain/shared/output_destination"
|
|
5
9
|
require "tmpdir"
|
|
6
10
|
|
|
7
11
|
class RunRowExtractionTest < Minitest::Test
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
use_case = Csvtool::Application::UseCases::RunRowExtraction.new(stdin: StringIO.new(input), stdout: out)
|
|
14
|
-
use_case.call
|
|
15
|
-
|
|
16
|
-
assert_includes out.string, "name,city"
|
|
17
|
-
assert_includes out.string, "Bob,Paris"
|
|
18
|
-
assert_includes out.string, "Cara,Berlin"
|
|
19
|
-
refute_includes out.string, "Alice,London"
|
|
12
|
+
class RaisingWriter
|
|
13
|
+
def call(**_kwargs)
|
|
14
|
+
raise Errno::ENOENT
|
|
15
|
+
end
|
|
20
16
|
end
|
|
21
17
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
-
fixture = File.expand_path("../../../fixtures/sample_people.csv", __dir__)
|
|
25
|
-
input = [fixture, "", "abc", "3", ""].join("\n") + "\n"
|
|
26
|
-
|
|
27
|
-
use_case = Csvtool::Application::UseCases::RunRowExtraction.new(stdin: StringIO.new(input), stdout: out)
|
|
28
|
-
use_case.call
|
|
29
|
-
|
|
30
|
-
assert_includes out.string, "Start row must be a positive integer."
|
|
31
|
-
refute_includes out.string, "name,city"
|
|
18
|
+
def fixture_path(name)
|
|
19
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
32
20
|
end
|
|
33
21
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def test_rejects_end_before_start
|
|
47
|
-
out = StringIO.new
|
|
48
|
-
fixture = File.expand_path("../../../fixtures/sample_people.csv", __dir__)
|
|
49
|
-
input = [fixture, "", "3", "2", ""].join("\n") + "\n"
|
|
50
|
-
|
|
51
|
-
use_case = Csvtool::Application::UseCases::RunRowExtraction.new(stdin: StringIO.new(input), stdout: out)
|
|
52
|
-
use_case.call
|
|
53
|
-
|
|
54
|
-
assert_includes out.string, "End row must be greater than or equal to start row."
|
|
55
|
-
refute_includes out.string, "name,city"
|
|
22
|
+
def build_session(file_path:, separator: ",", start_row:, end_row:, output: :console, output_path: nil)
|
|
23
|
+
source = Csvtool::Domain::RowSession::RowSource.new(path: file_path, separator: separator)
|
|
24
|
+
row_range = Csvtool::Domain::RowSession::RowRange.new(start_row: start_row, end_row: end_row)
|
|
25
|
+
session = Csvtool::Domain::RowSession::RowSession.start(source: source, row_range: row_range)
|
|
26
|
+
|
|
27
|
+
session.with_output_destination(
|
|
28
|
+
if output == :file
|
|
29
|
+
Csvtool::Domain::Shared::OutputDestination.file(path: output_path)
|
|
30
|
+
else
|
|
31
|
+
Csvtool::Domain::Shared::OutputDestination.console
|
|
32
|
+
end
|
|
33
|
+
)
|
|
56
34
|
end
|
|
57
35
|
|
|
58
|
-
def
|
|
59
|
-
|
|
60
|
-
fixture = File.expand_path("../../../fixtures/sample_people.csv", __dir__)
|
|
61
|
-
input = [fixture, "", "10", "12", ""].join("\n") + "\n"
|
|
36
|
+
def test_read_headers_returns_headers_for_valid_file
|
|
37
|
+
use_case = Csvtool::Application::UseCases::RunRowExtraction.new
|
|
62
38
|
|
|
63
|
-
|
|
64
|
-
use_case.call
|
|
39
|
+
result = use_case.read_headers(file_path: fixture_path("sample_people.csv"), col_sep: ",")
|
|
65
40
|
|
|
66
|
-
|
|
67
|
-
|
|
41
|
+
assert result.ok?
|
|
42
|
+
assert_equal ["name", "city"], result.data[:headers]
|
|
68
43
|
end
|
|
69
44
|
|
|
70
|
-
def
|
|
71
|
-
|
|
72
|
-
fixture = File.expand_path("../../../fixtures/sample_people.tsv", __dir__)
|
|
73
|
-
input = [fixture, "2", "2", "3", ""].join("\n") + "\n"
|
|
45
|
+
def test_read_headers_fails_when_file_is_missing
|
|
46
|
+
use_case = Csvtool::Application::UseCases::RunRowExtraction.new
|
|
74
47
|
|
|
75
|
-
|
|
76
|
-
use_case.call
|
|
48
|
+
result = use_case.read_headers(file_path: "/tmp/not-present.csv", col_sep: ",")
|
|
77
49
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
assert_includes out.string, "Cara,Berlin"
|
|
50
|
+
refute result.ok?
|
|
51
|
+
assert_equal :file_not_found, result.error
|
|
81
52
|
end
|
|
82
53
|
|
|
83
|
-
def
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
54
|
+
def test_extract_streams_rows_for_console_mode
|
|
55
|
+
use_case = Csvtool::Application::UseCases::RunRowExtraction.new
|
|
56
|
+
session = build_session(file_path: fixture_path("sample_people.csv"), start_row: 2, end_row: 3)
|
|
57
|
+
headers = ["name", "city"]
|
|
58
|
+
rows = []
|
|
87
59
|
|
|
88
|
-
|
|
89
|
-
use_case.call
|
|
60
|
+
result = use_case.extract(session: session, headers: headers, on_row: ->(fields) { rows << fields })
|
|
90
61
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
62
|
+
assert result.ok?
|
|
63
|
+
assert_equal true, result.data[:matched]
|
|
64
|
+
assert_equal 3, result.data[:row_count]
|
|
65
|
+
assert_equal [["Bob", "Paris"], ["Cara", "Berlin"]], rows
|
|
94
66
|
end
|
|
95
67
|
|
|
96
|
-
def
|
|
97
|
-
|
|
98
|
-
|
|
68
|
+
def test_extract_writes_rows_to_file_mode
|
|
69
|
+
use_case = Csvtool::Application::UseCases::RunRowExtraction.new
|
|
70
|
+
headers = ["name", "city"]
|
|
99
71
|
|
|
100
72
|
Dir.mktmpdir do |dir|
|
|
101
73
|
output_path = File.join(dir, "rows.csv")
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
74
|
+
session = build_session(
|
|
75
|
+
file_path: fixture_path("sample_people.csv"),
|
|
76
|
+
start_row: 2,
|
|
77
|
+
end_row: 3,
|
|
78
|
+
output: :file,
|
|
79
|
+
output_path: output_path
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
result = use_case.extract(session: session, headers: headers)
|
|
83
|
+
|
|
84
|
+
assert result.ok?
|
|
85
|
+
assert_equal true, result.data[:wrote_rows]
|
|
107
86
|
assert_equal "name,city\nBob,Paris\nCara,Berlin\n", File.read(output_path)
|
|
108
|
-
assert_includes out.string, "Wrote output to #{output_path}"
|
|
109
87
|
end
|
|
110
88
|
end
|
|
111
89
|
|
|
112
|
-
def
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
90
|
+
def test_extract_returns_cannot_write_output_file_when_writer_fails
|
|
91
|
+
use_case = Csvtool::Application::UseCases::RunRowExtraction.new(csv_row_file_writer: RaisingWriter.new)
|
|
92
|
+
headers = ["name", "city"]
|
|
93
|
+
session = build_session(
|
|
94
|
+
file_path: fixture_path("sample_people.csv"),
|
|
95
|
+
start_row: 2,
|
|
96
|
+
end_row: 3,
|
|
97
|
+
output: :file,
|
|
98
|
+
output_path: "/tmp/rows.csv"
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
result = use_case.extract(session: session, headers: headers)
|
|
102
|
+
|
|
103
|
+
refute result.ok?
|
|
104
|
+
assert_equal :cannot_write_output_file, result.error
|
|
105
|
+
assert_equal "/tmp/rows.csv", result.data[:path]
|
|
106
|
+
assert_equal Errno::ENOENT, result.data[:error_class]
|
|
124
107
|
end
|
|
125
108
|
|
|
126
|
-
def
|
|
127
|
-
|
|
128
|
-
|
|
109
|
+
def test_extract_reports_out_of_bounds_via_stats
|
|
110
|
+
use_case = Csvtool::Application::UseCases::RunRowExtraction.new
|
|
111
|
+
session = build_session(file_path: fixture_path("sample_people.csv"), start_row: 10, end_row: 12)
|
|
112
|
+
headers = ["name", "city"]
|
|
129
113
|
|
|
130
|
-
|
|
131
|
-
output_path = File.join(dir, "rows.csv")
|
|
132
|
-
input = [fixture, "", "10", "12", "2", output_path].join("\n") + "\n"
|
|
133
|
-
|
|
134
|
-
use_case = Csvtool::Application::UseCases::RunRowExtraction.new(stdin: StringIO.new(input), stdout: out)
|
|
135
|
-
use_case.call
|
|
136
|
-
end
|
|
114
|
+
result = use_case.extract(session: session, headers: headers)
|
|
137
115
|
|
|
138
|
-
|
|
116
|
+
assert result.ok?
|
|
117
|
+
assert_equal false, result.data[:matched]
|
|
118
|
+
assert_equal 3, result.data[:row_count]
|
|
139
119
|
end
|
|
140
120
|
end
|