csvops 0.4.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -9
- data/docs/architecture.md +148 -18
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +6 -6
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- metadata +60 -1
|
@@ -5,13 +5,15 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class FilePathPrompt
|
|
8
|
+
DEFAULT_LABEL = "CSV file path: "
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
end
|
|
12
14
|
|
|
13
|
-
def call
|
|
14
|
-
@stdout.print
|
|
15
|
+
def call(label: DEFAULT_LABEL)
|
|
16
|
+
@stdout.print label
|
|
15
17
|
@stdin.gets&.strip.to_s
|
|
16
18
|
end
|
|
17
19
|
end
|
|
@@ -5,13 +5,15 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class HeadersPresentPrompt
|
|
8
|
+
DEFAULT_LABEL = "Headers present? [Y/n]: "
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
end
|
|
12
14
|
|
|
13
|
-
def call
|
|
14
|
-
@stdout.print
|
|
15
|
+
def call(label: DEFAULT_LABEL)
|
|
16
|
+
@stdout.print label
|
|
15
17
|
answer = @stdin.gets&.strip.to_s.downcase
|
|
16
18
|
!%w[n no].include?(answer)
|
|
17
19
|
end
|
|
@@ -5,14 +5,16 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class SeparatorPrompt
|
|
8
|
+
DEFAULT_LABEL = "Choose separator:"
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:, errors:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
@errors = errors
|
|
12
14
|
end
|
|
13
15
|
|
|
14
|
-
def call
|
|
15
|
-
@stdout.puts
|
|
16
|
+
def call(label: DEFAULT_LABEL)
|
|
17
|
+
@stdout.puts label
|
|
16
18
|
@stdout.puts "1. comma (,)"
|
|
17
19
|
@stdout.puts "2. tab (\\t)"
|
|
18
20
|
@stdout.puts "3. semicolon (;)"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Prompts
|
|
7
|
+
class YesNoPrompt
|
|
8
|
+
def initialize(stdin:, stdout:)
|
|
9
|
+
@stdin = stdin
|
|
10
|
+
@stdout = stdout
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def call(label:, default:)
|
|
14
|
+
@stdout.print label
|
|
15
|
+
answer = @stdin.gets&.strip.to_s.downcase
|
|
16
|
+
return default if answer.empty?
|
|
17
|
+
return true if %w[y yes].include?(answer)
|
|
18
|
+
return false if %w[n no].include?(answer)
|
|
19
|
+
|
|
20
|
+
default
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/column_session/separator"
|
|
4
|
+
require "csvtool/domain/column_session/csv_source"
|
|
5
|
+
require "csvtool/domain/column_session/column_selection"
|
|
6
|
+
require "csvtool/domain/column_session/extraction_options"
|
|
7
|
+
require "csvtool/domain/column_session/column_session"
|
|
8
|
+
|
|
9
|
+
module Csvtool
|
|
10
|
+
module Interface
|
|
11
|
+
module CLI
|
|
12
|
+
module Workflows
|
|
13
|
+
module Builders
|
|
14
|
+
class ColumnSessionBuilder
|
|
15
|
+
def call(file_path:, col_sep:, column_name:, skip_blanks:)
|
|
16
|
+
separator = Domain::ColumnSession::Separator.new(col_sep)
|
|
17
|
+
source = Domain::ColumnSession::CsvSource.new(path: file_path, separator: separator)
|
|
18
|
+
column_selection = Domain::ColumnSession::ColumnSelection.new(name: column_name)
|
|
19
|
+
options = Domain::ColumnSession::ExtractionOptions.new(skip_blanks: skip_blanks, preview_limit: 10)
|
|
20
|
+
|
|
21
|
+
Domain::ColumnSession::ColumnSession.start(
|
|
22
|
+
source: source,
|
|
23
|
+
column_selection: column_selection,
|
|
24
|
+
options: options
|
|
25
|
+
)
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
|
|
8
|
+
module Interface
|
|
9
|
+
module CLI
|
|
10
|
+
module Workflows
|
|
11
|
+
module Builders
|
|
12
|
+
class CrossCsvDedupeSessionBuilder
|
|
13
|
+
def call(source:, reference:, source_selector:, reference_selector:, trim_whitespace:, case_insensitive:, destination:)
|
|
14
|
+
key_mapping = Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
15
|
+
source_selector: source_selector,
|
|
16
|
+
reference_selector: reference_selector
|
|
17
|
+
)
|
|
18
|
+
match_options = Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
19
|
+
trim_whitespace: trim_whitespace,
|
|
20
|
+
case_insensitive: case_insensitive
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
|
|
24
|
+
source: source,
|
|
25
|
+
reference: reference,
|
|
26
|
+
key_mapping: key_mapping,
|
|
27
|
+
match_options: match_options
|
|
28
|
+
).with_output_destination(destination)
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/row_session/row_source"
|
|
4
|
+
require "csvtool/domain/row_session/row_session"
|
|
5
|
+
|
|
6
|
+
module Csvtool
|
|
7
|
+
module Interface
|
|
8
|
+
module CLI
|
|
9
|
+
module Workflows
|
|
10
|
+
module Builders
|
|
11
|
+
class RowExtractionSessionBuilder
|
|
12
|
+
def call(file_path:, col_sep:, row_range:, destination:)
|
|
13
|
+
source = Domain::RowSession::RowSource.new(path: file_path, separator: col_sep)
|
|
14
|
+
session = Domain::RowSession::RowSession.start(source: source, row_range: row_range)
|
|
15
|
+
session.with_output_destination(destination)
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
4
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
5
|
+
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
|
|
8
|
+
module Interface
|
|
9
|
+
module CLI
|
|
10
|
+
module Workflows
|
|
11
|
+
module Builders
|
|
12
|
+
class RowRandomizationSessionBuilder
|
|
13
|
+
def call(file_path:, col_sep:, headers_present:, seed:, destination:)
|
|
14
|
+
source = Domain::RowRandomizationSession::RandomizationSource.new(
|
|
15
|
+
path: file_path,
|
|
16
|
+
separator: col_sep,
|
|
17
|
+
headers_present: headers_present
|
|
18
|
+
)
|
|
19
|
+
options = Domain::RowRandomizationSession::RandomizationOptions.new(seed: seed)
|
|
20
|
+
session = Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
|
|
21
|
+
session.with_output_destination(destination)
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Workflows
|
|
7
|
+
module Presenters
|
|
8
|
+
class ColumnExtractionPresenter
|
|
9
|
+
def initialize(stdout:)
|
|
10
|
+
@stdout = stdout
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def print_value(value)
|
|
14
|
+
@stdout.puts value
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def print_file_written(path)
|
|
18
|
+
@stdout.puts "Wrote output to #{path}"
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Interface
|
|
7
|
+
module CLI
|
|
8
|
+
module Workflows
|
|
9
|
+
module Presenters
|
|
10
|
+
class CrossCsvDedupePresenter
|
|
11
|
+
def initialize(stdout:, col_sep:)
|
|
12
|
+
@stdout = stdout
|
|
13
|
+
@col_sep = col_sep
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def print_header(headers)
|
|
17
|
+
@stdout.puts
|
|
18
|
+
@stdout.puts ::CSV.generate_line(headers, row_sep: "", col_sep: @col_sep).chomp
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def print_row(fields)
|
|
22
|
+
@stdout.puts ::CSV.generate_line(fields, row_sep: "", col_sep: @col_sep).chomp
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def print_file_written(path)
|
|
26
|
+
@stdout.puts "Wrote output to #{path}"
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def print_summary(stats)
|
|
30
|
+
@stdout.puts "Summary: source_rows=#{stats[:source_rows]} removed_rows=#{stats[:removed_rows]} kept_rows=#{stats[:kept_rows_count]}"
|
|
31
|
+
@stdout.puts "No rows removed; no matching keys found." if stats[:removed_rows].zero?
|
|
32
|
+
@stdout.puts "All source rows were removed by dedupe." if stats[:source_rows].positive? && stats[:kept_rows_count].zero?
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Interface
|
|
7
|
+
module CLI
|
|
8
|
+
module Workflows
|
|
9
|
+
module Presenters
|
|
10
|
+
class RowExtractionPresenter
|
|
11
|
+
def initialize(stdout:, headers:, col_sep:)
|
|
12
|
+
@stdout = stdout
|
|
13
|
+
@headers = headers
|
|
14
|
+
@col_sep = col_sep
|
|
15
|
+
@printed_header = false
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def print_row(fields)
|
|
19
|
+
unless @printed_header
|
|
20
|
+
@stdout.puts ::CSV.generate_line(@headers, row_sep: "", col_sep: @col_sep).chomp
|
|
21
|
+
@printed_header = true
|
|
22
|
+
end
|
|
23
|
+
@stdout.puts ::CSV.generate_line(fields, row_sep: "", col_sep: @col_sep).chomp
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def print_file_written(path)
|
|
27
|
+
@stdout.puts "Wrote output to #{path}"
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Interface
|
|
7
|
+
module CLI
|
|
8
|
+
module Workflows
|
|
9
|
+
module Presenters
|
|
10
|
+
class RowRandomizationPresenter
|
|
11
|
+
def initialize(stdout:, headers:, col_sep:)
|
|
12
|
+
@stdout = stdout
|
|
13
|
+
@headers = headers
|
|
14
|
+
@col_sep = col_sep
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def print_console_start
|
|
18
|
+
@stdout.puts
|
|
19
|
+
@stdout.puts ::CSV.generate_line(@headers, row_sep: "", col_sep: @col_sep).chomp if @headers
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def print_row(fields)
|
|
23
|
+
@stdout.puts ::CSV.generate_line(fields, row_sep: "", col_sep: @col_sep).chomp
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def print_file_written(path)
|
|
27
|
+
@stdout.puts "Wrote output to #{path}"
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "csv"
|
|
4
3
|
require "csvtool/application/use_cases/run_cross_csv_dedupe"
|
|
5
4
|
require "csvtool/interface/cli/errors/presenter"
|
|
6
5
|
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
7
6
|
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
8
7
|
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
8
|
+
require "csvtool/interface/cli/prompts/headers_present_prompt"
|
|
9
|
+
require "csvtool/interface/cli/prompts/yes_no_prompt"
|
|
10
|
+
require "csvtool/interface/cli/prompts/dedupe_key_selector_prompt"
|
|
11
|
+
require "csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder"
|
|
12
|
+
require "csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter"
|
|
13
|
+
require "csvtool/interface/cli/workflows/support/output_destination_mapper"
|
|
14
|
+
require "csvtool/interface/cli/workflows/support/result_error_handler"
|
|
15
|
+
require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
|
|
16
|
+
require "csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step"
|
|
17
|
+
require "csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step"
|
|
18
|
+
require "csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step"
|
|
9
19
|
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
10
20
|
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
11
|
-
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
12
|
-
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
13
|
-
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
14
|
-
require "csvtool/domain/shared/output_destination"
|
|
15
|
-
|
|
16
21
|
module Csvtool
|
|
17
22
|
module Interface
|
|
18
23
|
module CLI
|
|
@@ -23,89 +28,40 @@ module Csvtool
|
|
|
23
28
|
@stdout = stdout
|
|
24
29
|
@use_case = use_case
|
|
25
30
|
@errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
|
|
31
|
+
@session_builder = Builders::CrossCsvDedupeSessionBuilder.new
|
|
32
|
+
@output_destination_mapper = Support::OutputDestinationMapper.new
|
|
33
|
+
@result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
|
|
26
34
|
end
|
|
27
35
|
|
|
28
36
|
def call
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
source_selector = prompt_selector("Source", source.headers_present?)
|
|
59
|
-
return @errors.column_not_found if source_selector.nil?
|
|
60
|
-
reference_selector = prompt_selector("Reference", reference.headers_present?)
|
|
61
|
-
return @errors.column_not_found if reference_selector.nil?
|
|
62
|
-
|
|
63
|
-
@stdout.print "Trim whitespace before matching? [Y/n]: "
|
|
64
|
-
trim_whitespace = read_yes_no(default: true)
|
|
65
|
-
@stdout.print "Case-insensitive matching? [y/N]: "
|
|
66
|
-
case_insensitive = read_yes_no(default: false)
|
|
67
|
-
|
|
68
|
-
key_mapping = Domain::CrossCsvDedupeSession::KeyMapping.new(
|
|
69
|
-
source_selector: source_selector,
|
|
70
|
-
reference_selector: reference_selector
|
|
71
|
-
)
|
|
72
|
-
match_options = Domain::CrossCsvDedupeSession::MatchOptions.new(
|
|
73
|
-
trim_whitespace: trim_whitespace,
|
|
74
|
-
case_insensitive: case_insensitive
|
|
75
|
-
)
|
|
76
|
-
session = Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
|
|
77
|
-
source: source,
|
|
78
|
-
reference: reference,
|
|
79
|
-
key_mapping: key_mapping,
|
|
80
|
-
match_options: match_options
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
output_destination = Interface::CLI::Prompts::OutputDestinationPrompt.new(
|
|
84
|
-
stdin: @stdin,
|
|
85
|
-
stdout: @stdout,
|
|
86
|
-
errors: @errors
|
|
87
|
-
).call
|
|
88
|
-
return if output_destination.nil?
|
|
89
|
-
session = session.with_output_destination(
|
|
90
|
-
if output_destination[:mode] == :file
|
|
91
|
-
Domain::Shared::OutputDestination.file(path: output_destination[:path])
|
|
92
|
-
else
|
|
93
|
-
Domain::Shared::OutputDestination.console
|
|
94
|
-
end
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
result = @use_case.call(
|
|
98
|
-
session: session,
|
|
99
|
-
on_header: ->(headers) { print_header(headers, col_sep: session.source.separator) },
|
|
100
|
-
on_row: ->(fields) { print_row(fields, col_sep: session.source.separator) }
|
|
101
|
-
)
|
|
102
|
-
return handle_error(result) unless result.ok?
|
|
103
|
-
|
|
104
|
-
@stdout.puts "Wrote output to #{result.data[:output_path]}" if session.output_destination.file?
|
|
105
|
-
stats = result.data[:stats]
|
|
106
|
-
@stdout.puts "Summary: source_rows=#{stats[:source_rows]} removed_rows=#{stats[:removed_rows]} kept_rows=#{stats[:kept_rows_count]}"
|
|
107
|
-
@stdout.puts "No rows removed; no matching keys found." if stats[:removed_rows].zero?
|
|
108
|
-
@stdout.puts "All source rows were removed by dedupe." if stats[:source_rows].positive? && stats[:kept_rows_count].zero?
|
|
37
|
+
context = {
|
|
38
|
+
use_case: @use_case,
|
|
39
|
+
session_builder: @session_builder,
|
|
40
|
+
output_destination_mapper: @output_destination_mapper,
|
|
41
|
+
presenter_factory: ->(col_sep:) { Presenters::CrossCsvDedupePresenter.new(stdout: @stdout, col_sep: col_sep) },
|
|
42
|
+
handle_error: method(:handle_error)
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
pipeline = Steps::WorkflowStepPipeline.new(steps: [
|
|
46
|
+
Steps::CrossCsvDedupe::CollectProfilesStep.new(
|
|
47
|
+
file_path_prompt: Interface::CLI::Prompts::FilePathPrompt.new(stdin: @stdin, stdout: @stdout),
|
|
48
|
+
separator_prompt: Interface::CLI::Prompts::SeparatorPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors),
|
|
49
|
+
headers_present_prompt: Interface::CLI::Prompts::HeadersPresentPrompt.new(stdin: @stdin, stdout: @stdout),
|
|
50
|
+
errors: @errors
|
|
51
|
+
),
|
|
52
|
+
Steps::CrossCsvDedupe::CollectOptionsStep.new(
|
|
53
|
+
selector_prompt: Interface::CLI::Prompts::DedupeKeySelectorPrompt.new(stdin: @stdin, stdout: @stdout),
|
|
54
|
+
yes_no_prompt: Interface::CLI::Prompts::YesNoPrompt.new(stdin: @stdin, stdout: @stdout),
|
|
55
|
+
output_destination_prompt: Interface::CLI::Prompts::OutputDestinationPrompt.new(
|
|
56
|
+
stdin: @stdin,
|
|
57
|
+
stdout: @stdout,
|
|
58
|
+
errors: @errors
|
|
59
|
+
),
|
|
60
|
+
errors: @errors
|
|
61
|
+
),
|
|
62
|
+
Steps::CrossCsvDedupe::ExecuteStep.new
|
|
63
|
+
])
|
|
64
|
+
pipeline.call(context)
|
|
109
65
|
rescue ArgumentError => e
|
|
110
66
|
return @errors.empty_output_path if e.message == "file output path cannot be empty"
|
|
111
67
|
|
|
@@ -114,48 +70,15 @@ module Csvtool
|
|
|
114
70
|
|
|
115
71
|
private
|
|
116
72
|
|
|
117
|
-
def prompt_selector(label, headers_present)
|
|
118
|
-
if headers_present
|
|
119
|
-
@stdout.print "#{label} key column name: "
|
|
120
|
-
else
|
|
121
|
-
@stdout.print "#{label} key column index (1-based): "
|
|
122
|
-
end
|
|
123
|
-
input = @stdin.gets&.strip.to_s
|
|
124
|
-
Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: headers_present, input: input)
|
|
125
|
-
rescue ArgumentError
|
|
126
|
-
nil
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
def print_header(headers, col_sep:)
|
|
130
|
-
@stdout.puts
|
|
131
|
-
@stdout.puts ::CSV.generate_line(headers, row_sep: "", col_sep: col_sep).chomp
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
def print_row(fields, col_sep:)
|
|
135
|
-
@stdout.puts ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep).chomp
|
|
136
|
-
end
|
|
137
|
-
|
|
138
73
|
def handle_error(result)
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
@errors.cannot_read_file(result.data[:path])
|
|
146
|
-
when :cannot_write_output_file
|
|
147
|
-
@errors.cannot_write_output_file(result.data[:path], result.data[:error_class])
|
|
148
|
-
end
|
|
74
|
+
@result_error_handler.call(result, {
|
|
75
|
+
column_not_found: ->(_r, errors) { errors.column_not_found },
|
|
76
|
+
could_not_parse_csv: ->(_r, errors) { errors.could_not_parse_csv },
|
|
77
|
+
cannot_read_file: ->(r, errors) { errors.cannot_read_file(r.data[:path]) },
|
|
78
|
+
cannot_write_output_file: ->(r, errors) { errors.cannot_write_output_file(r.data[:path], r.data[:error_class]) }
|
|
79
|
+
})
|
|
149
80
|
end
|
|
150
81
|
|
|
151
|
-
def read_yes_no(default:)
|
|
152
|
-
answer = @stdin.gets&.strip.to_s.downcase
|
|
153
|
-
return default if answer.empty?
|
|
154
|
-
return true if %w[y yes].include?(answer)
|
|
155
|
-
return false if %w[n no].include?(answer)
|
|
156
|
-
|
|
157
|
-
default
|
|
158
|
-
end
|
|
159
82
|
end
|
|
160
83
|
end
|
|
161
84
|
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/application/use_cases/run_extraction"
|
|
4
|
+
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
+
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
+
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
+
require "csvtool/interface/cli/prompts/column_selector_prompt"
|
|
8
|
+
require "csvtool/interface/cli/prompts/skip_blanks_prompt"
|
|
9
|
+
require "csvtool/interface/cli/prompts/confirm_prompt"
|
|
10
|
+
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
11
|
+
require "csvtool/interface/cli/workflows/builders/column_session_builder"
|
|
12
|
+
require "csvtool/interface/cli/workflows/presenters/column_extraction_presenter"
|
|
13
|
+
require "csvtool/interface/cli/workflows/support/output_destination_mapper"
|
|
14
|
+
require "csvtool/interface/cli/workflows/support/result_error_handler"
|
|
15
|
+
require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
|
|
16
|
+
require "csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step"
|
|
17
|
+
require "csvtool/interface/cli/workflows/steps/extraction/build_preview_step"
|
|
18
|
+
require "csvtool/interface/cli/workflows/steps/extraction/collect_destination_step"
|
|
19
|
+
require "csvtool/interface/cli/workflows/steps/extraction/execute_step"
|
|
20
|
+
|
|
21
|
+
module Csvtool
|
|
22
|
+
module Interface
|
|
23
|
+
module CLI
|
|
24
|
+
module Workflows
|
|
25
|
+
class RunExtractionWorkflow
|
|
26
|
+
def initialize(stdin:, stdout:, use_case: Application::UseCases::RunExtraction.new)
|
|
27
|
+
@stdin = stdin
|
|
28
|
+
@stdout = stdout
|
|
29
|
+
@use_case = use_case
|
|
30
|
+
@errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
|
|
31
|
+
@session_builder = Builders::ColumnSessionBuilder.new
|
|
32
|
+
@presenter = Presenters::ColumnExtractionPresenter.new(stdout: @stdout)
|
|
33
|
+
@output_destination_mapper = Support::OutputDestinationMapper.new
|
|
34
|
+
@result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def call
|
|
38
|
+
context = {
|
|
39
|
+
use_case: @use_case,
|
|
40
|
+
session_builder: @session_builder,
|
|
41
|
+
output_destination_mapper: @output_destination_mapper,
|
|
42
|
+
presenter: @presenter,
|
|
43
|
+
handle_error: method(:handle_error)
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
pipeline = Steps::WorkflowStepPipeline.new(steps: [
|
|
47
|
+
Steps::Extraction::CollectInputsStep.new(
|
|
48
|
+
file_path_prompt: Interface::CLI::Prompts::FilePathPrompt.new(stdin: @stdin, stdout: @stdout),
|
|
49
|
+
separator_prompt: Interface::CLI::Prompts::SeparatorPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors),
|
|
50
|
+
column_selector_prompt: Interface::CLI::Prompts::ColumnSelectorPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors),
|
|
51
|
+
skip_blanks_prompt: Interface::CLI::Prompts::SkipBlanksPrompt.new(stdin: @stdin, stdout: @stdout)
|
|
52
|
+
),
|
|
53
|
+
Steps::Extraction::BuildPreviewStep.new(
|
|
54
|
+
confirm_prompt: Interface::CLI::Prompts::ConfirmPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors)
|
|
55
|
+
),
|
|
56
|
+
Steps::Extraction::CollectDestinationStep.new(
|
|
57
|
+
output_destination_prompt: Interface::CLI::Prompts::OutputDestinationPrompt.new(
|
|
58
|
+
stdin: @stdin,
|
|
59
|
+
stdout: @stdout,
|
|
60
|
+
errors: @errors
|
|
61
|
+
)
|
|
62
|
+
),
|
|
63
|
+
Steps::Extraction::ExecuteStep.new
|
|
64
|
+
])
|
|
65
|
+
pipeline.call(context)
|
|
66
|
+
rescue ArgumentError => e
|
|
67
|
+
return @errors.empty_output_path if e.message == "file output path cannot be empty"
|
|
68
|
+
|
|
69
|
+
raise e
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
private
|
|
73
|
+
|
|
74
|
+
def handle_error(result)
|
|
75
|
+
@result_error_handler.call(result, {
|
|
76
|
+
file_not_found: ->(r, errors) { errors.file_not_found(r.data[:path]) },
|
|
77
|
+
no_headers: ->(_r, errors) { errors.no_headers },
|
|
78
|
+
column_not_found: ->(_r, errors) { errors.column_not_found },
|
|
79
|
+
could_not_parse_csv: ->(_r, errors) { errors.could_not_parse_csv },
|
|
80
|
+
cannot_read_file: ->(r, errors) { errors.cannot_read_file(r.data[:path]) },
|
|
81
|
+
cannot_write_output_file: ->(r, errors) { errors.cannot_write_output_file(r.data[:path], r.data[:error_class]) }
|
|
82
|
+
})
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|