csvops 0.3.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -149
- data/docs/architecture.md +396 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +130 -16
- data/test/csvtool/cli_unit_test.rb +16 -3
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- metadata +93 -8
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks the user which column acts as the dedupe key and converts the
        # reply into a ColumnSelector.
        class DedupeKeySelectorPrompt
          # @param stdin [#gets] stream the reply is read from
          # @param stdout [#print] stream the question is written to
          def initialize(stdin:, stdout:)
            @stdin = stdin
            @stdout = stdout
          end

          # Asks for a column name when headers are present, otherwise for a
          # 1-based column index, and hands the stripped reply to
          # ColumnSelector.from_input.
          #
          # @param label [String] text placed in front of the question
          # @param headers_present [Boolean] selects which question is asked
          # @return [Object, nil] the value returned by ColumnSelector.from_input,
          #   or nil when an ArgumentError is raised anywhere in this method
          def call(label:, headers_present:)
            question = headers_present ? "key column name" : "key column index (1-based)"
            @stdout.print "#{label} #{question}: "

            reply = @stdin.gets
            # A nil reply (end of input) is treated as an empty answer.
            text = reply.nil? ? "" : reply.strip.to_s
            Domain::CrossCsvDedupeSession::ColumnSelector.from_input(
              headers_present: headers_present,
              input: text
            )
          rescue ArgumentError
            nil
          end
        end
      end
    end
  end
end
|
|
@@ -5,13 +5,15 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class FilePathPrompt
|
|
8
|
+
DEFAULT_LABEL = "CSV file path: "
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
end
|
|
12
14
|
|
|
13
|
-
def call
|
|
14
|
-
@stdout.print
|
|
15
|
+
# Writes +label+ to stdout without a trailing newline, then reads one line
# from stdin.
#
# @param label [String] prompt text; defaults to DEFAULT_LABEL
# @return [String] the reply with surrounding whitespace stripped, or ""
#   when stdin returns nil (end of input)
def call(label: DEFAULT_LABEL)
  @stdout.print(label)
  reply = @stdin.gets
  reply.nil? ? "" : reply.strip.to_s
end
|
|
17
19
|
end
|
|
@@ -5,13 +5,15 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class HeadersPresentPrompt
|
|
8
|
+
DEFAULT_LABEL = "Headers present? [Y/n]: "
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
end
|
|
12
14
|
|
|
13
|
-
def call
|
|
14
|
-
@stdout.print
|
|
15
|
+
# Writes +label+ to stdout without a trailing newline and interprets the
# reply as a yes/no answer that defaults to yes.
#
# @param label [String] prompt text; defaults to DEFAULT_LABEL
# @return [Boolean] false only when the stripped, downcased reply is "n" or
#   "no"; true for anything else, including an empty reply or end of input
def call(label: DEFAULT_LABEL)
  @stdout.print(label)
  reply = @stdin.gets
  normalized = (reply.nil? ? "" : reply.strip.to_s).downcase
  normalized != "n" && normalized != "no"
end
|
|
@@ -5,14 +5,16 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class SeparatorPrompt
|
|
8
|
+
DEFAULT_LABEL = "Choose separator:"
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:, errors:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
@errors = errors
|
|
12
14
|
end
|
|
13
15
|
|
|
14
|
-
def call
|
|
15
|
-
@stdout.puts
|
|
16
|
+
def call(label: DEFAULT_LABEL)
|
|
17
|
+
@stdout.puts label
|
|
16
18
|
@stdout.puts "1. comma (,)"
|
|
17
19
|
@stdout.puts "2. tab (\\t)"
|
|
18
20
|
@stdout.puts "3. semicolon (;)"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Generic yes/no question with a caller-supplied default.
        class YesNoPrompt
          # @param stdin [#gets] stream the reply is read from
          # @param stdout [#print] stream the question is written to
          def initialize(stdin:, stdout:)
            @stdin = stdin
            @stdout = stdout
          end

          # Prints +label+ and reads one line of input.
          #
          # @param label [String] prompt text, written without a newline
          # @param default [Object] value returned for an empty, missing or
          #   unrecognised reply
          # @return [Boolean, Object] true for "y"/"yes", false for "n"/"no"
          #   (case-insensitive, whitespace stripped), otherwise +default+
          def call(label:, default:)
            @stdout.print(label)
            reply = @stdin.gets

            case (reply.nil? ? "" : reply.strip.to_s).downcase
            when "y", "yes" then true
            when "n", "no" then false
            else default
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/column_session/separator"
|
|
4
|
+
require "csvtool/domain/column_session/csv_source"
|
|
5
|
+
require "csvtool/domain/column_session/column_selection"
|
|
6
|
+
require "csvtool/domain/column_session/extraction_options"
|
|
7
|
+
require "csvtool/domain/column_session/column_session"
|
|
8
|
+
|
|
9
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a ColumnSession from the raw values collected by the
          # column-extraction workflow.
          class ColumnSessionBuilder
            # Preview size used when the caller does not specify one.
            DEFAULT_PREVIEW_LIMIT = 10

            # @param file_path [String] path handed to CsvSource
            # @param col_sep [String] separator value wrapped in a Separator
            # @param column_name [String] name handed to ColumnSelection
            # @param skip_blanks [Boolean] forwarded to ExtractionOptions
            # @param preview_limit [Integer] forwarded to ExtractionOptions;
            #   defaults to DEFAULT_PREVIEW_LIMIT, the value that was
            #   previously hard-coded
            # @return [Object] whatever ColumnSession.start returns
            def call(file_path:, col_sep:, column_name:, skip_blanks:, preview_limit: DEFAULT_PREVIEW_LIMIT)
              separator = Domain::ColumnSession::Separator.new(col_sep)
              source = Domain::ColumnSession::CsvSource.new(path: file_path, separator: separator)
              column_selection = Domain::ColumnSession::ColumnSelection.new(name: column_name)
              options = Domain::ColumnSession::ExtractionOptions.new(
                skip_blanks: skip_blanks,
                preview_limit: preview_limit
              )

              Domain::ColumnSession::ColumnSession.start(
                source: source,
                column_selection: column_selection,
                options: options
              )
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a CrossCsvDedupeSession, including its output
          # destination, from the values collected by the dedupe workflow.
          class CrossCsvDedupeSessionBuilder
            # @param source [Object] forwarded unchanged to the session
            # @param reference [Object] forwarded unchanged to the session
            # @param source_selector [Object] key selector for the source file
            # @param reference_selector [Object] key selector for the reference file
            # @param trim_whitespace [Boolean] forwarded to MatchOptions
            # @param case_insensitive [Boolean] forwarded to MatchOptions
            # @param destination [Object] passed to with_output_destination
            # @return [Object] the result of with_output_destination on the
            #   started session
            def call(source:, reference:, source_selector:, reference_selector:, trim_whitespace:, case_insensitive:, destination:)
              dedupe = Domain::CrossCsvDedupeSession
              session = dedupe::CrossCsvDedupeSession.start(
                source: source,
                reference: reference,
                key_mapping: dedupe::KeyMapping.new(
                  source_selector: source_selector,
                  reference_selector: reference_selector
                ),
                match_options: dedupe::MatchOptions.new(
                  trim_whitespace: trim_whitespace,
                  case_insensitive: case_insensitive
                )
              )
              session.with_output_destination(destination)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/row_session/row_source"
|
|
4
|
+
require "csvtool/domain/row_session/row_session"
|
|
5
|
+
|
|
6
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a RowSession, including its output destination, from
          # the values collected by the row-extraction workflow.
          class RowExtractionSessionBuilder
            # @param file_path [String] path handed to RowSource
            # @param col_sep [String] separator handed to RowSource
            # @param row_range [Object] forwarded to RowSession.start
            # @param destination [Object] passed to with_output_destination
            # @return [Object] the result of with_output_destination on the
            #   started session
            def call(file_path:, col_sep:, row_range:, destination:)
              row_source = Domain::RowSession::RowSource.new(path: file_path, separator: col_sep)

              Domain::RowSession::RowSession
                .start(source: row_source, row_range: row_range)
                .with_output_destination(destination)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
4
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
5
|
+
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a RandomizationSession, including its output
          # destination, from the values collected by the randomization
          # workflow.
          class RowRandomizationSessionBuilder
            # @param file_path [String] path handed to RandomizationSource
            # @param col_sep [String] separator handed to RandomizationSource
            # @param headers_present [Boolean] handed to RandomizationSource
            # @param seed [Object] forwarded to RandomizationOptions
            # @param destination [Object] passed to with_output_destination
            # @return [Object] the result of with_output_destination on the
            #   started session
            def call(file_path:, col_sep:, headers_present:, seed:, destination:)
              randomization = Domain::RowRandomizationSession
              source = randomization::RandomizationSource.new(
                path: file_path, separator: col_sep, headers_present: headers_present
              )
              options = randomization::RandomizationOptions.new(seed: seed)

              randomization::RandomizationSession
                .start(source: source, options: options)
                .with_output_destination(destination)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Writes column-extraction output to the given stream.
          class ColumnExtractionPresenter
            # @param stdout [#puts] stream all output is written to
            def initialize(stdout:)
              @stdout = stdout
            end

            # Writes one extracted value on its own line.
            #
            # @param value [Object] value passed straight to puts
            def print_value(value)
              @stdout.puts(value)
            end

            # Reports the path the output file was written to.
            #
            # @param path [#to_s] output file path
            def print_file_written(path)
              message = "Wrote output to #{path}"
              @stdout.puts(message)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Writes cross-CSV dedupe output (rows, file notice and summary)
          # to the given stream.
          class CrossCsvDedupePresenter
            # @param stdout [#puts] stream all output is written to
            # @param col_sep [String] column separator used when rendering rows
            def initialize(stdout:, col_sep:)
              @stdout = stdout
              @col_sep = col_sep
            end

            # Writes a blank line followed by the header row.
            #
            # @param headers [Array] header fields
            def print_header(headers)
              @stdout.puts
              write_csv_line(headers)
            end

            # Writes one data row.
            #
            # @param fields [Array] row fields
            def print_row(fields)
              write_csv_line(fields)
            end

            # Reports the path the output file was written to.
            #
            # @param path [#to_s] output file path
            def print_file_written(path)
              @stdout.puts("Wrote output to #{path}")
            end

            # Writes the summary line, plus a note when nothing was removed
            # and a note when every source row was removed.
            #
            # @param stats [#[]] read at :source_rows, :removed_rows and
            #   :kept_rows_count
            def print_summary(stats)
              source_rows = stats[:source_rows]
              removed_rows = stats[:removed_rows]
              kept_rows = stats[:kept_rows_count]

              @stdout.puts "Summary: source_rows=#{source_rows} removed_rows=#{removed_rows} kept_rows=#{kept_rows}"
              @stdout.puts "No rows removed; no matching keys found." if removed_rows.zero?
              return unless source_rows.positive? && kept_rows.zero?

              @stdout.puts "All source rows were removed by dedupe."
            end

            private

            # Renders +values+ as a single CSV line using the configured
            # separator and writes it.
            def write_csv_line(values)
              @stdout.puts ::CSV.generate_line(values, row_sep: "", col_sep: @col_sep).chomp
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Writes extracted rows to the given stream, emitting the header
          # row once, immediately before the first data row.
          class RowExtractionPresenter
            # @param stdout [#puts] stream all output is written to
            # @param headers [Array] header fields printed before the first row
            # @param col_sep [String] column separator used when rendering rows
            def initialize(stdout:, headers:, col_sep:)
              @stdout = stdout
              @headers = headers
              @col_sep = col_sep
              @printed_header = false
            end

            # Writes one data row, preceded by the header row on first use.
            #
            # @param fields [Array] row fields
            def print_row(fields)
              print_header_once
              write_csv_line(fields)
            end

            # Reports the path the output file was written to.
            #
            # @param path [#to_s] output file path
            def print_file_written(path)
              @stdout.puts("Wrote output to #{path}")
            end

            private

            # Writes the header row unless it has already been written.
            def print_header_once
              return if @printed_header

              write_csv_line(@headers)
              @printed_header = true
            end

            # Renders +values+ as a single CSV line using the configured
            # separator and writes it.
            def write_csv_line(values)
              @stdout.puts ::CSV.generate_line(values, row_sep: "", col_sep: @col_sep).chomp
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Writes randomized rows to the given stream.
          class RowRandomizationPresenter
            # @param stdout [#puts] stream all output is written to
            # @param headers [Array, nil] header fields; nothing is printed
            #   for them when nil
            # @param col_sep [String] column separator used when rendering rows
            def initialize(stdout:, headers:, col_sep:)
              @stdout = stdout
              @headers = headers
              @col_sep = col_sep
            end

            # Writes a blank line and then, when headers were supplied, the
            # header row.
            def print_console_start
              @stdout.puts
              return unless @headers

              write_csv_line(@headers)
            end

            # Writes one data row.
            #
            # @param fields [Array] row fields
            def print_row(fields)
              write_csv_line(fields)
            end

            # Reports the path the output file was written to.
            #
            # @param path [#to_s] output file path
            def print_file_written(path)
              @stdout.puts("Wrote output to #{path}")
            end

            private

            # Renders +values+ as a single CSV line using the configured
            # separator and writes it.
            def write_csv_line(values)
              @stdout.puts ::CSV.generate_line(values, row_sep: "", col_sep: @col_sep).chomp
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/application/use_cases/run_cross_csv_dedupe"
|
|
4
|
+
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
+
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
+
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
+
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
8
|
+
require "csvtool/interface/cli/prompts/headers_present_prompt"
|
|
9
|
+
require "csvtool/interface/cli/prompts/yes_no_prompt"
|
|
10
|
+
require "csvtool/interface/cli/prompts/dedupe_key_selector_prompt"
|
|
11
|
+
require "csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder"
|
|
12
|
+
require "csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter"
|
|
13
|
+
require "csvtool/interface/cli/workflows/support/output_destination_mapper"
|
|
14
|
+
require "csvtool/interface/cli/workflows/support/result_error_handler"
|
|
15
|
+
require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
|
|
16
|
+
require "csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step"
|
|
17
|
+
require "csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step"
|
|
18
|
+
require "csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step"
|
|
19
|
+
require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
|
|
20
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
21
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        # Interactive workflow for cross-CSV dedupe: builds the step pipeline
        # (collect profiles, collect options, execute) and runs it with a
        # shared context hash.
        class RunCrossCsvDedupeWorkflow
          # ArgumentError message that is reported to the user instead of
          # being re-raised.
          EMPTY_OUTPUT_PATH_MESSAGE = "file output path cannot be empty"

          # @param stdin [#gets] input stream handed to every prompt
          # @param stdout [#puts] output stream handed to prompts and presenters
          # @param use_case [Object] use case placed in the pipeline context
          def initialize(stdin:, stdout:, use_case: Application::UseCases::RunCrossCsvDedupe.new)
            @stdin = stdin
            @stdout = stdout
            @use_case = use_case
            @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
            @session_builder = Builders::CrossCsvDedupeSessionBuilder.new
            @output_destination_mapper = Support::OutputDestinationMapper.new
            @result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
          end

          # Runs the pipeline.
          #
          # @return [Object] the pipeline result, or the result of
          #   errors.empty_output_path when an empty output path was given
          # @raise [ArgumentError] any other ArgumentError is re-raised
          def call
            context = build_context
            build_pipeline.call(context)
          rescue ArgumentError => e
            raise unless e.message == EMPTY_OUTPUT_PATH_MESSAGE

            @errors.empty_output_path
          end

          private

          # Shared state passed to every step.
          def build_context
            {
              use_case: @use_case,
              session_builder: @session_builder,
              output_destination_mapper: @output_destination_mapper,
              presenter_factory: ->(col_sep:) { Presenters::CrossCsvDedupePresenter.new(stdout: @stdout, col_sep: col_sep) },
              handle_error: method(:handle_error)
            }
          end

          # Steps in execution order.
          def build_pipeline
            Steps::WorkflowStepPipeline.new(
              steps: [collect_profiles_step, collect_options_step, Steps::CrossCsvDedupe::ExecuteStep.new]
            )
          end

          def collect_profiles_step
            Steps::CrossCsvDedupe::CollectProfilesStep.new(
              file_path_prompt: Interface::CLI::Prompts::FilePathPrompt.new(**io_streams),
              separator_prompt: Interface::CLI::Prompts::SeparatorPrompt.new(**io_streams, errors: @errors),
              headers_present_prompt: Interface::CLI::Prompts::HeadersPresentPrompt.new(**io_streams),
              errors: @errors
            )
          end

          def collect_options_step
            Steps::CrossCsvDedupe::CollectOptionsStep.new(
              selector_prompt: Interface::CLI::Prompts::DedupeKeySelectorPrompt.new(**io_streams),
              yes_no_prompt: Interface::CLI::Prompts::YesNoPrompt.new(**io_streams),
              output_destination_prompt: Interface::CLI::Prompts::OutputDestinationPrompt.new(**io_streams, errors: @errors),
              errors: @errors
            )
          end

          # Keyword arguments shared by every prompt constructor.
          def io_streams
            { stdin: @stdin, stdout: @stdout }
          end

          # Delegates a failed result to the error handler together with the
          # handler table for this workflow.
          def handle_error(result)
            @result_error_handler.call(result, error_handlers)
          end

          # Table of callables handed to the result error handler; each one
          # receives the result and the error presenter.
          def error_handlers
            {
              column_not_found: ->(_r, errors) { errors.column_not_found },
              could_not_parse_csv: ->(_r, errors) { errors.could_not_parse_csv },
              cannot_read_file: ->(r, errors) { errors.cannot_read_file(r.data[:path]) },
              cannot_write_output_file: ->(r, errors) { errors.cannot_write_output_file(r.data[:path], r.data[:error_class]) }
            }
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/application/use_cases/run_extraction"
|
|
4
|
+
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
+
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
+
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
+
require "csvtool/interface/cli/prompts/column_selector_prompt"
|
|
8
|
+
require "csvtool/interface/cli/prompts/skip_blanks_prompt"
|
|
9
|
+
require "csvtool/interface/cli/prompts/confirm_prompt"
|
|
10
|
+
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
11
|
+
require "csvtool/interface/cli/workflows/builders/column_session_builder"
|
|
12
|
+
require "csvtool/interface/cli/workflows/presenters/column_extraction_presenter"
|
|
13
|
+
require "csvtool/interface/cli/workflows/support/output_destination_mapper"
|
|
14
|
+
require "csvtool/interface/cli/workflows/support/result_error_handler"
|
|
15
|
+
require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
|
|
16
|
+
require "csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step"
|
|
17
|
+
require "csvtool/interface/cli/workflows/steps/extraction/build_preview_step"
|
|
18
|
+
require "csvtool/interface/cli/workflows/steps/extraction/collect_destination_step"
|
|
19
|
+
require "csvtool/interface/cli/workflows/steps/extraction/execute_step"
|
|
20
|
+
|
|
21
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        # Interactive workflow for column extraction: builds the step
        # pipeline (collect inputs, preview, collect destination, execute)
        # and runs it with a shared context hash.
        class RunExtractionWorkflow
          # ArgumentError message that is reported to the user instead of
          # being re-raised.
          EMPTY_OUTPUT_PATH_MESSAGE = "file output path cannot be empty"

          # @param stdin [#gets] input stream handed to every prompt
          # @param stdout [#puts] output stream handed to prompts and the presenter
          # @param use_case [Object] use case placed in the pipeline context
          def initialize(stdin:, stdout:, use_case: Application::UseCases::RunExtraction.new)
            @stdin = stdin
            @stdout = stdout
            @use_case = use_case
            @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
            @session_builder = Builders::ColumnSessionBuilder.new
            @presenter = Presenters::ColumnExtractionPresenter.new(stdout: @stdout)
            @output_destination_mapper = Support::OutputDestinationMapper.new
            @result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
          end

          # Runs the pipeline.
          #
          # @return [Object] the pipeline result, or the result of
          #   errors.empty_output_path when an empty output path was given
          # @raise [ArgumentError] any other ArgumentError is re-raised
          def call
            context = build_context
            build_pipeline.call(context)
          rescue ArgumentError => e
            raise unless e.message == EMPTY_OUTPUT_PATH_MESSAGE

            @errors.empty_output_path
          end

          private

          # Shared state passed to every step.
          def build_context
            {
              use_case: @use_case,
              session_builder: @session_builder,
              output_destination_mapper: @output_destination_mapper,
              presenter: @presenter,
              handle_error: method(:handle_error)
            }
          end

          # Steps in execution order.
          def build_pipeline
            Steps::WorkflowStepPipeline.new(
              steps: [
                collect_inputs_step,
                build_preview_step,
                collect_destination_step,
                Steps::Extraction::ExecuteStep.new
              ]
            )
          end

          def collect_inputs_step
            Steps::Extraction::CollectInputsStep.new(
              file_path_prompt: Interface::CLI::Prompts::FilePathPrompt.new(**io_streams),
              separator_prompt: Interface::CLI::Prompts::SeparatorPrompt.new(**io_streams, errors: @errors),
              column_selector_prompt: Interface::CLI::Prompts::ColumnSelectorPrompt.new(**io_streams, errors: @errors),
              skip_blanks_prompt: Interface::CLI::Prompts::SkipBlanksPrompt.new(**io_streams)
            )
          end

          def build_preview_step
            Steps::Extraction::BuildPreviewStep.new(
              confirm_prompt: Interface::CLI::Prompts::ConfirmPrompt.new(**io_streams, errors: @errors)
            )
          end

          def collect_destination_step
            Steps::Extraction::CollectDestinationStep.new(
              output_destination_prompt: Interface::CLI::Prompts::OutputDestinationPrompt.new(**io_streams, errors: @errors)
            )
          end

          # Keyword arguments shared by every prompt constructor.
          def io_streams
            { stdin: @stdin, stdout: @stdout }
          end

          # Delegates a failed result to the error handler together with the
          # handler table for this workflow.
          def handle_error(result)
            @result_error_handler.call(result, error_handlers)
          end

          # Table of callables handed to the result error handler; each one
          # receives the result and the error presenter.
          def error_handlers
            {
              file_not_found: ->(r, errors) { errors.file_not_found(r.data[:path]) },
              no_headers: ->(_r, errors) { errors.no_headers },
              column_not_found: ->(_r, errors) { errors.column_not_found },
              could_not_parse_csv: ->(_r, errors) { errors.could_not_parse_csv },
              cannot_read_file: ->(r, errors) { errors.cannot_read_file(r.data[:path]) },
              cannot_write_output_file: ->(r, errors) { errors.cannot_write_output_file(r.data[:path], r.data[:error_class]) }
            }
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/application/use_cases/run_row_extraction"
|
|
4
|
+
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
+
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
+
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
+
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
8
|
+
require "csvtool/interface/cli/workflows/builders/row_extraction_session_builder"
|
|
9
|
+
require "csvtool/interface/cli/workflows/presenters/row_extraction_presenter"
|
|
10
|
+
require "csvtool/interface/cli/workflows/support/output_destination_mapper"
|
|
11
|
+
require "csvtool/interface/cli/workflows/support/result_error_handler"
|
|
12
|
+
require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
|
|
13
|
+
require "csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step"
|
|
14
|
+
require "csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step"
|
|
15
|
+
require "csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step"
|
|
16
|
+
require "csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step"
|
|
17
|
+
require "csvtool/interface/cli/workflows/steps/row_extraction/execute_step"
|
|
18
|
+
require "csvtool/domain/row_session/row_range"
|
|
19
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        # Interactive workflow for row-range extraction: builds the step
        # pipeline (collect source, read headers, collect range, collect
        # destination, execute) and runs it with a shared context hash.
        class RunRowExtractionWorkflow
          # ArgumentError message that is reported to the user instead of
          # being re-raised.
          EMPTY_OUTPUT_PATH_MESSAGE = "file output path cannot be empty"

          # @param stdin [#gets] input stream handed to prompts and steps
          # @param stdout [#puts] output stream handed to prompts and steps
          # @param use_case [Object] use case placed in the pipeline context
          def initialize(stdin:, stdout:, use_case: Application::UseCases::RunRowExtraction.new)
            @stdin = stdin
            @stdout = stdout
            @use_case = use_case
            @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
            @session_builder = Builders::RowExtractionSessionBuilder.new
            @output_destination_mapper = Support::OutputDestinationMapper.new
            @result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
          end

          # Runs the pipeline. Row-range validation errors and an empty
          # output path are reported through the error presenter.
          #
          # @return [Object] the pipeline result, or the result of the error
          #   presenter call that reported the failure
          # @raise [ArgumentError] any ArgumentError other than the empty
          #   output path one is re-raised
          def call
            context = build_context
            build_pipeline.call(context)
          rescue Domain::RowSession::InvalidStartRowError
            @errors.invalid_start_row
          rescue Domain::RowSession::InvalidEndRowError
            @errors.invalid_end_row
          rescue Domain::RowSession::InvalidRowRangeOrderError
            @errors.invalid_row_range_order
          rescue ArgumentError => e
            raise unless e.message == EMPTY_OUTPUT_PATH_MESSAGE

            @errors.empty_output_path
          end

          private

          # Shared state passed to every step.
          def build_context
            {
              use_case: @use_case,
              session_builder: @session_builder,
              output_destination_mapper: @output_destination_mapper,
              handle_error: method(:handle_error)
            }
          end

          # Steps in execution order.
          def build_pipeline
            Steps::WorkflowStepPipeline.new(
              steps: [
                collect_source_step,
                Steps::RowExtraction::ReadHeadersStep.new,
                Steps::RowExtraction::CollectRangeStep.new(**io_streams),
                collect_destination_step,
                Steps::RowExtraction::ExecuteStep.new(stdout: @stdout, errors: @errors)
              ]
            )
          end

          def collect_source_step
            Steps::RowExtraction::CollectSourceStep.new(
              file_path_prompt: Interface::CLI::Prompts::FilePathPrompt.new(**io_streams),
              separator_prompt: Interface::CLI::Prompts::SeparatorPrompt.new(**io_streams, errors: @errors)
            )
          end

          def collect_destination_step
            Steps::RowExtraction::CollectDestinationStep.new(
              output_destination_prompt: Interface::CLI::Prompts::OutputDestinationPrompt.new(**io_streams, errors: @errors)
            )
          end

          # Keyword arguments shared by the prompt constructors.
          def io_streams
            { stdin: @stdin, stdout: @stdout }
          end

          # Delegates a failed result to the error handler together with the
          # handler table for this workflow.
          def handle_error(result)
            @result_error_handler.call(result, error_handlers)
          end

          # Table of callables handed to the result error handler; each one
          # receives the result and the error presenter.
          def error_handlers
            {
              file_not_found: ->(r, errors) { errors.file_not_found(r.data[:path]) },
              no_headers: ->(_r, errors) { errors.no_headers },
              could_not_parse_csv: ->(_r, errors) { errors.could_not_parse_csv },
              cannot_read_file: ->(r, errors) { errors.cannot_read_file(r.data[:path]) },
              cannot_write_output_file: ->(r, errors) { errors.cannot_write_output_file(r.data[:path], r.data[:error_class]) }
            }
          end
        end
      end
    end
  end
end
|