csvops 0.5.0.alpha → 0.7.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +88 -7
- data/docs/architecture.md +119 -5
- data/docs/release-v0.6.0-alpha.md +84 -0
- data/docs/release-v0.7.0-alpha.md +87 -0
- data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
- data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
- data/lib/csvtool/cli.rb +9 -1
- data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
- data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
- data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
- data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
- data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
- data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
- data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
- data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
- data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
- data/lib/csvtool/interface/cli/errors/presenter.rb +12 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
- data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
- data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
- data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
- data/test/csvtool/cli_test.rb +222 -21
- data/test/csvtool/cli_unit_test.rb +4 -4
- data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
- data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
- data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
- data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
- data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
- data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
- data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +87 -93
- data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
- data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
- data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
- data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
- data/test/fixtures/parity_duplicates_left.csv +4 -0
- data/test/fixtures/parity_duplicates_right.csv +3 -0
- data/test/fixtures/parity_people_header_mismatch.csv +4 -0
- data/test/fixtures/parity_people_many_reordered.csv +13 -0
- data/test/fixtures/parity_people_mismatch.csv +4 -0
- data/test/fixtures/parity_people_reordered.csv +4 -0
- data/test/fixtures/parity_people_reordered.tsv +4 -0
- data/test/fixtures/split_people_25.csv +26 -0
- metadata +64 -1
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvParitySession
      # Value object carrying the parsing options of a parity comparison:
      # the column separator and whether a header row is present.
      class ParityOptions
        # @return [Object] the separator exactly as passed to the constructor
        attr_reader :separator

        # @param separator [#to_s] column separator; rejected when its string
        #   form is empty (this also covers nil)
        # @param headers_present [Object] flag returned by {#headers_present?}
        # @raise [ArgumentError] when the separator is nil or empty
        def initialize(separator:, headers_present:)
          raise ArgumentError, "separator cannot be empty" if separator.to_s.empty?

          @separator = separator
          @headers_present = headers_present
        end

        # @return [Object] the headers_present value given at construction
        #   (returned as-is, not coerced to a boolean)
        def headers_present?
          @headers_present
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvParitySession
      # Pairs the two sources of a parity comparison with the options used
      # to read them. Performs no validation of its own.
      class ParitySession
        # @return [Object] the source pair and options given at construction
        attr_reader :source_pair, :options

        # Named constructor; equivalent to calling .new with the same keywords.
        #
        # @param source_pair [Object] the pair of sources to compare
        # @param options [Object] the comparison options
        # @return [ParitySession]
        def self.start(source_pair:, options:)
          new(source_pair: source_pair, options: options)
        end

        # @param source_pair [Object] the pair of sources to compare
        # @param options [Object] the comparison options
        def initialize(source_pair:, options:)
          @source_pair = source_pair
          @options = options
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvParitySession
      # The two file paths ("left" and "right") compared by a parity check.
      class SourcePair
        # @return [Object] the paths exactly as passed to the constructor
        attr_reader :left_path, :right_path

        # @param left_path [#to_s] path of the left file; must not be nil/empty
        # @param right_path [#to_s] path of the right file; must not be nil/empty
        # @raise [ArgumentError] when either path is nil or an empty string;
        #   the left path is checked first
        def initialize(left_path:, right_path:)
          raise ArgumentError, "left_path cannot be empty" if left_path.to_s.empty?
          raise ArgumentError, "right_path cannot be empty" if right_path.to_s.empty?

          @left_path = left_path
          @right_path = right_path
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvSplitSession
      # Options controlling how a CSV file is split into chunk files.
      class SplitOptions
        attr_reader :chunk_size, :output_directory, :file_prefix, :overwrite_existing, :write_manifest, :manifest_path

        # @param chunk_size [Integer, String] rows per chunk; coerced with
        #   Kernel#Integer and required to be greater than zero
        # @param output_directory [Object, nil] stored as given
        # @param file_prefix [Object, nil] stored as given
        # @param overwrite_existing [Object] stored as given (default false)
        # @param write_manifest [Object] stored as given (default false)
        # @param manifest_path [Object, nil] stored as given
        # @raise [ArgumentError] when chunk_size is not integer-like or is
        #   zero or negative
        # @raise [TypeError] when chunk_size is nil (raised by Kernel#Integer)
        def initialize(
          chunk_size:,
          output_directory: nil,
          file_prefix: nil,
          overwrite_existing: false,
          write_manifest: false,
          manifest_path: nil
        )
          @chunk_size = Integer(chunk_size)
          # A chunk cannot hold fewer than one row; reject the value here so
          # an invalid size never reaches the code that writes chunk files.
          raise ArgumentError, "chunk_size must be a positive integer" unless @chunk_size.positive?

          @output_directory = output_directory
          @file_prefix = file_prefix
          @overwrite_existing = overwrite_existing
          @write_manifest = write_manifest
          @manifest_path = manifest_path
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvSplitSession
      # Pairs the source of a split operation with its options. Performs no
      # validation of its own.
      class SplitSession
        # @return [Object] the source and options given at construction
        attr_reader :source, :options

        # Named constructor; equivalent to calling .new with the same keywords.
        #
        # @param source [Object] description of the file to split
        # @param options [Object] the split options
        # @return [SplitSession]
        def self.start(source:, options:)
          new(source: source, options: options)
        end

        # @param source [Object] description of the file to split
        # @param options [Object] the split options
        def initialize(source:, options:)
          @source = source
          @options = options
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvSplitSession
      # Describes the CSV file to split: its path, column separator and
      # whether it has a header row. Values are stored without validation.
      class SplitSource
        attr_reader :path, :separator, :headers_present

        # @param path [Object] location of the source file
        # @param separator [Object] column separator
        # @param headers_present [Object] header-row flag
        def initialize(path:, separator:, headers_present:)
          @path = path
          @separator = separator
          @headers_present = headers_present
        end

        # Predicate form of {#headers_present}, mirroring
        # CsvParitySession::ParityOptions#headers_present?.
        #
        # @return [Object] the stored flag, returned as-is
        def headers_present?
          @headers_present
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Infrastructure
    # NOTE: this module is itself named CSV, so the standard library is
    # always referenced as ::CSV inside it.
    module CSV
      # Compares two CSV files as multisets of rows: row order is ignored
      # but the number of occurrences of each row matters.
      class CsvParityComparator
        # @param left_path [String] path of the left file
        # @param right_path [String] path of the right file
        # @param col_sep [String] column separator used to parse both files
        # @param headers_present [Boolean] when truthy the first row of each
        #   file is treated as a header and excluded from the comparison
        # @param sample_limit [Integer] maximum number of example rows
        #   reported per side
        # @return [Hash] :match, :left_rows, :right_rows, :left_only_count,
        #   :right_only_count, :left_only_examples, :right_only_examples
        def call(left_path:, right_path:, col_sep:, headers_present:, sample_limit: 5)
          # Per-row balance: +1 for each occurrence on the left, -1 for each
          # occurrence on the right. A balance of zero means parity.
          deltas = Hash.new(0)

          left_rows = stream_rows(path: left_path, col_sep: col_sep, headers_present: headers_present) do |key|
            deltas[key] += 1
          end
          right_rows = stream_rows(path: right_path, col_sep: col_sep, headers_present: headers_present) do |key|
            deltas[key] -= 1
          end

          left_only_count, right_only_count, left_only_examples, right_only_examples =
            mismatch_totals_and_samples(deltas: deltas, sample_limit: sample_limit)

          {
            match: left_only_count.zero? && right_only_count.zero?,
            left_rows: left_rows,
            right_rows: right_rows,
            left_only_count: left_only_count,
            right_only_count: right_only_count,
            left_only_examples: left_only_examples,
            right_only_examples: right_only_examples
          }
        end

        private

        # Yields the serialized form of every data row of the file.
        #
        # @return [Integer] number of rows yielded
        def stream_rows(path:, col_sep:, headers_present:)
          rows = 0

          ::CSV.foreach(path, headers: headers_present, col_sep: col_sep) do |row|
            # With headers enabled ::CSV yields CSV::Row objects, otherwise
            # plain arrays of fields.
            fields = headers_present ? row.fields : row
            yield serialize(fields: fields, col_sep: col_sep)
            rows += 1
          end

          rows
        end

        # Sums the surplus on each side and collects up to sample_limit
        # example rows per side, in the hash's iteration order.
        #
        # @return [Array] left count, right count, left examples, right examples
        def mismatch_totals_and_samples(deltas:, sample_limit:)
          left_only_count = 0
          right_only_count = 0
          left_only_examples = []
          right_only_examples = []

          deltas.each do |key, delta|
            next if delta.zero?

            if delta.positive?
              left_only_count += delta
              left_only_examples << { row: key, count_delta: delta } if left_only_examples.length < sample_limit
            else
              surplus = -delta
              right_only_count += surplus
              right_only_examples << { row: key, count_delta: surplus } if right_only_examples.length < sample_limit
            end
          end

          [left_only_count, right_only_count, left_only_examples, right_only_examples]
        end

        # Builds the comparison key of a row by re-generating it as a single
        # CSV line without a row terminator.
        def serialize(fields:, col_sep:)
          ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep).chomp
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Infrastructure
    # NOTE: this module is itself named CSV, so the standard library is
    # always referenced as ::CSV inside it.
    module CSV
      # Streams a CSV file and writes its data rows into sequentially
      # numbered chunk files of at most chunk_size rows each.
      class CsvSplitter
        # Raised when a chunk file already exists and overwriting was not
        # requested. Chunks written before the conflict are left on disk.
        class OutputFileExistsError < StandardError
          # @return [String] path of the conflicting file
          attr_reader :path

          def initialize(path)
            super("output file exists: #{path}")
            @path = path
          end
        end

        # @param file_path [String] source CSV file
        # @param col_sep [String] separator used for reading and writing
        # @param headers_present [Boolean] when truthy the header row is
        #   repeated at the top of every chunk
        # @param chunk_size [Integer] maximum data rows per chunk
        # @param output_directory [String] directory receiving the chunks
        # @param file_prefix [String] chunks are named
        #   "<prefix>_part_NNN<ext>", ext being the source extension or
        #   ".csv" when the source has none
        # @param overwrite_existing [Boolean] allow replacing existing files
        # @return [Hash] :chunk_paths, :chunk_count, :data_rows,
        #   :chunk_row_counts
        # @raise [OutputFileExistsError] when a target exists and
        #   overwrite_existing is falsy
        def call(file_path:, col_sep:, headers_present:, chunk_size:, output_directory:, file_prefix:, overwrite_existing:)
          ext = File.extname(file_path)
          ext = ".csv" if ext.empty?
          chunk_paths = []
          chunk_row_counts = []
          current_csv = nil

          ::CSV.foreach(file_path, headers: headers_present, col_sep: col_sep) do |row|
            # Roll over to a new chunk on the first row and whenever the
            # current chunk is full.
            if current_csv.nil? || chunk_row_counts.last >= chunk_size
              current_csv&.close
              path = chunk_path(output_directory, file_prefix, chunk_paths.length + 1, ext)
              raise OutputFileExistsError, path if File.exist?(path) && !overwrite_existing

              chunk_paths << path
              chunk_row_counts << 0
              current_csv = ::CSV.open(
                path,
                "w",
                write_headers: headers_present,
                headers: headers_present ? row.headers : nil,
                col_sep: col_sep
              )
            end

            # With headers enabled ::CSV yields CSV::Row objects, otherwise
            # plain arrays of fields.
            current_csv << (headers_present ? row.fields : row)
            chunk_row_counts[-1] += 1
          end

          {
            chunk_paths: chunk_paths,
            chunk_count: chunk_paths.length,
            data_rows: chunk_row_counts.sum,
            chunk_row_counts: chunk_row_counts
          }
        ensure
          # Always release the last chunk's handle, even on error.
          current_csv.close if current_csv && !current_csv.closed?
        end

        private

        # Builds the path of the chunk with the given 1-based sequence number.
        def chunk_path(output_directory, file_prefix, sequence, ext)
          File.join(
            output_directory,
            format("%<prefix>s_part_%<num>03d%<ext>s", prefix: file_prefix, num: sequence, ext: ext)
          )
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Infrastructure
    module Output
      # Writes a CSV manifest listing every chunk produced by a split.
      class CsvSplitManifestWriter
        # Writes a header row (chunk_index, chunk_path, row_count) followed
        # by one row per chunk; chunk_index is 1-based.
        #
        # @param path [String] manifest file to create or truncate
        # @param chunk_paths [Array<String>] chunk file paths, in order
        # @param chunk_row_counts [Array<Integer>] row count of each chunk,
        #   looked up by the same position as chunk_paths
        def call(path:, chunk_paths:, chunk_row_counts:)
          ::CSV.open(path, "w") do |csv|
            csv << %w[chunk_index chunk_path row_count]
            chunk_paths.each_with_index do |chunk_path, position|
              csv << [position + 1, chunk_path, chunk_row_counts[position]]
            end
          end
        end
      end
    end
  end
end
|
|
@@ -33,6 +33,10 @@ module Csvtool
|
|
|
33
33
|
@stdout.puts "Cannot write output file: #{path} (#{error_class})"
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
+
# Writes a message to @stdout saying that the given output file already
# exists.
#
# @param path [#to_s] path interpolated into the message
def output_file_exists(path)
  @stdout.puts "Output file already exists: #{path}"
end
|
|
39
|
+
|
|
36
40
|
def empty_output_path
|
|
37
41
|
@stdout.puts "Output file path cannot be empty."
|
|
38
42
|
end
|
|
@@ -53,6 +57,10 @@ module Csvtool
|
|
|
53
57
|
@stdout.puts "Seed must be an integer."
|
|
54
58
|
end
|
|
55
59
|
|
|
60
|
+
# Writes a message to @stdout saying that the chunk size must be a positive
# integer.
def invalid_chunk_size
  @stdout.puts "Chunk size must be a positive integer."
end
|
|
63
|
+
|
|
56
64
|
def canceled
|
|
57
65
|
@stdout.puts "Canceled."
|
|
58
66
|
end
|
|
@@ -72,6 +80,10 @@ module Csvtool
|
|
|
72
80
|
def row_range_out_of_bounds(total_rows)
|
|
73
81
|
@stdout.puts "Row range is out of bounds. File has #{total_rows} data rows."
|
|
74
82
|
end
|
|
83
|
+
|
|
84
|
+
# Writes a message to @stdout saying that the CSV headers do not match.
def header_mismatch
  @stdout.puts "CSV headers do not match."
end
|
|
75
87
|
end
|
|
76
88
|
end
|
|
77
89
|
end
|
|
@@ -4,7 +4,7 @@ module Csvtool
|
|
|
4
4
|
module Interface
|
|
5
5
|
module CLI
|
|
6
6
|
class MenuLoop
|
|
7
|
-
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:)
|
|
7
|
+
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:, split_action:)
|
|
8
8
|
@stdin = stdin
|
|
9
9
|
@stdout = stdout
|
|
10
10
|
@menu_options = menu_options
|
|
@@ -12,6 +12,8 @@ module Csvtool
|
|
|
12
12
|
@extract_rows_action = extract_rows_action
|
|
13
13
|
@randomize_rows_action = randomize_rows_action
|
|
14
14
|
@dedupe_action = dedupe_action
|
|
15
|
+
@parity_action = parity_action
|
|
16
|
+
@split_action = split_action
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def run
|
|
@@ -31,9 +33,13 @@ module Csvtool
|
|
|
31
33
|
when "4"
|
|
32
34
|
@dedupe_action.call
|
|
33
35
|
when "5"
|
|
36
|
+
@parity_action.call
|
|
37
|
+
when "6"
|
|
38
|
+
@split_action.call
|
|
39
|
+
when "7"
|
|
34
40
|
return 0
|
|
35
41
|
else
|
|
36
|
-
@stdout.puts "Please choose 1, 2, 3, 4, or 5."
|
|
42
|
+
@stdout.puts "Please choose 1, 2, 3, 4, 5, 6, or 7."
|
|
37
43
|
end
|
|
38
44
|
end
|
|
39
45
|
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks the user for the number of rows per chunk.
        class ChunkSizePrompt
          # @param stdin [#gets] input stream
          # @param stdout [#print] output stream
          def initialize(stdin:, stdout:)
            @stdin = stdin
            @stdout = stdout
          end

          # Prints the prompt and reads one line of input.
          #
          # @return [String] the answer with surrounding whitespace removed;
          #   an empty string when the input stream is exhausted. The text
          #   is returned unparsed.
          def call
            @stdout.print "Rows per chunk: "
            answer = @stdin.gets
            answer.nil? ? "" : answer.strip.to_s
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks whether a manifest file should be written and, if so, where.
        class SplitManifestPrompt
          # @param stdin [#gets] input stream
          # @param stdout [#print] output stream
          # @param yes_no_prompt [#call] invoked with label: and default:;
          #   a truthy result means "yes"
          def initialize(stdin:, stdout:, yes_no_prompt:)
            @stdin = stdin
            @stdout = stdout
            @yes_no_prompt = yes_no_prompt
          end

          # @param default_path [String] path used when the user enters
          #   nothing (or the input stream is exhausted)
          # @return [Hash] :write_manifest (true/false) and :manifest_path
          #   (nil when no manifest is wanted)
          def call(default_path:)
            wants_manifest = @yes_no_prompt.call(
              label: "Write manifest file? [y/N]: ",
              default: false
            )
            # The path is only asked for after a "yes".
            return { write_manifest: false, manifest_path: nil } unless wants_manifest

            @stdout.print "Manifest file path [#{default_path}]: "
            entered = @stdin.gets&.strip.to_s
            { write_manifest: true, manifest_path: entered.empty? ? default_path : entered }
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Collects the output settings of a split: target directory, chunk
        # file prefix and whether existing chunk files may be overwritten.
        class SplitOutputPrompt
          # @param stdin [#gets] input stream
          # @param stdout [#print] output stream
          # @param yes_no_prompt [#call] invoked with label: and default:
          def initialize(stdin:, stdout:, yes_no_prompt:)
            @stdin = stdin
            @stdout = stdout
            @yes_no_prompt = yes_no_prompt
          end

          # Asks, in order, for the directory, the prefix and the overwrite
          # confirmation.
          #
          # @param default_directory [String] used when no directory is entered
          # @param default_prefix [String] used when no prefix is entered
          # @return [Hash] :output_directory, :file_prefix, :overwrite_existing
          def call(default_directory:, default_prefix:)
            output_directory = ask_with_default("Output directory", default_directory)
            file_prefix = ask_with_default("Output file prefix", default_prefix)
            overwrite_existing = @yes_no_prompt.call(
              label: "Overwrite existing chunk files? [y/N]: ",
              default: false
            )

            {
              output_directory: output_directory,
              file_prefix: file_prefix,
              overwrite_existing: overwrite_existing
            }
          end

          private

          # Prints "<label> [<fallback>]: ", reads one line and returns the
          # stripped answer, or the fallback when the answer is blank or the
          # input stream is exhausted.
          def ask_with_default(label, fallback)
            @stdout.print "#{label} [#{fallback}]: "
            answer = @stdin.gets&.strip.to_s
            answer.empty? ? fallback : answer
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/csv_parity_session/source_pair"
|
|
4
|
+
require "csvtool/domain/csv_parity_session/parity_options"
|
|
5
|
+
require "csvtool/domain/csv_parity_session/parity_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a Domain::CsvParitySession::ParitySession from the raw
          # values collected by the CLI.
          class CsvParitySessionBuilder
            # @param left_path [String] path of the left file
            # @param right_path [String] path of the right file
            # @param col_sep [String] column separator, passed to
            #   ParityOptions as separator:
            # @param headers_present [Boolean] header-row flag
            # @return [Domain::CsvParitySession::ParitySession]
            def call(left_path:, right_path:, col_sep:, headers_present:)
              Domain::CsvParitySession::ParitySession.start(
                source_pair: Domain::CsvParitySession::SourcePair.new(
                  left_path: left_path,
                  right_path: right_path
                ),
                options: Domain::CsvParitySession::ParityOptions.new(
                  separator: col_sep,
                  headers_present: headers_present
                )
              )
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/csv_split_session/split_source"
|
|
4
|
+
require "csvtool/domain/csv_split_session/split_options"
|
|
5
|
+
require "csvtool/domain/csv_split_session/split_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a Domain::CsvSplitSession::SplitSession from the raw
          # values collected by the CLI.
          class CsvSplitSessionBuilder
            # @param file_path [String] source file, passed to SplitSource
            #   as path:
            # @param col_sep [String] column separator, passed to
            #   SplitSource as separator:
            # @param headers_present [Boolean] header-row flag
            # @param chunk_size [Object] forwarded to SplitOptions
            # @param output_directory [Object, nil] forwarded to SplitOptions
            # @param file_prefix [Object, nil] forwarded to SplitOptions
            # @param overwrite_existing [Object] forwarded to SplitOptions
            # @param write_manifest [Object] forwarded to SplitOptions
            # @param manifest_path [Object, nil] forwarded to SplitOptions
            # @return [Domain::CsvSplitSession::SplitSession]
            def call(
              file_path:,
              col_sep:,
              headers_present:,
              chunk_size:,
              output_directory: nil,
              file_prefix: nil,
              overwrite_existing: false,
              write_manifest: false,
              manifest_path: nil
            )
              split_source = Domain::CsvSplitSession::SplitSource.new(
                path: file_path,
                separator: col_sep,
                headers_present: headers_present
              )
              split_options = Domain::CsvSplitSession::SplitOptions.new(
                chunk_size: chunk_size,
                output_directory: output_directory,
                file_prefix: file_prefix,
                overwrite_existing: overwrite_existing,
                write_manifest: write_manifest,
                manifest_path: manifest_path
              )

              Domain::CsvSplitSession::SplitSession.start(source: split_source, options: split_options)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Prints the outcome of a CSV parity comparison.
          class CsvParityPresenter
            # @param stdout [#puts] output stream
            def initialize(stdout:)
              @stdout = stdout
            end

            # Prints the verdict line (MATCH/MISMATCH) and a one-line
            # summary; on a mismatch also prints the example rows of each
            # side.
            #
            # @param data [Hash] reads :match, :left_rows, :right_rows,
            #   :left_only_count, :right_only_count, :left_only_examples
            #   and :right_only_examples
            def print_summary(data)
              verdict = data[:match] ? "MATCH" : "MISMATCH"
              @stdout.puts(verdict)
              @stdout.puts "Summary: left_rows=#{data[:left_rows]} right_rows=#{data[:right_rows]} " \
                           "left_only=#{data[:left_only_count]} right_only=#{data[:right_only_count]}"
              return if data[:match]

              print_examples("Left-only examples", data[:left_only_examples])
              print_examples("Right-only examples", data[:right_only_examples])
            end

            private

            # Prints a labelled list of examples; prints nothing when the
            # list is nil or empty.
            def print_examples(label, examples)
              return if examples.nil? || examples.empty?

              @stdout.puts "#{label}:"
              examples.each do |example|
                # NOTE(review): the one-space prefix is reproduced from the
                # extracted text, whose whitespace may have been collapsed —
                # confirm the intended indent against the released gem.
                @stdout.puts " #{example[:row]} (count +#{example[:count_delta]})"
              end
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Prints the outcome of a CSV split.
          class CsvSplitPresenter
            # @param stdout [#puts] output stream
            def initialize(stdout:)
              @stdout = stdout
            end

            # Prints a completion banner, the chunk size, the data row and
            # chunk totals, the manifest path when one is present, and then
            # every chunk path on its own line.
            #
            # @param data [Hash] reads :chunk_size, :data_rows,
            #   :chunk_count, :manifest_path and :chunk_paths
            def print_summary(data)
              manifest_path = data[:manifest_path]

              @stdout.puts "Split complete."
              @stdout.puts "Chunk size: #{data[:chunk_size]}"
              @stdout.puts "Data rows: #{data[:data_rows]}"
              @stdout.puts "Chunks written: #{data[:chunk_count]}"
              @stdout.puts "Manifest: #{manifest_path}" if manifest_path
              data[:chunk_paths].each { |chunk_path| @stdout.puts chunk_path }
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/application/use_cases/run_csv_parity"
|
|
4
|
+
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
+
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
+
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
+
require "csvtool/interface/cli/prompts/headers_present_prompt"
|
|
8
|
+
require "csvtool/interface/cli/workflows/builders/csv_parity_session_builder"
|
|
9
|
+
require "csvtool/interface/cli/workflows/presenters/csv_parity_presenter"
|
|
10
|
+
require "csvtool/interface/cli/workflows/support/result_error_handler"
|
|
11
|
+
require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
|
|
12
|
+
require "csvtool/interface/cli/workflows/steps/parity/collect_inputs_step"
|
|
13
|
+
require "csvtool/interface/cli/workflows/steps/parity/build_session_step"
|
|
14
|
+
require "csvtool/interface/cli/workflows/steps/parity/execute_step"
|
|
15
|
+
|
|
16
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        # Interactive CLI workflow for the CSV parity check. Runs three
        # pipeline steps in order: collect inputs, build the session and
        # execute.
        class RunCsvParityWorkflow
          # @param stdin [Object] input stream handed to the prompts
          # @param stdout [Object] output stream handed to the prompts and
          #   presenters
          # @param use_case [Object] parity use case; defaults to a new
          #   Application::UseCases::RunCsvParity
          def initialize(stdin:, stdout:, use_case: Application::UseCases::RunCsvParity.new)
            @stdin = stdin
            @stdout = stdout
            @use_case = use_case
            @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
            @session_builder = Builders::CsvParitySessionBuilder.new
            @presenter = Presenters::CsvParityPresenter.new(stdout: stdout)
            @result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
          end

          # Runs the pipeline with a context hash holding the collaborators
          # the steps use.
          #
          # @return [nil] always
          def call
            context = {
              use_case: @use_case,
              session_builder: @session_builder,
              presenter: @presenter,
              handle_error: method(:handle_error)
            }
            build_pipeline.call(context)
            nil
          end

          private

          # Builds the three-step pipeline in execution order.
          def build_pipeline
            Steps::WorkflowStepPipeline.new(
              steps: [
                collect_inputs_step,
                Steps::Parity::BuildSessionStep.new,
                Steps::Parity::ExecuteStep.new
              ]
            )
          end

          # Builds the input-collection step with its three prompts wired to
          # this workflow's streams.
          def collect_inputs_step
            Steps::Parity::CollectInputsStep.new(
              file_path_prompt: Interface::CLI::Prompts::FilePathPrompt.new(stdin: @stdin, stdout: @stdout),
              separator_prompt: Interface::CLI::Prompts::SeparatorPrompt.new(
                stdin: @stdin, stdout: @stdout, errors: @errors
              ),
              headers_present_prompt: Interface::CLI::Prompts::HeadersPresentPrompt.new(
                stdin: @stdin, stdout: @stdout
              )
            )
          end

          # Passes the result to the shared error handler together with a
          # table mapping each error key to the presenter call that reports it.
          def handle_error(result)
            @result_error_handler.call(result, {
              file_not_found: ->(r, errors) { errors.file_not_found(r.data[:path]) },
              could_not_parse_csv: ->(_r, errors) { errors.could_not_parse_csv },
              cannot_read_file: ->(r, errors) { errors.cannot_read_file(r.data[:path]) },
              no_headers: ->(_r, errors) { errors.no_headers },
              header_mismatch: ->(_r, errors) { errors.header_mismatch }
            })
          end
        end
      end
    end
  end
end
|