csvops 0.5.0.alpha → 0.7.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +88 -7
  3. data/docs/architecture.md +119 -5
  4. data/docs/release-v0.6.0-alpha.md +84 -0
  5. data/docs/release-v0.7.0-alpha.md +87 -0
  6. data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
  7. data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
  8. data/lib/csvtool/cli.rb +9 -1
  9. data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
  10. data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
  11. data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
  12. data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
  13. data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
  14. data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
  15. data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
  16. data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
  17. data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
  18. data/lib/csvtool/interface/cli/errors/presenter.rb +12 -0
  19. data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
  20. data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
  21. data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
  22. data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
  23. data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
  24. data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
  25. data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
  26. data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
  27. data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
  28. data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
  29. data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
  30. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
  31. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
  32. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
  33. data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
  34. data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
  35. data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
  36. data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
  37. data/lib/csvtool/version.rb +1 -1
  38. data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
  39. data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
  40. data/test/csvtool/cli_test.rb +222 -21
  41. data/test/csvtool/cli_unit_test.rb +4 -4
  42. data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
  43. data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
  44. data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
  45. data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
  46. data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
  47. data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
  48. data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
  49. data/test/csvtool/interface/cli/menu_loop_test.rb +87 -93
  50. data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
  51. data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
  52. data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
  53. data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
  54. data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
  55. data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
  56. data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
  57. data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
  58. data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
  59. data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
  60. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
  61. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
  62. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
  63. data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
  64. data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
  65. data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
  66. data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
  67. data/test/fixtures/parity_duplicates_left.csv +4 -0
  68. data/test/fixtures/parity_duplicates_right.csv +3 -0
  69. data/test/fixtures/parity_people_header_mismatch.csv +4 -0
  70. data/test/fixtures/parity_people_many_reordered.csv +13 -0
  71. data/test/fixtures/parity_people_mismatch.csv +4 -0
  72. data/test/fixtures/parity_people_reordered.csv +4 -0
  73. data/test/fixtures/parity_people_reordered.tsv +4 -0
  74. data/test/fixtures/split_people_25.csv +26 -0
  75. metadata +64 -1
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvParitySession
      # Parsing settings shared by both files of a parity comparison.
      class ParityOptions
        # @return [Object] the separator exactly as it was passed in
        attr_reader :separator

        # @param separator [#to_s] column separator; must not stringify to ""
        # @param headers_present [Object] flag returned unchanged by #headers_present?
        # @raise [ArgumentError] when the separator stringifies to an empty string
        def initialize(separator:, headers_present:)
          if separator.to_s.empty?
            raise ArgumentError, "separator cannot be empty"
          end

          @headers_present = headers_present
          @separator = separator
        end

        # @return [Object] the headers flag given to the constructor
        def headers_present?
          @headers_present
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvParitySession
      # Pairs the two sources being compared with the options used to read them.
      class ParitySession
        attr_reader :source_pair, :options

        class << self
          # Named constructor; forwards both keywords to .new unchanged.
          #
          # @param source_pair [Object] stored as #source_pair
          # @param options [Object] stored as #options
          # @return [ParitySession]
          def start(source_pair:, options:)
            new(source_pair: source_pair, options: options)
          end
        end

        # @param source_pair [Object] stored as #source_pair
        # @param options [Object] stored as #options
        def initialize(source_pair:, options:)
          @options = options
          @source_pair = source_pair
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvParitySession
      # The two file paths taking part in a parity comparison.
      class SourcePair
        attr_reader :left_path, :right_path

        # @param left_path [#to_s] path of the left-hand file; must not stringify to ""
        # @param right_path [#to_s] path of the right-hand file; must not stringify to ""
        # @raise [ArgumentError] when either path stringifies to an empty string
        #   (the left path is checked first)
        def initialize(left_path:, right_path:)
          reject_blank(left_path, "left_path cannot be empty")
          reject_blank(right_path, "right_path cannot be empty")

          @left_path = left_path
          @right_path = right_path
        end

        private

        # Raises ArgumentError with +message+ when +value+ stringifies to "".
        def reject_blank(value, message)
          raise ArgumentError, message if value.to_s.empty?
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvSplitSession
      # Options for one CSV split run: chunk size, output naming and manifest choices.
      class SplitOptions
        attr_reader :chunk_size, :output_directory, :file_prefix, :overwrite_existing, :write_manifest, :manifest_path

        # @param chunk_size [Object] converted with Kernel#Integer, so numeric strings are accepted
        # @param output_directory [Object, nil] stored as given
        # @param file_prefix [Object, nil] stored as given
        # @param overwrite_existing [Object] stored as given (defaults to false)
        # @param write_manifest [Object] stored as given (defaults to false)
        # @param manifest_path [Object, nil] stored as given
        # @raise [ArgumentError, TypeError] when Kernel#Integer cannot convert chunk_size
        def initialize(
          chunk_size:,
          output_directory: nil,
          file_prefix: nil,
          overwrite_existing: false,
          write_manifest: false,
          manifest_path: nil
        )
          @chunk_size = Integer(chunk_size)
          @output_directory = output_directory
          @file_prefix = file_prefix
          @overwrite_existing = overwrite_existing
          @write_manifest = write_manifest
          @manifest_path = manifest_path
        end

        # Predicate form of #overwrite_existing, matching the `?` style that
        # ParityOptions#headers_present? uses for flags.
        #
        # @return [Object] the overwrite flag given to the constructor
        def overwrite_existing?
          @overwrite_existing
        end

        # Predicate form of #write_manifest.
        #
        # @return [Object] the manifest flag given to the constructor
        def write_manifest?
          @write_manifest
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvSplitSession
      # Pairs the file being split with the options that control the split.
      class SplitSession
        attr_reader :source, :options

        class << self
          # Named constructor; forwards both keywords to .new unchanged.
          #
          # @param source [Object] stored as #source
          # @param options [Object] stored as #options
          # @return [SplitSession]
          def start(source:, options:)
            new(source: source, options: options)
          end
        end

        # @param source [Object] stored as #source
        # @param options [Object] stored as #options
        def initialize(source:, options:)
          @options = options
          @source = source
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvSplitSession
      # Describes the CSV file to split and how it should be parsed.
      class SplitSource
        attr_reader :path, :separator, :headers_present

        # @param path [Object] stored as given; no validation is performed here
        # @param separator [Object] stored as given
        # @param headers_present [Object] stored as given
        def initialize(path:, separator:, headers_present:)
          @path = path
          @separator = separator
          @headers_present = headers_present
        end

        # Predicate form of #headers_present, mirroring
        # ParityOptions#headers_present?; the plain reader is kept for existing callers.
        #
        # @return [Object] the headers flag given to the constructor
        def headers_present?
          @headers_present
        end
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
module Csvtool
  module Infrastructure
    module CSV
      # Compares two CSV files row by row, ignoring row order but respecting how
      # many times each row occurs. Rows are keyed by their re-serialized text.
      class CsvParityComparator
        # @param left_path [String] path of the left-hand file
        # @param right_path [String] path of the right-hand file
        # @param col_sep [String] column separator used for reading and for row keys
        # @param headers_present [Object] passed to CSV as the +headers+ option; when
        #   truthy only the row fields (not the header names) are compared
        # @param sample_limit [Integer] maximum examples collected per side
        # @return [Hash] :match, :left_rows, :right_rows, :left_only_count,
        #   :right_only_count, :left_only_examples, :right_only_examples
        def call(left_path:, right_path:, col_sep:, headers_present:, sample_limit: 5)
          tally = Hash.new(0)
          read_options = { col_sep: col_sep, headers_present: headers_present }

          # Left rows count up, right rows count down; a key at zero is balanced.
          left_total = stream_rows(path: left_path, **read_options) { |row_key| tally[row_key] += 1 }
          right_total = stream_rows(path: right_path, **read_options) { |row_key| tally[row_key] -= 1 }

          left_only, right_only, left_samples, right_samples =
            mismatch_totals_and_samples(deltas: tally, sample_limit: sample_limit)

          {
            match: (left_only + right_only).zero?,
            left_rows: left_total,
            right_rows: right_total,
            left_only_count: left_only,
            right_only_count: right_only,
            left_only_examples: left_samples,
            right_only_examples: right_samples
          }
        end

        private

        # Yields the serialized key of every row in +path+ and returns the row count.
        def stream_rows(path:, col_sep:, headers_present:)
          count = 0

          ::CSV.foreach(path, headers: headers_present, col_sep: col_sep) do |record|
            values = headers_present ? record.fields : record
            yield serialize(fields: values, col_sep: col_sep)
            count += 1
          end

          count
        end

        # Folds the signed per-row deltas into per-side totals plus up to
        # +sample_limit+ example rows per side, in hash iteration order.
        def mismatch_totals_and_samples(deltas:, sample_limit:)
          totals = { left: 0, right: 0 }
          samples = { left: [], right: [] }

          deltas.each do |key, delta|
            next if delta.zero?

            side = delta.positive? ? :left : :right
            surplus = delta.abs
            totals[side] += surplus
            samples[side] << { row: key, count_delta: surplus } if samples[side].length < sample_limit
          end

          [totals[:left], totals[:right], samples[:left], samples[:right]]
        end

        # Renders the fields as one CSV line without a row terminator.
        def serialize(fields:, col_sep:)
          ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep).chomp
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
module Csvtool
  module Infrastructure
    module CSV
      # Splits one CSV file into numbered chunk files of at most +chunk_size+
      # data rows each, repeating the header row in every chunk when present.
      class CsvSplitter
        # Raised when a chunk file already exists and overwriting was not allowed.
        class OutputFileExistsError < StandardError
          # @return [String] the chunk path that already exists
          attr_reader :path

          def initialize(path)
            super("output file exists: #{path}")
            @path = path
          end
        end

        # @param file_path [String] CSV file to read
        # @param col_sep [String] separator used for both reading and writing
        # @param headers_present [Object] passed to CSV as the +headers+ option
        # @param chunk_size [Integer, String] maximum data rows per chunk; coerced
        #   with Kernel#Integer and required to be positive
        # @param output_directory [String] directory that receives the chunk files
        # @param file_prefix [String] prefix of each "<prefix>_part_NNN<ext>" file name
        # @param overwrite_existing [Object] when falsy, an existing chunk file aborts the split
        # @return [Hash] :chunk_paths, :chunk_count, :data_rows, :chunk_row_counts
        # @raise [ArgumentError] when chunk_size is not a positive integer
        # @raise [OutputFileExistsError] when a chunk file exists and overwrite_existing is falsy;
        #   chunks written before the collision are left in place
        def call(file_path:, col_sep:, headers_present:, chunk_size:, output_directory:, file_prefix:, overwrite_existing:)
          # A size of zero or less used to start a new file for every row;
          # reject it before anything is written.
          rows_per_chunk = Integer(chunk_size)
          raise ArgumentError, "chunk_size must be a positive integer" unless rows_per_chunk.positive?

          ext = File.extname(file_path)
          ext = ".csv" if ext.empty?
          chunk_paths = []
          chunk_row_counts = []
          current_csv = nil

          ::CSV.foreach(file_path, headers: headers_present, col_sep: col_sep) do |row|
            if current_csv.nil? || chunk_row_counts.last >= rows_per_chunk
              current_csv&.close
              current_csv = nil # keep the ensure clause from touching a finished chunk

              path = chunk_path(output_directory, file_prefix, chunk_paths.length + 1, ext)
              raise OutputFileExistsError, path if File.exist?(path) && !overwrite_existing

              current_csv = open_chunk(path, row, headers_present, col_sep)
              chunk_paths << path
              chunk_row_counts << 0
            end

            current_csv << (headers_present ? row.fields : row)
            chunk_row_counts[-1] += 1
          end

          {
            chunk_paths: chunk_paths,
            chunk_count: chunk_paths.length,
            data_rows: chunk_row_counts.sum,
            chunk_row_counts: chunk_row_counts
          }
        ensure
          current_csv&.close
        end

        private

        # Builds the path of the chunk with the given 1-based sequence number.
        def chunk_path(output_directory, file_prefix, sequence, ext)
          File.join(output_directory, format("%<prefix>s_part_%<num>03d%<ext>s", prefix: file_prefix, num: sequence, ext: ext))
        end

        # Opens a chunk for writing; with headers the header row of +row+ is
        # emitted automatically before the first data row.
        def open_chunk(path, row, headers_present, col_sep)
          chunk_headers = headers_present ? row.headers : nil
          ::CSV.open(path, "w", write_headers: headers_present, headers: chunk_headers, col_sep: col_sep)
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
module Csvtool
  module Infrastructure
    module Output
      # Writes a CSV manifest listing every chunk produced by a split.
      class CsvSplitManifestWriter
        # @param path [String] manifest file to create or truncate
        # @param chunk_paths [Array] chunk file paths, in chunk order
        # @param chunk_row_counts [Array] row count per chunk, index-aligned with chunk_paths
        # @return [Array] chunk_paths (the value of the CSV.open block)
        def call(path:, chunk_paths:, chunk_row_counts:)
          ::CSV.open(path, "w") do |manifest|
            manifest << %w[chunk_index chunk_path row_count]

            # Chunk indexes in the manifest are 1-based.
            chunk_paths.each.with_index(1) do |chunk_path, position|
              manifest << [position, chunk_path, chunk_row_counts[position - 1]]
            end
          end
        end
      end
    end
  end
end
@@ -33,6 +33,10 @@ module Csvtool
33
33
  @stdout.puts "Cannot write output file: #{path} (#{error_class})"
34
34
  end
35
35
 
36
+ def output_file_exists(path)
37
+ @stdout.puts "Output file already exists: #{path}"
38
+ end
39
+
36
40
  def empty_output_path
37
41
  @stdout.puts "Output file path cannot be empty."
38
42
  end
@@ -53,6 +57,10 @@ module Csvtool
53
57
  @stdout.puts "Seed must be an integer."
54
58
  end
55
59
 
60
+ def invalid_chunk_size
61
+ @stdout.puts "Chunk size must be a positive integer."
62
+ end
63
+
56
64
  def canceled
57
65
  @stdout.puts "Canceled."
58
66
  end
@@ -72,6 +80,10 @@ module Csvtool
72
80
  def row_range_out_of_bounds(total_rows)
73
81
  @stdout.puts "Row range is out of bounds. File has #{total_rows} data rows."
74
82
  end
83
+
84
+ def header_mismatch
85
+ @stdout.puts "CSV headers do not match."
86
+ end
75
87
  end
76
88
  end
77
89
  end
@@ -4,7 +4,7 @@ module Csvtool
4
4
  module Interface
5
5
  module CLI
6
6
  class MenuLoop
7
- def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:)
7
+ def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:, split_action:)
8
8
  @stdin = stdin
9
9
  @stdout = stdout
10
10
  @menu_options = menu_options
@@ -12,6 +12,8 @@ module Csvtool
12
12
  @extract_rows_action = extract_rows_action
13
13
  @randomize_rows_action = randomize_rows_action
14
14
  @dedupe_action = dedupe_action
15
+ @parity_action = parity_action
16
+ @split_action = split_action
15
17
  end
16
18
 
17
19
  def run
@@ -31,9 +33,13 @@ module Csvtool
31
33
  when "4"
32
34
  @dedupe_action.call
33
35
  when "5"
36
+ @parity_action.call
37
+ when "6"
38
+ @split_action.call
39
+ when "7"
34
40
  return 0
35
41
  else
36
- @stdout.puts "Please choose 1, 2, 3, 4, or 5."
42
+ @stdout.puts "Please choose 1, 2, 3, 4, 5, 6, or 7."
37
43
  end
38
44
  end
39
45
  end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks for the number of rows per chunk and returns the raw answer.
        class ChunkSizePrompt
          # @param stdin [#gets] input stream
          # @param stdout [#print] output stream
          def initialize(stdin:, stdout:)
            @stdout = stdout
            @stdin = stdin
          end

          # @return [String] the stripped answer, or "" when input has ended;
          #   the text is not parsed or validated here
          def call
            @stdout.print "Rows per chunk: "
            answer = @stdin.gets
            answer.to_s.strip
          end
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks whether a manifest should be written and, if so, where.
        class SplitManifestPrompt
          # @param stdin [#gets] input stream
          # @param stdout [#print] output stream
          # @param yes_no_prompt [#call] invoked with +label:+ and +default:+ keywords
          def initialize(stdin:, stdout:, yes_no_prompt:)
            @stdin = stdin
            @stdout = stdout
            @yes_no_prompt = yes_no_prompt
          end

          # @param default_path [String] path used when the answer is blank
          # @return [Hash] :write_manifest and :manifest_path (nil when declined)
          def call(default_path:)
            wanted = @yes_no_prompt.call(label: "Write manifest file? [y/N]: ", default: false)

            if wanted
              { write_manifest: true, manifest_path: ask_path(default_path) }
            else
              { write_manifest: false, manifest_path: nil }
            end
          end

          private

          # Prompts for the manifest path; a blank answer or end of input
          # selects +fallback+.
          def ask_path(fallback)
            @stdout.print "Manifest file path [#{fallback}]: "
            reply = @stdin.gets.to_s.strip
            reply.empty? ? fallback : reply
          end
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Collects the output directory, file prefix and overwrite choice for a split.
        class SplitOutputPrompt
          # @param stdin [#gets] input stream
          # @param stdout [#print] output stream
          # @param yes_no_prompt [#call] invoked with +label:+ and +default:+ keywords
          def initialize(stdin:, stdout:, yes_no_prompt:)
            @stdin = stdin
            @stdout = stdout
            @yes_no_prompt = yes_no_prompt
          end

          # @param default_directory [String] used when the directory answer is blank
          # @param default_prefix [String] used when the prefix answer is blank
          # @return [Hash] :output_directory, :file_prefix, :overwrite_existing
          def call(default_directory:, default_prefix:)
            directory = ask("Output directory [#{default_directory}]: ", default_directory)
            prefix = ask("Output file prefix [#{default_prefix}]: ", default_prefix)
            overwrite = @yes_no_prompt.call(label: "Overwrite existing chunk files? [y/N]: ", default: false)

            { output_directory: directory, file_prefix: prefix, overwrite_existing: overwrite }
          end

          private

          # Prints +question+ and returns the stripped answer, or +fallback+
          # when the answer is blank or input has ended.
          def ask(question, fallback)
            @stdout.print question
            reply = @stdin.gets.to_s.strip
            reply.empty? ? fallback : reply
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/csv_parity_session/source_pair"
4
+ require "csvtool/domain/csv_parity_session/parity_options"
5
+ require "csvtool/domain/csv_parity_session/parity_session"
6
+
7
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Turns raw CLI answers into a Domain::CsvParitySession::ParitySession.
          class CsvParitySessionBuilder
            # @param left_path [String] forwarded to SourcePair as +left_path+
            # @param right_path [String] forwarded to SourcePair as +right_path+
            # @param col_sep [String] forwarded to ParityOptions as +separator+
            # @param headers_present [Object] forwarded to ParityOptions as +headers_present+
            # @return [Object] whatever ParitySession.start returns
            def call(left_path:, right_path:, col_sep:, headers_present:)
              parity = Domain::CsvParitySession

              # The source pair is built before the options, so its validation runs first.
              pair = parity::SourcePair.new(left_path: left_path, right_path: right_path)
              settings = parity::ParityOptions.new(separator: col_sep, headers_present: headers_present)

              parity::ParitySession.start(source_pair: pair, options: settings)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/csv_split_session/split_source"
4
+ require "csvtool/domain/csv_split_session/split_options"
5
+ require "csvtool/domain/csv_split_session/split_session"
6
+
7
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Turns raw CLI answers into a Domain::CsvSplitSession::SplitSession.
          class CsvSplitSessionBuilder
            # @param file_path [String] forwarded to SplitSource as +path+
            # @param col_sep [String] forwarded to SplitSource as +separator+
            # @param headers_present [Object] forwarded to SplitSource
            # @param chunk_size [Object] forwarded to SplitOptions
            # @param output_directory [String, nil] forwarded to SplitOptions
            # @param file_prefix [String, nil] forwarded to SplitOptions
            # @param overwrite_existing [Object] forwarded to SplitOptions
            # @param write_manifest [Object] forwarded to SplitOptions
            # @param manifest_path [String, nil] forwarded to SplitOptions
            # @return [Object] whatever SplitSession.start returns
            def call(
              file_path:,
              col_sep:,
              headers_present:,
              chunk_size:,
              output_directory: nil,
              file_prefix: nil,
              overwrite_existing: false,
              write_manifest: false,
              manifest_path: nil
            )
              split = Domain::CsvSplitSession

              origin = split::SplitSource.new(path: file_path, separator: col_sep, headers_present: headers_present)
              settings = split::SplitOptions.new(
                chunk_size: chunk_size,
                output_directory: output_directory,
                file_prefix: file_prefix,
                overwrite_existing: overwrite_existing,
                write_manifest: write_manifest,
                manifest_path: manifest_path
              )

              split::SplitSession.start(source: origin, options: settings)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Prints the outcome of a parity comparison.
          class CsvParityPresenter
            # @param stdout [#puts] output stream
            def initialize(stdout:)
              @stdout = stdout
            end

            # Prints the verdict and the counters; on a mismatch also prints the
            # example rows for each side that has any.
            #
            # @param data [Hash] reads :match, :left_rows, :right_rows, :left_only_count,
            #   :right_only_count, :left_only_examples, :right_only_examples
            # @return [nil]
            def print_summary(data)
              matched = data[:match]
              counters = [
                "left_rows=#{data[:left_rows]}",
                "right_rows=#{data[:right_rows]}",
                "left_only=#{data[:left_only_count]}",
                "right_only=#{data[:right_only_count]}"
              ]

              @stdout.puts(matched ? "MATCH" : "MISMATCH")
              @stdout.puts "Summary: #{counters.join(' ')}"
              return if matched

              print_examples("Left-only examples", data[:left_only_examples])
              print_examples("Right-only examples", data[:right_only_examples])
            end

            private

            # Prints a labelled list of example rows; prints nothing for nil or empty input.
            def print_examples(label, examples)
              unless examples.nil? || examples.empty?
                @stdout.puts "#{label}:"
                examples.each do |sample|
                  @stdout.puts "  #{sample[:row]} (count +#{sample[:count_delta]})"
                end
              end
              nil
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Prints the outcome of a CSV split.
          class CsvSplitPresenter
            # @param stdout [#puts] output stream
            def initialize(stdout:)
              @stdout = stdout
            end

            # Prints the summary lines followed by one line per chunk path.
            #
            # @param data [Hash] reads :chunk_size, :data_rows, :chunk_count,
            #   :manifest_path (optional) and :chunk_paths
            # @return [Object] data[:chunk_paths]
            def print_summary(data)
              summary = [
                "Split complete.",
                "Chunk size: #{data[:chunk_size]}",
                "Data rows: #{data[:data_rows]}",
                "Chunks written: #{data[:chunk_count]}"
              ]
              # The manifest line is shown only when a manifest path is present.
              summary << "Manifest: #{data[:manifest_path]}" if data[:manifest_path]

              summary.each { |line| @stdout.puts line }
              data[:chunk_paths].each { |chunk_path| @stdout.puts chunk_path }
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/application/use_cases/run_csv_parity"
4
+ require "csvtool/interface/cli/errors/presenter"
5
+ require "csvtool/interface/cli/prompts/file_path_prompt"
6
+ require "csvtool/interface/cli/prompts/separator_prompt"
7
+ require "csvtool/interface/cli/prompts/headers_present_prompt"
8
+ require "csvtool/interface/cli/workflows/builders/csv_parity_session_builder"
9
+ require "csvtool/interface/cli/workflows/presenters/csv_parity_presenter"
10
+ require "csvtool/interface/cli/workflows/support/result_error_handler"
11
+ require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
12
+ require "csvtool/interface/cli/workflows/steps/parity/collect_inputs_step"
13
+ require "csvtool/interface/cli/workflows/steps/parity/build_session_step"
14
+ require "csvtool/interface/cli/workflows/steps/parity/execute_step"
15
+
16
module Csvtool
  module Interface
    module CLI
      module Workflows
        # CLI entry point for the parity check: wires prompts, session builder,
        # presenter and error handling into a three-step pipeline and runs it.
        class RunCsvParityWorkflow
          # @param stdin [Object] input stream handed to the prompts
          # @param stdout [Object] output stream handed to prompts and presenters
          # @param use_case [Object] placed in the pipeline context under :use_case
          def initialize(stdin:, stdout:, use_case: Application::UseCases::RunCsvParity.new)
            @stdin = stdin
            @stdout = stdout
            @use_case = use_case
            @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
            @session_builder = Builders::CsvParitySessionBuilder.new
            @presenter = Presenters::CsvParityPresenter.new(stdout: stdout)
            @result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
          end

          # Runs collect-inputs, build-session and execute in order.
          #
          # @return [nil]
          def call
            context = pipeline_context
            build_pipeline.call(context)
            nil
          end

          private

          # Collaborators shared with every step through the pipeline context.
          def pipeline_context
            {
              use_case: @use_case,
              session_builder: @session_builder,
              presenter: @presenter,
              handle_error: method(:handle_error)
            }
          end

          # Assembles the step pipeline for one run.
          def build_pipeline
            Steps::WorkflowStepPipeline.new(
              steps: [
                collect_inputs_step,
                Steps::Parity::BuildSessionStep.new,
                Steps::Parity::ExecuteStep.new
              ]
            )
          end

          # Builds the input-collection step with fresh prompt objects.
          def collect_inputs_step
            prompts = Interface::CLI::Prompts
            io = { stdin: @stdin, stdout: @stdout }

            Steps::Parity::CollectInputsStep.new(
              file_path_prompt: prompts::FilePathPrompt.new(**io),
              separator_prompt: prompts::SeparatorPrompt.new(**io, errors: @errors),
              headers_present_prompt: prompts::HeadersPresentPrompt.new(**io)
            )
          end

          # Hands a result to the shared error handler together with the
          # error-key => presenter-call table for this workflow.
          def handle_error(result)
            @result_error_handler.call(result, error_handlers)
          end

          # Maps each error key to the presenter call that reports it.
          def error_handlers
            {
              file_not_found: ->(r, errors) { errors.file_not_found(r.data[:path]) },
              could_not_parse_csv: ->(_r, errors) { errors.could_not_parse_csv },
              cannot_read_file: ->(r, errors) { errors.cannot_read_file(r.data[:path]) },
              no_headers: ->(_r, errors) { errors.no_headers },
              header_mismatch: ->(_r, errors) { errors.header_mismatch }
            }
          end
        end
      end
    end
  end
end