csvops 0.4.0.alpha → 0.6.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/README.md +60 -12
  3. data/docs/architecture.md +208 -21
  4. data/docs/release-v0.5.0-alpha.md +89 -0
  5. data/docs/release-v0.6.0-alpha.md +84 -0
  6. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
  7. data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
  8. data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
  9. data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
  10. data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
  11. data/lib/csvtool/cli.rb +11 -7
  12. data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
  13. data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
  14. data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
  15. data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
  16. data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
  17. data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
  18. data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
  19. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
  20. data/lib/csvtool/interface/cli/errors/presenter.rb +4 -0
  21. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  22. data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
  23. data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
  24. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
  25. data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
  26. data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
  27. data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
  28. data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
  29. data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
  30. data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
  31. data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
  32. data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
  33. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
  34. data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
  35. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
  36. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
  37. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
  38. data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
  39. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
  40. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
  41. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
  42. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
  43. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
  44. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
  45. data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
  46. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
  47. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
  48. data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
  49. data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
  50. data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
  51. data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
  52. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
  53. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
  54. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
  55. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
  56. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
  57. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
  58. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
  59. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
  60. data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
  61. data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
  62. data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
  63. data/lib/csvtool/version.rb +1 -1
  64. data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
  65. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
  66. data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
  67. data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
  68. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
  69. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
  70. data/test/csvtool/cli_test.rb +175 -21
  71. data/test/csvtool/cli_unit_test.rb +4 -4
  72. data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
  73. data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
  74. data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
  75. data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
  76. data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
  77. data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
  78. data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
  79. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
  80. data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
  81. data/test/csvtool/interface/cli/menu_loop_test.rb +59 -16
  82. data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
  83. data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
  84. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
  85. data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
  86. data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
  87. data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
  88. data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
  89. data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
  90. data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
  91. data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
  92. data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
  93. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
  94. data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
  95. data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
  96. data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
  97. data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
  98. data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
  99. data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
  100. data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
  101. data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
  102. data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
  103. data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
  104. data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
  105. data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
  106. data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
  107. data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
  108. data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
  109. data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
  110. data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
  111. data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
  112. data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
  113. data/test/fixtures/parity_duplicates_left.csv +4 -0
  114. data/test/fixtures/parity_duplicates_right.csv +3 -0
  115. data/test/fixtures/parity_people_header_mismatch.csv +4 -0
  116. data/test/fixtures/parity_people_many_reordered.csv +13 -0
  117. data/test/fixtures/parity_people_mismatch.csv +4 -0
  118. data/test/fixtures/parity_people_reordered.csv +4 -0
  119. data/test/fixtures/parity_people_reordered.tsv +4 -0
  120. metadata +90 -1
data/lib/csvtool/cli.rb CHANGED
@@ -2,10 +2,11 @@
2
2
 
3
3
  require "csv"
4
4
  require "csvtool/interface/cli/menu_loop"
5
- require "csvtool/application/use_cases/run_extraction"
6
- require "csvtool/application/use_cases/run_row_extraction"
7
- require "csvtool/application/use_cases/run_row_randomization"
5
+ require "csvtool/interface/cli/workflows/run_extraction_workflow"
6
+ require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
7
+ require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
8
8
  require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
9
+ require "csvtool/interface/cli/workflows/run_csv_parity_workflow"
9
10
  require "csvtool/interface/cli/errors/presenter"
10
11
  require "csvtool/infrastructure/csv/header_reader"
11
12
  require "csvtool/infrastructure/csv/value_streamer"
@@ -18,6 +19,7 @@ module Csvtool
18
19
  "Extract rows (range)",
19
20
  "Randomize rows",
20
21
  "Dedupe using another CSV",
22
+ "Validate parity",
21
23
  "Exit"
22
24
  ].freeze
23
25
 
@@ -47,10 +49,11 @@ module Csvtool
47
49
  private
48
50
 
49
51
  def run_menu_loop
50
- extract_column_action = -> { Application::UseCases::RunExtraction.new(stdin: @stdin, stdout: @stdout).call }
51
- extract_rows_action = -> { Application::UseCases::RunRowExtraction.new(stdin: @stdin, stdout: @stdout).call }
52
- randomize_rows_action = -> { Application::UseCases::RunRowRandomization.new(stdin: @stdin, stdout: @stdout).call }
52
+ extract_column_action = -> { Interface::CLI::Workflows::RunExtractionWorkflow.new(stdin: @stdin, stdout: @stdout).call }
53
+ extract_rows_action = -> { Interface::CLI::Workflows::RunRowExtractionWorkflow.new(stdin: @stdin, stdout: @stdout).call }
54
+ randomize_rows_action = -> { Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: @stdin, stdout: @stdout).call }
53
55
  dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: @stdout).call }
56
+ parity_action = -> { Interface::CLI::Workflows::RunCsvParityWorkflow.new(stdin: @stdin, stdout: @stdout).call }
54
57
  Interface::CLI::MenuLoop.new(
55
58
  stdin: @stdin,
56
59
  stdout: @stdout,
@@ -58,7 +61,8 @@ module Csvtool
58
61
  extract_column_action: extract_column_action,
59
62
  extract_rows_action: extract_rows_action,
60
63
  randomize_rows_action: randomize_rows_action,
61
- dedupe_action: dedupe_action
64
+ dedupe_action: dedupe_action,
65
+ parity_action: parity_action
62
66
  ).run
63
67
  end
64
68
 
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvParitySession
6
+ class ParityOptions
7
+ attr_reader :separator
8
+
9
+ def initialize(separator:, headers_present:)
10
+ raise ArgumentError, "separator cannot be empty" if separator.to_s.empty?
11
+
12
+ @separator = separator
13
+ @headers_present = headers_present
14
+ end
15
+
16
+ def headers_present?
17
+ @headers_present
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvParitySession
6
+ class ParitySession
7
+ attr_reader :source_pair, :options
8
+
9
+ def self.start(source_pair:, options:)
10
+ new(source_pair: source_pair, options: options)
11
+ end
12
+
13
+ def initialize(source_pair:, options:)
14
+ @source_pair = source_pair
15
+ @options = options
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvParitySession
6
+ class SourcePair
7
+ attr_reader :left_path, :right_path
8
+
9
+ def initialize(left_path:, right_path:)
10
+ raise ArgumentError, "left_path cannot be empty" if left_path.to_s.empty?
11
+ raise ArgumentError, "right_path cannot be empty" if right_path.to_s.empty?
12
+
13
+ @left_path = left_path
14
+ @right_path = right_path
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Csvtool
6
+ module Infrastructure
7
+ module CSV
8
+ class CsvParityComparator
9
+ def call(left_path:, right_path:, col_sep:, headers_present:, sample_limit: 5)
10
+ deltas = Hash.new(0)
11
+ left_rows = stream_rows(path: left_path, col_sep: col_sep, headers_present: headers_present) do |key|
12
+ deltas[key] += 1
13
+ end
14
+ right_rows = stream_rows(path: right_path, col_sep: col_sep, headers_present: headers_present) do |key|
15
+ deltas[key] -= 1
16
+ end
17
+
18
+ left_only_count, right_only_count, left_only_examples, right_only_examples =
19
+ mismatch_totals_and_samples(deltas: deltas, sample_limit: sample_limit)
20
+
21
+ {
22
+ match: left_only_count.zero? && right_only_count.zero?,
23
+ left_rows: left_rows,
24
+ right_rows: right_rows,
25
+ left_only_count: left_only_count,
26
+ right_only_count: right_only_count,
27
+ left_only_examples: left_only_examples,
28
+ right_only_examples: right_only_examples
29
+ }
30
+ end
31
+
32
+ private
33
+
34
+ def stream_rows(path:, col_sep:, headers_present:)
35
+ rows = 0
36
+
37
+ ::CSV.foreach(path, headers: headers_present, col_sep: col_sep) do |row|
38
+ fields = headers_present ? row.fields : row
39
+ yield serialize(fields: fields, col_sep: col_sep)
40
+ rows += 1
41
+ end
42
+
43
+ rows
44
+ end
45
+
46
+ def mismatch_totals_and_samples(deltas:, sample_limit:)
47
+ left_only_count = 0
48
+ right_only_count = 0
49
+ left_only_examples = []
50
+ right_only_examples = []
51
+
52
+ deltas.each do |key, delta|
53
+ if delta.positive?
54
+ left_only_count += delta
55
+ left_only_examples << { row: key, count_delta: delta } if left_only_examples.length < sample_limit
56
+ elsif delta.negative?
57
+ right_only_count += -delta
58
+ right_only_examples << { row: key, count_delta: -delta } if right_only_examples.length < sample_limit
59
+ end
60
+ end
61
+
62
+ [left_only_count, right_only_count, left_only_examples, right_only_examples]
63
+ end
64
+
65
+ def serialize(fields:, col_sep:)
66
+ ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep).chomp
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Csvtool
6
+ module Infrastructure
7
+ module Output
8
+ class CsvCrossCsvDedupeFileWriter
9
+ def initialize(deduper:)
10
+ @deduper = deduper
11
+ end
12
+
13
+ def call(path:, headers:, col_sep:, dedupe_options:)
14
+ stats = nil
15
+ ::CSV.open(path, "w", write_headers: !headers.nil?, headers: headers, col_sep: col_sep) do |csv|
16
+ stats = @deduper.each_retained(**dedupe_options) { |fields| csv << fields }
17
+ end
18
+ stats
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -6,9 +6,7 @@ module Csvtool
6
6
  module Infrastructure
7
7
  module Output
8
8
  class CsvFileWriter
9
- def initialize(stdout:, errors:, value_streamer:)
10
- @stdout = stdout
11
- @errors = errors
9
+ def initialize(value_streamer:)
12
10
  @value_streamer = value_streamer
13
11
  end
14
12
 
@@ -19,10 +17,6 @@ module Csvtool
19
17
  csv << [value]
20
18
  end
21
19
  end
22
-
23
- @stdout.puts "Wrote output to #{output_path}"
24
- rescue Errno::EACCES, Errno::ENOENT => e
25
- @errors.cannot_write_output_file(output_path, e.class)
26
20
  end
27
21
  end
28
22
  end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Csvtool
6
+ module Infrastructure
7
+ module Output
8
+ class CsvRandomizedRowFileWriter
9
+ def initialize(row_randomizer:)
10
+ @row_randomizer = row_randomizer
11
+ end
12
+
13
+ def call(path:, headers:, file_path:, col_sep:, headers_present:, seed:)
14
+ ::CSV.open(path, "w", write_headers: !headers.nil?, headers: headers, col_sep: col_sep) do |csv|
15
+ @row_randomizer.each(file_path: file_path, col_sep: col_sep, headers: headers_present, seed: seed) do |fields|
16
+ csv << fields
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -6,9 +6,7 @@ module Csvtool
6
6
  module Infrastructure
7
7
  module Output
8
8
  class CsvRowFileWriter
9
- def initialize(stdout:, errors:, row_streamer:)
10
- @stdout = stdout
11
- @errors = errors
9
+ def initialize(row_streamer:)
12
10
  @row_streamer = row_streamer
13
11
  end
14
12
 
@@ -30,12 +28,7 @@ module Csvtool
30
28
  csv << fields
31
29
  end
32
30
 
33
- csv&.close
34
- @stdout.puts "Wrote output to #{output_path}" if wrote_rows
35
- stats
36
- rescue Errno::EACCES, Errno::ENOENT => e
37
- @errors.cannot_write_output_file(output_path, e.class)
38
- nil
31
+ stats.merge(wrote_rows: wrote_rows)
39
32
  ensure
40
33
  csv&.close unless csv&.closed?
41
34
  end
@@ -72,6 +72,10 @@ module Csvtool
72
72
  def row_range_out_of_bounds(total_rows)
73
73
  @stdout.puts "Row range is out of bounds. File has #{total_rows} data rows."
74
74
  end
75
+
76
+ def header_mismatch
77
+ @stdout.puts "CSV headers do not match."
78
+ end
75
79
  end
76
80
  end
77
81
  end
@@ -4,7 +4,7 @@ module Csvtool
4
4
  module Interface
5
5
  module CLI
6
6
  class MenuLoop
7
- def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:)
7
+ def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:)
8
8
  @stdin = stdin
9
9
  @stdout = stdout
10
10
  @menu_options = menu_options
@@ -12,6 +12,7 @@ module Csvtool
12
12
  @extract_rows_action = extract_rows_action
13
13
  @randomize_rows_action = randomize_rows_action
14
14
  @dedupe_action = dedupe_action
15
+ @parity_action = parity_action
15
16
  end
16
17
 
17
18
  def run
@@ -31,9 +32,11 @@ module Csvtool
31
32
  when "4"
32
33
  @dedupe_action.call
33
34
  when "5"
35
+ @parity_action.call
36
+ when "6"
34
37
  return 0
35
38
  else
36
- @stdout.puts "Please choose 1, 2, 3, 4, or 5."
39
+ @stdout.puts "Please choose 1, 2, 3, 4, 5, or 6."
37
40
  end
38
41
  end
39
42
  end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
4
+
5
+ module Csvtool
6
+ module Interface
7
+ module CLI
8
+ module Prompts
9
+ class DedupeKeySelectorPrompt
10
+ def initialize(stdin:, stdout:)
11
+ @stdin = stdin
12
+ @stdout = stdout
13
+ end
14
+
15
+ def call(label:, headers_present:)
16
+ if headers_present
17
+ @stdout.print "#{label} key column name: "
18
+ else
19
+ @stdout.print "#{label} key column index (1-based): "
20
+ end
21
+ input = @stdin.gets&.strip.to_s
22
+ Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: headers_present, input: input)
23
+ rescue ArgumentError
24
+ nil
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -5,13 +5,15 @@ module Csvtool
5
5
  module CLI
6
6
  module Prompts
7
7
  class FilePathPrompt
8
+ DEFAULT_LABEL = "CSV file path: "
9
+
8
10
  def initialize(stdin:, stdout:)
9
11
  @stdin = stdin
10
12
  @stdout = stdout
11
13
  end
12
14
 
13
- def call
14
- @stdout.print "CSV file path: "
15
+ def call(label: DEFAULT_LABEL)
16
+ @stdout.print label
15
17
  @stdin.gets&.strip.to_s
16
18
  end
17
19
  end
@@ -5,13 +5,15 @@ module Csvtool
5
5
  module CLI
6
6
  module Prompts
7
7
  class HeadersPresentPrompt
8
+ DEFAULT_LABEL = "Headers present? [Y/n]: "
9
+
8
10
  def initialize(stdin:, stdout:)
9
11
  @stdin = stdin
10
12
  @stdout = stdout
11
13
  end
12
14
 
13
- def call
14
- @stdout.print "Headers present? [Y/n]: "
15
+ def call(label: DEFAULT_LABEL)
16
+ @stdout.print label
15
17
  answer = @stdin.gets&.strip.to_s.downcase
16
18
  !%w[n no].include?(answer)
17
19
  end
@@ -5,14 +5,16 @@ module Csvtool
5
5
  module CLI
6
6
  module Prompts
7
7
  class SeparatorPrompt
8
+ DEFAULT_LABEL = "Choose separator:"
9
+
8
10
  def initialize(stdin:, stdout:, errors:)
9
11
  @stdin = stdin
10
12
  @stdout = stdout
11
13
  @errors = errors
12
14
  end
13
15
 
14
- def call
15
- @stdout.puts "Choose separator:"
16
+ def call(label: DEFAULT_LABEL)
17
+ @stdout.puts label
16
18
  @stdout.puts "1. comma (,)"
17
19
  @stdout.puts "2. tab (\\t)"
18
20
  @stdout.puts "3. semicolon (;)"
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Interface
5
+ module CLI
6
+ module Prompts
7
+ class YesNoPrompt
8
+ def initialize(stdin:, stdout:)
9
+ @stdin = stdin
10
+ @stdout = stdout
11
+ end
12
+
13
+ def call(label:, default:)
14
+ @stdout.print label
15
+ answer = @stdin.gets&.strip.to_s.downcase
16
+ return default if answer.empty?
17
+ return true if %w[y yes].include?(answer)
18
+ return false if %w[n no].include?(answer)
19
+
20
+ default
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/column_session/separator"
4
+ require "csvtool/domain/column_session/csv_source"
5
+ require "csvtool/domain/column_session/column_selection"
6
+ require "csvtool/domain/column_session/extraction_options"
7
+ require "csvtool/domain/column_session/column_session"
8
+
9
+ module Csvtool
10
+ module Interface
11
+ module CLI
12
+ module Workflows
13
+ module Builders
14
+ class ColumnSessionBuilder
15
+ def call(file_path:, col_sep:, column_name:, skip_blanks:)
16
+ separator = Domain::ColumnSession::Separator.new(col_sep)
17
+ source = Domain::ColumnSession::CsvSource.new(path: file_path, separator: separator)
18
+ column_selection = Domain::ColumnSession::ColumnSelection.new(name: column_name)
19
+ options = Domain::ColumnSession::ExtractionOptions.new(skip_blanks: skip_blanks, preview_limit: 10)
20
+
21
+ Domain::ColumnSession::ColumnSession.start(
22
+ source: source,
23
+ column_selection: column_selection,
24
+ options: options
25
+ )
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
4
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
5
+ require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
6
+
7
+ module Csvtool
8
+ module Interface
9
+ module CLI
10
+ module Workflows
11
+ module Builders
12
+ class CrossCsvDedupeSessionBuilder
13
+ def call(source:, reference:, source_selector:, reference_selector:, trim_whitespace:, case_insensitive:, destination:)
14
+ key_mapping = Domain::CrossCsvDedupeSession::KeyMapping.new(
15
+ source_selector: source_selector,
16
+ reference_selector: reference_selector
17
+ )
18
+ match_options = Domain::CrossCsvDedupeSession::MatchOptions.new(
19
+ trim_whitespace: trim_whitespace,
20
+ case_insensitive: case_insensitive
21
+ )
22
+
23
+ Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
24
+ source: source,
25
+ reference: reference,
26
+ key_mapping: key_mapping,
27
+ match_options: match_options
28
+ ).with_output_destination(destination)
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/csv_parity_session/source_pair"
4
+ require "csvtool/domain/csv_parity_session/parity_options"
5
+ require "csvtool/domain/csv_parity_session/parity_session"
6
+
7
+ module Csvtool
8
+ module Interface
9
+ module CLI
10
+ module Workflows
11
+ module Builders
12
+ class CsvParitySessionBuilder
13
+ def call(left_path:, right_path:, col_sep:, headers_present:)
14
+ source_pair = Domain::CsvParitySession::SourcePair.new(
15
+ left_path: left_path,
16
+ right_path: right_path
17
+ )
18
+ options = Domain::CsvParitySession::ParityOptions.new(
19
+ separator: col_sep,
20
+ headers_present: headers_present
21
+ )
22
+
23
+ Domain::CsvParitySession::ParitySession.start(
24
+ source_pair: source_pair,
25
+ options: options
26
+ )
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/row_session/row_source"
4
+ require "csvtool/domain/row_session/row_session"
5
+
6
+ module Csvtool
7
+ module Interface
8
+ module CLI
9
+ module Workflows
10
+ module Builders
11
+ class RowExtractionSessionBuilder
12
+ def call(file_path:, col_sep:, row_range:, destination:)
13
+ source = Domain::RowSession::RowSource.new(path: file_path, separator: col_sep)
14
+ session = Domain::RowSession::RowSession.start(source: source, row_range: row_range)
15
+ session.with_output_destination(destination)
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/row_randomization_session/randomization_source"
4
+ require "csvtool/domain/row_randomization_session/randomization_options"
5
+ require "csvtool/domain/row_randomization_session/randomization_session"
6
+
7
+ module Csvtool
8
+ module Interface
9
+ module CLI
10
+ module Workflows
11
+ module Builders
12
+ class RowRandomizationSessionBuilder
13
+ def call(file_path:, col_sep:, headers_present:, seed:, destination:)
14
+ source = Domain::RowRandomizationSession::RandomizationSource.new(
15
+ path: file_path,
16
+ separator: col_sep,
17
+ headers_present: headers_present
18
+ )
19
+ options = Domain::RowRandomizationSession::RandomizationOptions.new(seed: seed)
20
+ session = Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
21
+ session.with_output_destination(destination)
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Interface
5
+ module CLI
6
+ module Workflows
7
+ module Presenters
8
+ class ColumnExtractionPresenter
9
+ def initialize(stdout:)
10
+ @stdout = stdout
11
+ end
12
+
13
+ def print_value(value)
14
+ @stdout.puts value
15
+ end
16
+
17
+ def print_file_written(path)
18
+ @stdout.puts "Wrote output to #{path}"
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Csvtool
6
+ module Interface
7
+ module CLI
8
+ module Workflows
9
+ module Presenters
10
+ class CrossCsvDedupePresenter
11
+ def initialize(stdout:, col_sep:)
12
+ @stdout = stdout
13
+ @col_sep = col_sep
14
+ end
15
+
16
+ def print_header(headers)
17
+ @stdout.puts
18
+ @stdout.puts ::CSV.generate_line(headers, row_sep: "", col_sep: @col_sep).chomp
19
+ end
20
+
21
+ def print_row(fields)
22
+ @stdout.puts ::CSV.generate_line(fields, row_sep: "", col_sep: @col_sep).chomp
23
+ end
24
+
25
+ def print_file_written(path)
26
+ @stdout.puts "Wrote output to #{path}"
27
+ end
28
+
29
+ def print_summary(stats)
30
+ @stdout.puts "Summary: source_rows=#{stats[:source_rows]} removed_rows=#{stats[:removed_rows]} kept_rows=#{stats[:kept_rows_count]}"
31
+ @stdout.puts "No rows removed; no matching keys found." if stats[:removed_rows].zero?
32
+ @stdout.puts "All source rows were removed by dedupe." if stats[:source_rows].positive? && stats[:kept_rows_count].zero?
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end