csvops 0.3.0.alpha → 0.4.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57):
  1. checksums.yaml +4 -4
  2. data/README.md +56 -142
  3. data/docs/architecture.md +266 -0
  4. data/docs/release-v0.4.0-alpha.md +87 -0
  5. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +93 -0
  6. data/lib/csvtool/application/use_cases/run_extraction.rb +3 -3
  7. data/lib/csvtool/application/use_cases/run_row_extraction.rb +3 -3
  8. data/lib/csvtool/application/use_cases/run_row_randomization.rb +3 -3
  9. data/lib/csvtool/cli.rb +5 -1
  10. data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
  11. data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
  12. data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
  13. data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
  14. data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
  15. data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
  16. data/lib/csvtool/domain/row_session/row_source.rb +3 -0
  17. data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
  18. data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
  19. data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
  20. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  21. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +163 -0
  22. data/lib/csvtool/version.rb +1 -1
  23. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +113 -0
  24. data/test/csvtool/cli_test.rb +130 -16
  25. data/test/csvtool/cli_unit_test.rb +16 -3
  26. data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
  27. data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
  28. data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
  29. data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
  30. data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
  31. data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
  32. data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
  33. data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
  34. data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
  35. data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
  36. data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
  37. data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
  38. data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
  39. data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
  40. data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
  41. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
  42. data/test/fixtures/dedupe_reference.csv +3 -0
  43. data/test/fixtures/dedupe_reference.tsv +3 -0
  44. data/test/fixtures/dedupe_reference_all.csv +5 -0
  45. data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
  46. data/test/fixtures/dedupe_reference_none.csv +2 -0
  47. data/test/fixtures/dedupe_reference_normalization.csv +3 -0
  48. data/test/fixtures/dedupe_source.csv +6 -0
  49. data/test/fixtures/dedupe_source.tsv +6 -0
  50. data/test/fixtures/dedupe_source_no_headers.csv +5 -0
  51. data/test/fixtures/dedupe_source_normalization.csv +4 -0
  52. metadata +34 -8
  53. data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
  54. data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
  55. data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
  56. data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
  57. data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
@@ -19,8 +19,8 @@ require "csvtool/domain/column_session/column_selection"
19
19
  require "csvtool/domain/column_session/extraction_options"
20
20
  require "csvtool/domain/column_session/extraction_value"
21
21
  require "csvtool/domain/column_session/preview"
22
- require "csvtool/domain/column_session/output_destination"
23
22
  require "csvtool/domain/column_session/column_session"
23
+ require "csvtool/domain/shared/output_destination"
24
24
 
25
25
  module Csvtool
26
26
  module Application
@@ -79,9 +79,9 @@ module Csvtool
79
79
  return if output_destination.nil?
80
80
  domain_destination =
81
81
  if output_destination[:mode] == :file
82
- Domain::ColumnSession::OutputDestination.file(path: output_destination[:path])
82
+ Domain::Shared::OutputDestination.file(path: output_destination[:path])
83
83
  else
84
- Domain::ColumnSession::OutputDestination.console
84
+ Domain::Shared::OutputDestination.console
85
85
  end
86
86
  session = session.with_output_destination(domain_destination)
87
87
 
@@ -11,8 +11,8 @@ require "csvtool/infrastructure/output/csv_row_console_writer"
11
11
  require "csvtool/infrastructure/output/csv_row_file_writer"
12
12
  require "csvtool/domain/row_session/row_range"
13
13
  require "csvtool/domain/row_session/row_source"
14
- require "csvtool/domain/row_session/row_output_destination"
15
14
  require "csvtool/domain/row_session/row_session"
15
+ require "csvtool/domain/shared/output_destination"
16
16
 
17
17
  module Csvtool
18
18
  module Application
@@ -56,9 +56,9 @@ module Csvtool
56
56
  return if output_destination.nil?
57
57
  destination =
58
58
  if output_destination[:mode] == :file
59
- Domain::RowSession::RowOutputDestination.file(path: output_destination[:path])
59
+ Domain::Shared::OutputDestination.file(path: output_destination[:path])
60
60
  else
61
- Domain::RowSession::RowOutputDestination.console
61
+ Domain::Shared::OutputDestination.console
62
62
  end
63
63
  session = session.with_output_destination(destination)
64
64
 
@@ -11,8 +11,8 @@ require "csvtool/infrastructure/csv/header_reader"
11
11
  require "csvtool/infrastructure/csv/row_randomizer"
12
12
  require "csvtool/domain/row_randomization_session/randomization_source"
13
13
  require "csvtool/domain/row_randomization_session/randomization_options"
14
- require "csvtool/domain/row_randomization_session/randomization_output_destination"
15
14
  require "csvtool/domain/row_randomization_session/randomization_session"
15
+ require "csvtool/domain/shared/output_destination"
16
16
 
17
17
  module Csvtool
18
18
  module Application
@@ -55,9 +55,9 @@ module Csvtool
55
55
  return if output_destination.nil?
56
56
  destination =
57
57
  if output_destination[:mode] == :file
58
- Domain::RowRandomizationSession::RandomizationOutputDestination.file(path: output_destination[:path])
58
+ Domain::Shared::OutputDestination.file(path: output_destination[:path])
59
59
  else
60
- Domain::RowRandomizationSession::RandomizationOutputDestination.console
60
+ Domain::Shared::OutputDestination.console
61
61
  end
62
62
  session = session.with_output_destination(destination)
63
63
 
data/lib/csvtool/cli.rb CHANGED
@@ -5,6 +5,7 @@ require "csvtool/interface/cli/menu_loop"
5
5
  require "csvtool/application/use_cases/run_extraction"
6
6
  require "csvtool/application/use_cases/run_row_extraction"
7
7
  require "csvtool/application/use_cases/run_row_randomization"
8
+ require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
8
9
  require "csvtool/interface/cli/errors/presenter"
9
10
  require "csvtool/infrastructure/csv/header_reader"
10
11
  require "csvtool/infrastructure/csv/value_streamer"
@@ -16,6 +17,7 @@ module Csvtool
16
17
  "Extract column",
17
18
  "Extract rows (range)",
18
19
  "Randomize rows",
20
+ "Dedupe using another CSV",
19
21
  "Exit"
20
22
  ].freeze
21
23
 
@@ -48,13 +50,15 @@ module Csvtool
48
50
  extract_column_action = -> { Application::UseCases::RunExtraction.new(stdin: @stdin, stdout: @stdout).call }
49
51
  extract_rows_action = -> { Application::UseCases::RunRowExtraction.new(stdin: @stdin, stdout: @stdout).call }
50
52
  randomize_rows_action = -> { Application::UseCases::RunRowRandomization.new(stdin: @stdin, stdout: @stdout).call }
53
+ dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: @stdout).call }
51
54
  Interface::CLI::MenuLoop.new(
52
55
  stdin: @stdin,
53
56
  stdout: @stdout,
54
57
  menu_options: MENU_OPTIONS,
55
58
  extract_column_action: extract_column_action,
56
59
  extract_rows_action: extract_rows_action,
57
- randomize_rows_action: randomize_rows_action
60
+ randomize_rows_action: randomize_rows_action,
61
+ dedupe_action: dedupe_action
58
62
  ).run
59
63
  end
60
64
 
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CrossCsvDedupeSession
      # Identifies the key column of one CSV file: by header name when the file
      # has a header row, or by 1-based position when it does not.
      class ColumnSelector
        attr_reader :value

        # Builds a selector from raw input.
        #
        # With headers the input's string form is kept as the column name.
        # Without headers the input's string form must be digits with no
        # leading zero, and the value is stored as an Integer.
        #
        # Raises ArgumentError when the name is empty or the index is not a
        # positive integer.
        def self.from_input(headers_present:, input:)
          text = input.to_s

          unless headers_present
            raise ArgumentError, "column index must be a positive integer" unless text.match?(/\A[1-9]\d*\z/)

            return new(value: input.to_i, headers_present: false)
          end

          raise ArgumentError, "column name cannot be empty" if text.empty?

          new(value: text, headers_present: true)
        end

        # value is stored as given; headers_present is coerced to true/false.
        def initialize(value:, headers_present:)
          @headers_present = headers_present ? true : false
          @value = value
        end

        # True when the selector addresses a column by header name.
        def headers_present?
          @headers_present
        end

        # True when the selector addresses a column by 1-based position.
        def index?
          !headers_present?
        end

        # Returns the selected cell of +row+ as a String ("" when the lookup
        # yields nil). Positional selectors are converted to a 0-based index.
        def extract_from(row)
          lookup = index? ? @value - 1 : @value
          row[lookup].to_s
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
4
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
5
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
6
+ require "csvtool/domain/shared/output_destination"
7
+
8
module Csvtool
  module Domain
    module CrossCsvDedupeSession
      # Immutable description of one cross-CSV dedupe run: the two CSV
      # profiles, the key mapping, the matching options, and (optionally)
      # where the result should be written.
      class CrossCsvDedupeSession
        attr_reader :source, :reference, :key_mapping, :match_options, :output_destination

        # Creates a session that has no output destination yet.
        def self.start(source:, reference:, key_mapping:, match_options:)
          new(source: source, reference: reference, key_mapping: key_mapping, match_options: match_options)
        end

        # Raises ArgumentError when any argument is not of the expected domain
        # type. output_destination may be nil.
        def initialize(source:, reference:, key_mapping:, match_options:, output_destination: nil)
          require_kind!(source, CsvProfile, "source must be CsvProfile")
          require_kind!(reference, CsvProfile, "reference must be CsvProfile")
          require_kind!(key_mapping, KeyMapping, "key_mapping must be KeyMapping")
          require_kind!(match_options, MatchOptions, "match_options must be MatchOptions")
          if !output_destination.nil? && !output_destination.is_a?(Domain::Shared::OutputDestination)
            raise ArgumentError, "output_destination must be OutputDestination or nil"
          end

          @source = source
          @reference = reference
          @key_mapping = key_mapping
          @match_options = match_options
          @output_destination = output_destination
        end

        # Returns a new session identical to this one except for its output
        # destination; the receiver is left untouched.
        def with_output_destination(destination)
          self.class.new(
            source: source,
            reference: reference,
            key_mapping: key_mapping,
            match_options: match_options,
            output_destination: destination
          )
        end

        private

        # Raises ArgumentError with +message+ unless +candidate+ is a +kind+.
        def require_kind!(candidate, kind, message)
          raise ArgumentError, message unless candidate.is_a?(kind)
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CrossCsvDedupeSession
      # Describes how to read one CSV file: its path, its column separator and
      # whether its first row is a header row.
      class CsvProfile
        attr_reader :path, :separator

        # Raises ArgumentError when path or separator has an empty string form
        # (nil included). headers_present is coerced to true/false.
        def initialize(path:, separator:, headers_present:)
          reject_empty!(path, "path cannot be empty")
          reject_empty!(separator, "separator cannot be empty")

          @path, @separator = path, separator
          @headers_present = headers_present ? true : false
        end

        # True when the first row of the file is a header row.
        def headers_present?
          @headers_present
        end

        private

        # Raises ArgumentError with +message+ when +candidate.to_s+ is empty.
        def reject_empty!(candidate, message)
          raise ArgumentError, message if candidate.to_s.empty?
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
4
+
5
module Csvtool
  module Domain
    module CrossCsvDedupeSession
      # Pairs the key column of the source CSV with the key column of the
      # reference CSV.
      class KeyMapping
        attr_reader :source_selector, :reference_selector

        # Raises ArgumentError unless both arguments are ColumnSelector
        # instances.
        def initialize(source_selector:, reference_selector:)
          selectors = [source_selector, reference_selector]
          unless selectors.all? { |candidate| candidate.is_a?(ColumnSelector) }
            raise ArgumentError, "selectors must be ColumnSelector"
          end

          @source_selector, @reference_selector = selectors
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CrossCsvDedupeSession
      # Controls how key values are normalized before they are compared.
      class MatchOptions
        attr_reader :trim_whitespace, :case_insensitive

        # Both flags are coerced to true/false.
        def initialize(trim_whitespace:, case_insensitive:)
          @trim_whitespace = trim_whitespace ? true : false
          @case_insensitive = case_insensitive ? true : false
        end

        # Predicate spellings of the two readers above.
        alias trim_whitespace? trim_whitespace
        alias case_insensitive? case_insensitive

        # Returns the string form of +value+, stripped of surrounding
        # whitespace when trimming is enabled and downcased when matching is
        # case-insensitive.
        def normalize(value)
          text = value.to_s
          text = text.strip if trim_whitespace?
          text = text.downcase if case_insensitive?
          text
        end
      end
    end
  end
end
@@ -7,6 +7,7 @@ module Csvtool
7
7
  attr_reader :path, :separator
8
8
 
9
9
  def initialize(path:, separator:, headers_present:)
10
+ raise ArgumentError, "path cannot be empty" if path.to_s.empty?
10
11
  raise ArgumentError, "separator cannot be empty" if separator.to_s.empty?
11
12
 
12
13
  @path = path
@@ -7,6 +7,9 @@ module Csvtool
7
7
  attr_reader :path, :separator
8
8
 
9
9
  def initialize(path:, separator:)
10
+ raise ArgumentError, "path cannot be empty" if path.to_s.empty?
11
+ raise ArgumentError, "separator cannot be empty" if separator.to_s.empty?
12
+
10
13
  @path = path
11
14
  @separator = separator
12
15
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Csvtool
4
4
  module Domain
5
- module ColumnSession
5
+ module Shared
6
6
  class OutputDestination
7
7
  attr_reader :mode, :path
8
8
 
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "set"
5
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
6
+
7
module Csvtool
  module Infrastructure
    module CSV
      # Drops every row of a source CSV whose key value also occurs in the key
      # column of a reference CSV.
      #
      # Selectors are duck-typed: they must answer +headers_present?+ and
      # +extract_from(row)+. +match_options+ must answer +normalize(value)+.
      class CrossCsvDeduper
        # Runs the dedupe and collects the retained rows in memory.
        #
        # Returns the stats Hash produced by #each_retained with an extra
        # +:kept_rows+ entry holding the retained rows (arrays of fields).
        def call(
          source_path:,
          reference_path:,
          source_selector:,
          reference_selector:,
          source_col_sep: ",",
          reference_col_sep: ",",
          match_options: Domain::CrossCsvDedupeSession::MatchOptions.new(trim_whitespace: true, case_insensitive: false)
        )
          kept_rows = []
          stats = each_retained(
            source_path: source_path,
            reference_path: reference_path,
            source_selector: source_selector,
            reference_selector: reference_selector,
            source_col_sep: source_col_sep,
            reference_col_sep: reference_col_sep,
            match_options: match_options
          ) { |fields| kept_rows << fields }

          stats.merge(kept_rows: kept_rows)
        end

        # Streams the source file and yields the fields of every row whose
        # normalized key is absent from the reference file. The block is
        # optional; without it only the statistics are computed.
        #
        # Returns a Hash with:
        #   :headers         - source header names, [] when the source file is
        #                      completely empty, nil when the source has no
        #                      header row
        #   :source_rows     - number of data rows read from the source
        #   :removed_rows    - rows dropped because their key was in the reference
        #   :kept_rows_count - rows retained (and yielded)
        def each_retained(
          source_path:,
          reference_path:,
          source_selector:,
          reference_selector:,
          source_col_sep: ",",
          reference_col_sep: ",",
          match_options: Domain::CrossCsvDedupeSession::MatchOptions.new(trim_whitespace: true, case_insensitive: false)
        )
          source_has_headers = source_selector.headers_present?
          reference_keys = load_reference_keys(
            reference_path,
            selector: reference_selector,
            col_sep: reference_col_sep,
            match_options: match_options
          )

          source_header_row = nil
          source_rows = 0
          removed_rows = 0
          kept_rows_count = 0

          ::CSV.open(source_path, "r", headers: source_has_headers, col_sep: source_col_sep) do |csv|
            csv.each do |row|
              source_header_row = row.headers if source_has_headers && source_header_row.nil?
              source_rows += 1
              key = extract_key(row, selector: source_selector, match_options: match_options)
              if reference_keys.include?(key)
                removed_rows += 1
              else
                kept_rows_count += 1
                yield(source_has_headers ? row.fields : row) if block_given?
              end
            end

            # A source with a header line but no data rows never yields a row,
            # so the headers must be taken from the parser itself; otherwise
            # the header line would be lost from the result.
            source_header_row = parsed_headers(csv) if source_has_headers && source_header_row.nil?
          end

          {
            headers: source_has_headers ? source_header_row : nil,
            source_rows: source_rows,
            removed_rows: removed_rows,
            kept_rows_count: kept_rows_count
          }
        end

        private

        # Reads the reference file once and returns the Set of its normalized keys.
        def load_reference_keys(path, selector:, col_sep:, match_options:)
          keys = Set.new
          ::CSV.foreach(path, headers: selector.headers_present?, col_sep: col_sep) do |row|
            keys << extract_key(row, selector: selector, match_options: match_options)
          end
          keys
        end

        # Header names the parser has read so far. CSV#headers answers +true+
        # while no header line has been consumed (e.g. an empty file), which is
        # mapped to an empty list.
        def parsed_headers(csv)
          headers = csv.headers
          headers.is_a?(Array) ? headers : []
        end

        # Normalized key value of +row+ as selected by +selector+.
        def extract_key(row, selector:, match_options:)
          match_options.normalize(selector.extract_from(row))
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "csvtool/infrastructure/csv/header_reader"
5
+
6
module Csvtool
  module Infrastructure
    module CSV
      # Checks that a column selector actually addresses a column of the CSV
      # file described by a profile.
      class SelectorValidator
        # header_reader must respond to +call(file_path:, col_sep:)+ and return
        # the header names of the file.
        def initialize(header_reader: HeaderReader.new)
          @header_reader = header_reader
        end

        # Returns true when +selector+ names an existing header of the file
        # (header mode) or a 1-based position inside its first row (index
        # mode); false otherwise, including for an empty file.
        def valid?(profile:, selector:)
          if selector.headers_present?
            header_known?(profile, selector.value)
          else
            index_in_first_row?(profile, selector.value)
          end
        end

        private

        # True when +name+ is one of the header names reported for the file.
        def header_known?(profile, name)
          headers = @header_reader.call(file_path: profile.path, col_sep: profile.separator)
          !headers.empty? && headers.include?(name)
        end

        # True when +index+ is an Integer between 1 and the width of the first
        # row. The lower bound matters: 0 or a negative value would pass a bare
        # upper-bound check and later be read from the end of the row.
        def index_in_first_row?(profile, index)
          first_row = ::CSV.open(profile.path, "r", headers: false, col_sep: profile.separator, &:first)
          return false if first_row.nil?

          index.is_a?(Integer) && index.between?(1, first_row.length)
        end
      end
    end
  end
end
@@ -4,13 +4,14 @@ module Csvtool
4
4
  module Interface
5
5
  module CLI
6
6
  class MenuLoop
7
- def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:)
7
+ def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:)
8
8
  @stdin = stdin
9
9
  @stdout = stdout
10
10
  @menu_options = menu_options
11
11
  @extract_column_action = extract_column_action
12
12
  @extract_rows_action = extract_rows_action
13
13
  @randomize_rows_action = randomize_rows_action
14
+ @dedupe_action = dedupe_action
14
15
  end
15
16
 
16
17
  def run
@@ -28,9 +29,11 @@ module Csvtool
28
29
  when "3"
29
30
  @randomize_rows_action.call
30
31
  when "4"
32
+ @dedupe_action.call
33
+ when "5"
31
34
  return 0
32
35
  else
33
- @stdout.puts "Please choose 1, 2, 3, or 4."
36
+ @stdout.puts "Please choose 1, 2, 3, 4, or 5."
34
37
  end
35
38
  end
36
39
  end
@@ -0,0 +1,163 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "csvtool/application/use_cases/run_cross_csv_dedupe"
5
+ require "csvtool/interface/cli/errors/presenter"
6
+ require "csvtool/interface/cli/prompts/file_path_prompt"
7
+ require "csvtool/interface/cli/prompts/separator_prompt"
8
+ require "csvtool/interface/cli/prompts/output_destination_prompt"
9
+ require "csvtool/domain/cross_csv_dedupe_session/csv_profile"
10
+ require "csvtool/domain/cross_csv_dedupe_session/column_selector"
11
+ require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
12
+ require "csvtool/domain/cross_csv_dedupe_session/match_options"
13
+ require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
14
+ require "csvtool/domain/shared/output_destination"
15
+
16
module Csvtool
  module Interface
    module CLI
      module Workflows
        # Interactive workflow for "Dedupe using another CSV": collects the
        # source and reference file descriptions, the key columns, the matching
        # options and the output destination, runs the use case, and prints the
        # result summary.
        class RunCrossCsvDedupeWorkflow
          # stdin/stdout are the streams used for every prompt and message.
          # use_case must respond to +call(session:, on_header:, on_row:)+.
          def initialize(stdin:, stdout:, use_case: Application::UseCases::RunCrossCsvDedupe.new)
            @stdin = stdin
            @stdout = stdout
            @use_case = use_case
            @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
          end

          # Runs the whole dialogue. Every failure path reports through the
          # error presenter (or the sub-prompt that failed) and returns early.
          # The return value carries no meaning for callers in view.
          def call
            source_path = Interface::CLI::Prompts::FilePathPrompt.new(stdin: @stdin, stdout: @stdout).call
            return @errors.file_not_found(source_path) unless File.file?(source_path)

            source = prompt_profile("Source", source_path)
            return if source.nil?

            @stdout.print "Reference CSV file path: "
            reference_path = read_line
            return @errors.file_not_found(reference_path) unless File.file?(reference_path)

            reference = prompt_profile("Reference", reference_path)
            return if reference.nil?

            source_selector = prompt_selector("Source", source.headers_present?)
            return @errors.column_not_found if source_selector.nil?

            reference_selector = prompt_selector("Reference", reference.headers_present?)
            return @errors.column_not_found if reference_selector.nil?

            match_options = prompt_match_options
            session = Domain::CrossCsvDedupeSession::CrossCsvDedupeSession.start(
              source: source,
              reference: reference,
              key_mapping: Domain::CrossCsvDedupeSession::KeyMapping.new(
                source_selector: source_selector,
                reference_selector: reference_selector
              ),
              match_options: match_options
            )

            destination = prompt_output_destination
            return if destination.nil?

            session = session.with_output_destination(destination)

            result = @use_case.call(
              session: session,
              on_header: ->(headers) { print_header(headers, col_sep: session.source.separator) },
              on_row: ->(fields) { print_row(fields, col_sep: session.source.separator) }
            )
            return handle_error(result) unless result.ok?

            report(session, result)
          rescue ArgumentError => e
            # Only the empty-output-path validation error is expected here;
            # anything else is a programming error and is re-raised untouched.
            return @errors.empty_output_path if e.message == "file output path cannot be empty"

            raise
          end

          private

          # One trimmed line from stdin; "" at end of input.
          def read_line
            @stdin.gets&.strip.to_s
          end

          # Asks for the separator and header flag of the file at +path+ and
          # returns its CsvProfile, or nil when the separator prompt gave up.
          # +label+ is "Source" or "Reference" and only affects prompt text.
          def prompt_profile(label, path)
            @stdout.puts "#{label} CSV separator:"
            col_sep = Interface::CLI::Prompts::SeparatorPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call
            return if col_sep.nil?

            @stdout.print "#{label} headers present? [Y/n]: "
            Domain::CrossCsvDedupeSession::CsvProfile.new(
              path: path,
              separator: col_sep,
              headers_present: read_yes_no(default: true)
            )
          end

          # Asks for the key column (name or 1-based index, depending on
          # +headers_present+). Returns nil when the input is rejected.
          def prompt_selector(label, headers_present)
            if headers_present
              @stdout.print "#{label} key column name: "
            else
              @stdout.print "#{label} key column index (1-based): "
            end
            Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: headers_present, input: read_line)
          rescue ArgumentError
            nil
          end

          # Asks the two matching questions and returns the MatchOptions.
          def prompt_match_options
            @stdout.print "Trim whitespace before matching? [Y/n]: "
            trim_whitespace = read_yes_no(default: true)
            @stdout.print "Case-insensitive matching? [y/N]: "
            case_insensitive = read_yes_no(default: false)

            Domain::CrossCsvDedupeSession::MatchOptions.new(
              trim_whitespace: trim_whitespace,
              case_insensitive: case_insensitive
            )
          end

          # Runs the output-destination prompt and converts its answer into a
          # domain OutputDestination; nil when the prompt returned nil.
          def prompt_output_destination
            choice = Interface::CLI::Prompts::OutputDestinationPrompt.new(
              stdin: @stdin,
              stdout: @stdout,
              errors: @errors
            ).call
            return if choice.nil?

            if choice[:mode] == :file
              Domain::Shared::OutputDestination.file(path: choice[:path])
            else
              Domain::Shared::OutputDestination.console
            end
          end

          # Prints the output path (file mode only), the row counts and the
          # two edge-case notices.
          def report(session, result)
            @stdout.puts "Wrote output to #{result.data[:output_path]}" if session.output_destination.file?
            stats = result.data[:stats]
            @stdout.puts "Summary: source_rows=#{stats[:source_rows]} removed_rows=#{stats[:removed_rows]} kept_rows=#{stats[:kept_rows_count]}"
            @stdout.puts "No rows removed; no matching keys found." if stats[:removed_rows].zero?
            @stdout.puts "All source rows were removed by dedupe." if stats[:source_rows].positive? && stats[:kept_rows_count].zero?
          end

          # Console rendering of the header line, preceded by a blank line.
          def print_header(headers, col_sep:)
            @stdout.puts
            @stdout.puts ::CSV.generate_line(headers, row_sep: "", col_sep: col_sep).chomp
          end

          # Console rendering of one retained row.
          def print_row(fields, col_sep:)
            @stdout.puts ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep).chomp
          end

          # Maps a failed use-case result onto the matching presenter message.
          # NOTE(review): an error code not listed here produces no message at
          # all; confirm whether the presenter offers a generic fallback.
          def handle_error(result)
            case result.error
            when :column_not_found
              @errors.column_not_found
            when :could_not_parse_csv
              @errors.could_not_parse_csv
            when :cannot_read_file
              @errors.cannot_read_file(result.data[:path])
            when :cannot_write_output_file
              @errors.cannot_write_output_file(result.data[:path], result.data[:error_class])
            end
          end

          # Reads a yes/no answer; blank or unrecognised input yields +default+.
          def read_yes_no(default:)
            answer = read_line.downcase
            return true if %w[y yes].include?(answer)
            return false if %w[n no].include?(answer)

            default
          end
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Csvtool
4
- VERSION = "0.3.0.alpha"
4
+ VERSION = "0.4.0.alpha"
5
5
  end