csvops 0.4.0.alpha → 0.6.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -12
- data/docs/architecture.md +208 -21
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/docs/release-v0.6.0-alpha.md +84 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
- data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
- data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
- data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
- data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/errors/presenter.rb +4 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
- data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
- data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +175 -21
- data/test/csvtool/cli_unit_test.rb +4 -4
- data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
- data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
- data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
- data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +59 -16
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/parity_duplicates_left.csv +4 -0
- data/test/fixtures/parity_duplicates_right.csv +3 -0
- data/test/fixtures/parity_people_header_mismatch.csv +4 -0
- data/test/fixtures/parity_people_many_reordered.csv +13 -0
- data/test/fixtures/parity_people_mismatch.csv +4 -0
- data/test/fixtures/parity_people_reordered.csv +4 -0
- data/test/fixtures/parity_people_reordered.tsv +4 -0
- metadata +90 -1
data/lib/csvtool/cli.rb
CHANGED
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
require "csv"
|
|
4
4
|
require "csvtool/interface/cli/menu_loop"
|
|
5
|
-
require "csvtool/
|
|
6
|
-
require "csvtool/
|
|
7
|
-
require "csvtool/
|
|
5
|
+
require "csvtool/interface/cli/workflows/run_extraction_workflow"
|
|
6
|
+
require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
|
|
7
|
+
require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
|
|
8
8
|
require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
|
|
9
|
+
require "csvtool/interface/cli/workflows/run_csv_parity_workflow"
|
|
9
10
|
require "csvtool/interface/cli/errors/presenter"
|
|
10
11
|
require "csvtool/infrastructure/csv/header_reader"
|
|
11
12
|
require "csvtool/infrastructure/csv/value_streamer"
|
|
@@ -18,6 +19,7 @@ module Csvtool
|
|
|
18
19
|
"Extract rows (range)",
|
|
19
20
|
"Randomize rows",
|
|
20
21
|
"Dedupe using another CSV",
|
|
22
|
+
"Validate parity",
|
|
21
23
|
"Exit"
|
|
22
24
|
].freeze
|
|
23
25
|
|
|
@@ -47,10 +49,11 @@ module Csvtool
|
|
|
47
49
|
private
|
|
48
50
|
|
|
49
51
|
def run_menu_loop
|
|
50
|
-
extract_column_action = -> {
|
|
51
|
-
extract_rows_action = -> {
|
|
52
|
-
randomize_rows_action = -> {
|
|
52
|
+
extract_column_action = -> { Interface::CLI::Workflows::RunExtractionWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
53
|
+
extract_rows_action = -> { Interface::CLI::Workflows::RunRowExtractionWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
54
|
+
randomize_rows_action = -> { Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
53
55
|
dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
56
|
+
parity_action = -> { Interface::CLI::Workflows::RunCsvParityWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
54
57
|
Interface::CLI::MenuLoop.new(
|
|
55
58
|
stdin: @stdin,
|
|
56
59
|
stdout: @stdout,
|
|
@@ -58,7 +61,8 @@ module Csvtool
|
|
|
58
61
|
extract_column_action: extract_column_action,
|
|
59
62
|
extract_rows_action: extract_rows_action,
|
|
60
63
|
randomize_rows_action: randomize_rows_action,
|
|
61
|
-
dedupe_action: dedupe_action
|
|
64
|
+
dedupe_action: dedupe_action,
|
|
65
|
+
parity_action: parity_action
|
|
62
66
|
).run
|
|
63
67
|
end
|
|
64
68
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvParitySession
      # Value object carrying the read options for a parity comparison:
      # the column separator and the headers-present flag.
      class ParityOptions
        attr_reader :separator

        # separator       - column separator; its string form must not be "".
        # headers_present - stored as given and exposed via #headers_present?.
        #
        # Raises ArgumentError when the separator stringifies to "" (nil included).
        def initialize(separator:, headers_present:)
          separator_missing = separator.to_s.empty?
          raise ArgumentError, "separator cannot be empty" if separator_missing

          @headers_present = headers_present
          @separator = separator
        end

        # Returns the headers_present value exactly as it was passed in.
        def headers_present?
          @headers_present
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvParitySession
      # Bundles the pair of sources being compared with the options used to
      # read them. Performs no validation of its own.
      class ParitySession
        attr_reader :source_pair, :options

        class << self
          # Named constructor; forwards both keywords to .new unchanged.
          def start(source_pair:, options:)
            new(source_pair: source_pair, options: options)
          end
        end

        def initialize(source_pair:, options:)
          @source_pair, @options = source_pair, options
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvParitySession
      # The two file paths ("left" and "right") taking part in a parity check.
      class SourcePair
        attr_reader :left_path, :right_path

        # Raises ArgumentError when either path stringifies to "" (nil included).
        # The left path is checked before the right one.
        def initialize(left_path:, right_path:)
          reject_blank(left_path, "left_path cannot be empty")
          reject_blank(right_path, "right_path cannot be empty")

          @left_path = left_path
          @right_path = right_path
        end

        private

        # Raises ArgumentError with +message+ when +path+ has an empty string form.
        def reject_blank(path, message)
          raise ArgumentError, message if path.to_s.empty?
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Infrastructure
    module CSV
      # Compares two CSV files as multisets of rows. Every data row is
      # re-serialized to a single line and tallied: +1 per occurrence on the
      # left, -1 per occurrence on the right. Row order therefore does not
      # matter, while duplicate counts do.
      class CsvParityComparator
        # Returns a Hash with:
        #   :match               - true when no row has a non-zero balance
        #   :left_rows           - number of rows read from left_path
        #   :right_rows          - number of rows read from right_path
        #   :left_only_count     - total surplus occurrences on the left
        #   :right_only_count    - total surplus occurrences on the right
        #   :left_only_examples  - up to sample_limit { row:, count_delta: } hashes
        #   :right_only_examples - same, for the right side
        #
        # When headers_present is truthy the first line of each file is consumed
        # as the header by the CSV reader and is not part of the tally.
        def call(left_path:, right_path:, col_sep:, headers_present:, sample_limit: 5)
          read_options = { col_sep: col_sep, headers_present: headers_present }
          balance = Hash.new(0)

          left_total = stream_rows(path: left_path, **read_options) { |line| balance[line] += 1 }
          right_total = stream_rows(path: right_path, **read_options) { |line| balance[line] -= 1 }

          left_surplus, right_surplus, left_samples, right_samples =
            mismatch_totals_and_samples(deltas: balance, sample_limit: sample_limit)

          {
            match: left_surplus.zero? && right_surplus.zero?,
            left_rows: left_total,
            right_rows: right_total,
            left_only_count: left_surplus,
            right_only_count: right_surplus,
            left_only_examples: left_samples,
            right_only_examples: right_samples
          }
        end

        private

        # Yields the serialized form of every row in the file at +path+ and
        # returns how many rows were yielded.
        def stream_rows(path:, col_sep:, headers_present:)
          seen = 0

          ::CSV.foreach(path, headers: headers_present, col_sep: col_sep) do |record|
            # With headers enabled the reader yields CSV::Row; otherwise an Array.
            cells = headers_present ? record.fields : record
            yield serialize(fields: cells, col_sep: col_sep)
            seen += 1
          end

          seen
        end

        # Walks the tally in insertion order and returns
        # [left_only_count, right_only_count, left_only_examples, right_only_examples].
        # Each example list stops growing once it holds sample_limit entries.
        def mismatch_totals_and_samples(deltas:, sample_limit:)
          left_side = { total: 0, samples: [] }
          right_side = { total: 0, samples: [] }

          deltas.each do |line, delta|
            next if delta.zero?

            bucket = delta.positive? ? left_side : right_side
            magnitude = delta.abs
            bucket[:total] += magnitude
            bucket[:samples] << { row: line, count_delta: magnitude } if bucket[:samples].length < sample_limit
          end

          [left_side[:total], right_side[:total], left_side[:samples], right_side[:samples]]
        end

        # Canonical one-line representation of a row, used as the tally key.
        def serialize(fields:, col_sep:)
          line = ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep)
          line.chomp
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Infrastructure
    module Output
      # Writes the rows retained by a cross-CSV dedupe run to a CSV file.
      class CsvCrossCsvDedupeFileWriter
        # deduper - object responding to each_retained(**options), yielding one
        #           field array per retained row.
        def initialize(deduper:)
          @deduper = deduper
        end

        # Opens +path+ for writing, emits +headers+ first when they are not nil,
        # appends every row the deduper yields, and returns whatever
        # each_retained returned.
        def call(path:, headers:, col_sep:, dedupe_options:)
          writer_options = { write_headers: !headers.nil?, headers: headers, col_sep: col_sep }

          # CSV.open with a block returns the block's value, i.e. the deduper's result.
          ::CSV.open(path, "w", **writer_options) do |out|
            @deduper.each_retained(**dedupe_options) { |kept| out << kept }
          end
        end
      end
    end
  end
end
|
|
@@ -6,9 +6,7 @@ module Csvtool
|
|
|
6
6
|
module Infrastructure
|
|
7
7
|
module Output
|
|
8
8
|
class CsvFileWriter
|
|
9
|
-
def initialize(
|
|
10
|
-
@stdout = stdout
|
|
11
|
-
@errors = errors
|
|
9
|
+
def initialize(value_streamer:)
|
|
12
10
|
@value_streamer = value_streamer
|
|
13
11
|
end
|
|
14
12
|
|
|
@@ -19,10 +17,6 @@ module Csvtool
|
|
|
19
17
|
csv << [value]
|
|
20
18
|
end
|
|
21
19
|
end
|
|
22
|
-
|
|
23
|
-
@stdout.puts "Wrote output to #{output_path}"
|
|
24
|
-
rescue Errno::EACCES, Errno::ENOENT => e
|
|
25
|
-
@errors.cannot_write_output_file(output_path, e.class)
|
|
26
20
|
end
|
|
27
21
|
end
|
|
28
22
|
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Infrastructure
    module Output
      # Writes rows to a CSV file in the order the row randomizer yields them.
      class CsvRandomizedRowFileWriter
        # row_randomizer - object responding to
        #                  each(file_path:, col_sep:, headers:, seed:) and yielding
        #                  one field array per row.
        def initialize(row_randomizer:)
          @row_randomizer = row_randomizer
        end

        # Opens +path+ for writing (emitting +headers+ first when they are not
        # nil) and appends every row produced for the source at +file_path+.
        # The same col_sep is used for reading the source and writing the output.
        def call(path:, headers:, file_path:, col_sep:, headers_present:, seed:)
          source_args = { file_path: file_path, col_sep: col_sep, headers: headers_present, seed: seed }

          ::CSV.open(path, "w", write_headers: !headers.nil?, headers: headers, col_sep: col_sep) do |out|
            @row_randomizer.each(**source_args) { |shuffled| out << shuffled }
          end
        end
      end
    end
  end
end
|
|
@@ -6,9 +6,7 @@ module Csvtool
|
|
|
6
6
|
module Infrastructure
|
|
7
7
|
module Output
|
|
8
8
|
class CsvRowFileWriter
|
|
9
|
-
def initialize(
|
|
10
|
-
@stdout = stdout
|
|
11
|
-
@errors = errors
|
|
9
|
+
def initialize(row_streamer:)
|
|
12
10
|
@row_streamer = row_streamer
|
|
13
11
|
end
|
|
14
12
|
|
|
@@ -30,12 +28,7 @@ module Csvtool
|
|
|
30
28
|
csv << fields
|
|
31
29
|
end
|
|
32
30
|
|
|
33
|
-
|
|
34
|
-
@stdout.puts "Wrote output to #{output_path}" if wrote_rows
|
|
35
|
-
stats
|
|
36
|
-
rescue Errno::EACCES, Errno::ENOENT => e
|
|
37
|
-
@errors.cannot_write_output_file(output_path, e.class)
|
|
38
|
-
nil
|
|
31
|
+
stats.merge(wrote_rows: wrote_rows)
|
|
39
32
|
ensure
|
|
40
33
|
csv&.close unless csv&.closed?
|
|
41
34
|
end
|
|
@@ -4,7 +4,7 @@ module Csvtool
|
|
|
4
4
|
module Interface
|
|
5
5
|
module CLI
|
|
6
6
|
class MenuLoop
|
|
7
|
-
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:)
|
|
7
|
+
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:)
|
|
8
8
|
@stdin = stdin
|
|
9
9
|
@stdout = stdout
|
|
10
10
|
@menu_options = menu_options
|
|
@@ -12,6 +12,7 @@ module Csvtool
|
|
|
12
12
|
@extract_rows_action = extract_rows_action
|
|
13
13
|
@randomize_rows_action = randomize_rows_action
|
|
14
14
|
@dedupe_action = dedupe_action
|
|
15
|
+
@parity_action = parity_action
|
|
15
16
|
end
|
|
16
17
|
|
|
17
18
|
def run
|
|
@@ -31,9 +32,11 @@ module Csvtool
|
|
|
31
32
|
when "4"
|
|
32
33
|
@dedupe_action.call
|
|
33
34
|
when "5"
|
|
35
|
+
@parity_action.call
|
|
36
|
+
when "6"
|
|
34
37
|
return 0
|
|
35
38
|
else
|
|
36
|
-
@stdout.puts "Please choose 1, 2, 3, 4, or
|
|
39
|
+
@stdout.puts "Please choose 1, 2, 3, 4, 5, or 6."
|
|
37
40
|
end
|
|
38
41
|
end
|
|
39
42
|
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks the user which column to use as the dedupe key and turns the
        # answer into a ColumnSelector.
        class DedupeKeySelectorPrompt
          def initialize(stdin:, stdout:)
            @stdin = stdin
            @stdout = stdout
          end

          # Prompts for a column name when headers are present, otherwise for a
          # 1-based column index. The stripped answer ("" at end of input) is
          # passed to ColumnSelector.from_input.
          #
          # Returns the selector, or nil when an ArgumentError is raised.
          def call(label:, headers_present:)
            question = headers_present ? "#{label} key column name: " : "#{label} key column index (1-based): "
            @stdout.print question

            answer = @stdin.gets&.strip.to_s
            Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: headers_present, input: answer)
          rescue ArgumentError
            nil
          end
        end
      end
    end
  end
end
|
|
@@ -5,13 +5,15 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class FilePathPrompt
|
|
8
|
+
DEFAULT_LABEL = "CSV file path: "
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
end
|
|
12
14
|
|
|
13
|
-
def call
|
|
14
|
-
@stdout.print
|
|
15
|
+
def call(label: DEFAULT_LABEL)
|
|
16
|
+
@stdout.print label
|
|
15
17
|
@stdin.gets&.strip.to_s
|
|
16
18
|
end
|
|
17
19
|
end
|
|
@@ -5,13 +5,15 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class HeadersPresentPrompt
|
|
8
|
+
DEFAULT_LABEL = "Headers present? [Y/n]: "
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
end
|
|
12
14
|
|
|
13
|
-
def call
|
|
14
|
-
@stdout.print
|
|
15
|
+
def call(label: DEFAULT_LABEL)
|
|
16
|
+
@stdout.print label
|
|
15
17
|
answer = @stdin.gets&.strip.to_s.downcase
|
|
16
18
|
!%w[n no].include?(answer)
|
|
17
19
|
end
|
|
@@ -5,14 +5,16 @@ module Csvtool
|
|
|
5
5
|
module CLI
|
|
6
6
|
module Prompts
|
|
7
7
|
class SeparatorPrompt
|
|
8
|
+
DEFAULT_LABEL = "Choose separator:"
|
|
9
|
+
|
|
8
10
|
def initialize(stdin:, stdout:, errors:)
|
|
9
11
|
@stdin = stdin
|
|
10
12
|
@stdout = stdout
|
|
11
13
|
@errors = errors
|
|
12
14
|
end
|
|
13
15
|
|
|
14
|
-
def call
|
|
15
|
-
@stdout.puts
|
|
16
|
+
def call(label: DEFAULT_LABEL)
|
|
17
|
+
@stdout.puts label
|
|
16
18
|
@stdout.puts "1. comma (,)"
|
|
17
19
|
@stdout.puts "2. tab (\\t)"
|
|
18
20
|
@stdout.puts "3. semicolon (;)"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Generic yes/no question with a caller-supplied default.
        class YesNoPrompt
          def initialize(stdin:, stdout:)
            @stdin = stdin
            @stdout = stdout
          end

          # Prints +label+ and reads one line. The answer is stripped and
          # lower-cased before matching.
          #
          # Returns true for "y"/"yes", false for "n"/"no", and +default+ for
          # anything else, including an empty answer or end of input.
          def call(label:, default:)
            @stdout.print label

            case @stdin.gets&.strip.to_s.downcase
            when "y", "yes" then true
            when "n", "no" then false
            else default
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/column_session/separator"
|
|
4
|
+
require "csvtool/domain/column_session/csv_source"
|
|
5
|
+
require "csvtool/domain/column_session/column_selection"
|
|
6
|
+
require "csvtool/domain/column_session/extraction_options"
|
|
7
|
+
require "csvtool/domain/column_session/column_session"
|
|
8
|
+
|
|
9
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a ColumnSession from raw CLI inputs.
          class ColumnSessionBuilder
            # Wraps col_sep in a Separator, pairs it with file_path in a
            # CsvSource, and starts a ColumnSession for column_name.
            # The preview limit is fixed at 10.
            def call(file_path:, col_sep:, column_name:, skip_blanks:)
              column_session = Domain::ColumnSession

              csv_source = column_session::CsvSource.new(
                path: file_path,
                separator: column_session::Separator.new(col_sep)
              )
              selection = column_session::ColumnSelection.new(name: column_name)
              extraction = column_session::ExtractionOptions.new(skip_blanks: skip_blanks, preview_limit: 10)

              column_session::ColumnSession.start(
                source: csv_source,
                column_selection: selection,
                options: extraction
              )
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/cross_csv_dedupe_session/key_mapping"
|
|
4
|
+
require "csvtool/domain/cross_csv_dedupe_session/match_options"
|
|
5
|
+
require "csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a CrossCsvDedupeSession from already-collected inputs.
          class CrossCsvDedupeSessionBuilder
            # Builds the KeyMapping and MatchOptions, starts the session with
            # the given source and reference, and returns the result of
            # attaching +destination+ via with_output_destination.
            def call(source:, reference:, source_selector:, reference_selector:, trim_whitespace:, case_insensitive:, destination:)
              dedupe = Domain::CrossCsvDedupeSession

              mapping = dedupe::KeyMapping.new(
                source_selector: source_selector,
                reference_selector: reference_selector
              )
              matching = dedupe::MatchOptions.new(
                trim_whitespace: trim_whitespace,
                case_insensitive: case_insensitive
              )

              session = dedupe::CrossCsvDedupeSession.start(
                source: source,
                reference: reference,
                key_mapping: mapping,
                match_options: matching
              )
              session.with_output_destination(destination)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/csv_parity_session/source_pair"
|
|
4
|
+
require "csvtool/domain/csv_parity_session/parity_options"
|
|
5
|
+
require "csvtool/domain/csv_parity_session/parity_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a ParitySession from raw CLI inputs.
          class CsvParitySessionBuilder
            # Builds a SourcePair from the two paths and ParityOptions from
            # col_sep / headers_present, then starts a ParitySession with them.
            def call(left_path:, right_path:, col_sep:, headers_present:)
              parity = Domain::CsvParitySession

              pair = parity::SourcePair.new(left_path: left_path, right_path: right_path)
              parity_options = parity::ParityOptions.new(separator: col_sep, headers_present: headers_present)

              parity::ParitySession.start(source_pair: pair, options: parity_options)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/row_session/row_source"
|
|
4
|
+
require "csvtool/domain/row_session/row_session"
|
|
5
|
+
|
|
6
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a RowSession for extracting a range of rows.
          class RowExtractionSessionBuilder
            # Builds a RowSource from file_path / col_sep, starts a RowSession
            # for row_range, and returns the result of attaching +destination+
            # via with_output_destination.
            def call(file_path:, col_sep:, row_range:, destination:)
              row_source = Domain::RowSession::RowSource.new(path: file_path, separator: col_sep)

              Domain::RowSession::RowSession
                .start(source: row_source, row_range: row_range)
                .with_output_destination(destination)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
4
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
5
|
+
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a RandomizationSession from raw CLI inputs.
          class RowRandomizationSessionBuilder
            # Builds the RandomizationSource and RandomizationOptions, starts
            # the session, and returns the result of attaching +destination+
            # via with_output_destination.
            def call(file_path:, col_sep:, headers_present:, seed:, destination:)
              randomization = Domain::RowRandomizationSession

              input = randomization::RandomizationSource.new(
                path: file_path,
                separator: col_sep,
                headers_present: headers_present
              )
              settings = randomization::RandomizationOptions.new(seed: seed)

              randomization::RandomizationSession
                .start(source: input, options: settings)
                .with_output_destination(destination)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Console output for the column-extraction workflow.
          class ColumnExtractionPresenter
            def initialize(stdout:)
              @stdout = stdout
            end

            # Writes one extracted value on its own line.
            def print_value(value)
              @stdout.puts(value)
            end

            # Confirms that the output file at +path+ was written.
            def print_file_written(path)
              confirmation = "Wrote output to #{path}"
              @stdout.puts(confirmation)
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Console output for the cross-CSV dedupe workflow.
          class CrossCsvDedupePresenter
            # col_sep - separator used when rendering header and data rows.
            def initialize(stdout:, col_sep:)
              @stdout = stdout
              @col_sep = col_sep
            end

            # Prints a blank line followed by the header row.
            def print_header(headers)
              @stdout.puts
              @stdout.puts csv_line(headers)
            end

            # Prints one data row.
            def print_row(fields)
              @stdout.puts csv_line(fields)
            end

            # Confirms that the output file at +path+ was written.
            def print_file_written(path)
              @stdout.puts "Wrote output to #{path}"
            end

            # Prints the row counts from +stats+ (:source_rows, :removed_rows,
            # :kept_rows_count), plus a note when nothing was removed and a note
            # when a non-empty source ended up with no kept rows.
            def print_summary(stats)
              source_total = stats[:source_rows]
              removed_total = stats[:removed_rows]
              kept_total = stats[:kept_rows_count]

              @stdout.puts "Summary: source_rows=#{source_total} removed_rows=#{removed_total} kept_rows=#{kept_total}"
              @stdout.puts "No rows removed; no matching keys found." if removed_total.zero?
              @stdout.puts "All source rows were removed by dedupe." if source_total.positive? && kept_total.zero?
            end

            private

            # Renders +fields+ as a single CSV line (no trailing newline) using
            # the presenter's separator.
            def csv_line(fields)
              ::CSV.generate_line(fields, row_sep: "", col_sep: @col_sep).chomp
            end
          end
        end
      end
    end
  end
end
|