csvops 0.4.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -9
- data/docs/architecture.md +148 -18
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +17 -14
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +6 -6
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +48 -125
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +28 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- metadata +60 -1
|
@@ -1,64 +1,47 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "csv"
|
|
4
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
-
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
-
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
-
require "csvtool/interface/cli/prompts/column_selector_prompt"
|
|
8
|
-
require "csvtool/interface/cli/prompts/skip_blanks_prompt"
|
|
9
|
-
require "csvtool/interface/cli/prompts/confirm_prompt"
|
|
10
|
-
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
11
4
|
require "csvtool/infrastructure/csv/header_reader"
|
|
12
5
|
require "csvtool/infrastructure/csv/value_streamer"
|
|
13
|
-
require "csvtool/services/preview_builder"
|
|
14
|
-
require "csvtool/infrastructure/output/console_writer"
|
|
15
6
|
require "csvtool/infrastructure/output/csv_file_writer"
|
|
16
|
-
require "csvtool/
|
|
17
|
-
require "csvtool/domain/column_session/csv_source"
|
|
18
|
-
require "csvtool/domain/column_session/column_selection"
|
|
19
|
-
require "csvtool/domain/column_session/extraction_options"
|
|
20
|
-
require "csvtool/domain/column_session/extraction_value"
|
|
21
|
-
require "csvtool/domain/column_session/preview"
|
|
22
|
-
require "csvtool/domain/column_session/column_session"
|
|
23
|
-
require "csvtool/domain/shared/output_destination"
|
|
7
|
+
require "csvtool/services/preview_builder"
|
|
24
8
|
|
|
25
9
|
module Csvtool
|
|
26
10
|
module Application
|
|
27
11
|
module UseCases
|
|
28
12
|
class RunExtraction
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@header_reader = Infrastructure::CSV::HeaderReader.new
|
|
34
|
-
@value_streamer = Infrastructure::CSV::ValueStreamer.new
|
|
35
|
-
@preview_builder = Services::PreviewBuilder.new(value_streamer: @value_streamer)
|
|
13
|
+
Result = Struct.new(:ok, :error, :data, keyword_init: true) do
|
|
14
|
+
def ok?
|
|
15
|
+
ok
|
|
16
|
+
end
|
|
36
17
|
end
|
|
37
18
|
|
|
38
|
-
def
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
19
|
+
def initialize(
|
|
20
|
+
header_reader: Infrastructure::CSV::HeaderReader.new,
|
|
21
|
+
value_streamer: Infrastructure::CSV::ValueStreamer.new,
|
|
22
|
+
preview_builder: nil,
|
|
23
|
+
csv_file_writer: nil
|
|
24
|
+
)
|
|
25
|
+
@header_reader = header_reader
|
|
26
|
+
@value_streamer = value_streamer
|
|
27
|
+
@preview_builder = preview_builder || Services::PreviewBuilder.new(value_streamer: value_streamer)
|
|
28
|
+
@csv_file_writer = csv_file_writer || Infrastructure::Output::CsvFileWriter.new(value_streamer: @value_streamer)
|
|
29
|
+
end
|
|
45
30
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
return @errors.no_headers if headers.empty?
|
|
31
|
+
def read_headers(file_path:, col_sep:)
|
|
32
|
+
return failure(:file_not_found, path: file_path) unless File.file?(file_path)
|
|
49
33
|
|
|
50
|
-
|
|
51
|
-
return if
|
|
52
|
-
column_selection = Domain::ColumnSession::ColumnSelection.new(name: column_name)
|
|
34
|
+
headers = @header_reader.call(file_path: file_path, col_sep: col_sep)
|
|
35
|
+
return failure(:no_headers) if headers.empty?
|
|
53
36
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
)
|
|
37
|
+
success(headers: headers)
|
|
38
|
+
rescue CSV::MalformedCSVError
|
|
39
|
+
failure(:could_not_parse_csv)
|
|
40
|
+
rescue Errno::EACCES
|
|
41
|
+
failure(:cannot_read_file, path: file_path)
|
|
42
|
+
end
|
|
61
43
|
|
|
44
|
+
def preview(session:)
|
|
62
45
|
preview_values = @preview_builder.call(
|
|
63
46
|
file_path: session.source.path,
|
|
64
47
|
column_name: session.column_selection.name,
|
|
@@ -66,58 +49,50 @@ module Csvtool
|
|
|
66
49
|
skip_blanks: session.options.skip_blanks?,
|
|
67
50
|
limit: session.options.preview_limit
|
|
68
51
|
)
|
|
69
|
-
|
|
70
|
-
values: preview_values.map { |value| Domain::ColumnSession::ExtractionValue.new(value) }
|
|
71
|
-
)
|
|
72
|
-
session = session.with_preview(preview)
|
|
73
|
-
|
|
74
|
-
confirmed = Interface::CLI::Prompts::ConfirmPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call(session.preview.to_strings)
|
|
75
|
-
return unless confirmed
|
|
76
|
-
session = session.confirm!
|
|
77
|
-
|
|
78
|
-
output_destination = Interface::CLI::Prompts::OutputDestinationPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call
|
|
79
|
-
return if output_destination.nil?
|
|
80
|
-
domain_destination =
|
|
81
|
-
if output_destination[:mode] == :file
|
|
82
|
-
Domain::Shared::OutputDestination.file(path: output_destination[:path])
|
|
83
|
-
else
|
|
84
|
-
Domain::Shared::OutputDestination.console
|
|
85
|
-
end
|
|
86
|
-
session = session.with_output_destination(domain_destination)
|
|
87
|
-
|
|
88
|
-
write_output(
|
|
89
|
-
session.output_destination,
|
|
90
|
-
file_path: session.source.path,
|
|
91
|
-
column_name: session.column_selection.name,
|
|
92
|
-
col_sep: session.source.separator.value,
|
|
93
|
-
skip_blanks: session.options.skip_blanks?
|
|
94
|
-
)
|
|
52
|
+
success(preview_values: preview_values)
|
|
95
53
|
rescue CSV::MalformedCSVError
|
|
96
|
-
|
|
54
|
+
failure(:could_not_parse_csv)
|
|
97
55
|
rescue Errno::EACCES
|
|
98
|
-
|
|
56
|
+
failure(:cannot_read_file, path: session.source.path)
|
|
99
57
|
end
|
|
100
58
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
59
|
+
def extract(session:, on_value: nil)
|
|
60
|
+
if session.output_destination.file?
|
|
61
|
+
@csv_file_writer.call(
|
|
62
|
+
output_path: session.output_destination.path,
|
|
63
|
+
file_path: session.source.path,
|
|
64
|
+
column_name: session.column_selection.name,
|
|
65
|
+
col_sep: session.source.separator.value,
|
|
66
|
+
skip_blanks: session.options.skip_blanks?
|
|
67
|
+
)
|
|
68
|
+
success(output_path: session.output_destination.path)
|
|
106
69
|
else
|
|
107
|
-
|
|
70
|
+
@value_streamer.each(
|
|
71
|
+
file_path: session.source.path,
|
|
72
|
+
column_name: session.column_selection.name,
|
|
73
|
+
col_sep: session.source.separator.value,
|
|
74
|
+
skip_blanks: session.options.skip_blanks?
|
|
75
|
+
) { |value| on_value.call(value) if on_value }
|
|
76
|
+
success({})
|
|
108
77
|
end
|
|
78
|
+
rescue CSV::MalformedCSVError
|
|
79
|
+
failure(:could_not_parse_csv)
|
|
80
|
+
rescue Errno::EACCES, Errno::ENOENT => e
|
|
81
|
+
if session.output_destination.file?
|
|
82
|
+
failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
|
|
83
|
+
else
|
|
84
|
+
failure(:cannot_read_file, path: session.source.path)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
private
|
|
89
|
+
|
|
90
|
+
def success(data)
|
|
91
|
+
Result.new(ok: true, error: nil, data: data)
|
|
109
92
|
end
|
|
110
93
|
|
|
111
|
-
def
|
|
112
|
-
|
|
113
|
-
args = {
|
|
114
|
-
file_path: file_path,
|
|
115
|
-
column_name: column_name,
|
|
116
|
-
col_sep: col_sep,
|
|
117
|
-
skip_blanks: skip_blanks
|
|
118
|
-
}
|
|
119
|
-
args[:output_path] = output_destination.path if output_destination.file?
|
|
120
|
-
writer.call(**args)
|
|
94
|
+
def failure(code, data = {})
|
|
95
|
+
Result.new(ok: false, error: code, data: data)
|
|
121
96
|
end
|
|
122
97
|
end
|
|
123
98
|
end
|
|
@@ -1,74 +1,46 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "csv"
|
|
4
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
-
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
-
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
-
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
8
4
|
require "csvtool/infrastructure/csv/header_reader"
|
|
9
5
|
require "csvtool/infrastructure/csv/row_streamer"
|
|
10
|
-
require "csvtool/infrastructure/output/csv_row_console_writer"
|
|
11
6
|
require "csvtool/infrastructure/output/csv_row_file_writer"
|
|
12
|
-
require "csvtool/domain/row_session/row_range"
|
|
13
|
-
require "csvtool/domain/row_session/row_source"
|
|
14
|
-
require "csvtool/domain/row_session/row_session"
|
|
15
|
-
require "csvtool/domain/shared/output_destination"
|
|
16
7
|
|
|
17
8
|
module Csvtool
|
|
18
9
|
module Application
|
|
19
10
|
module UseCases
|
|
20
11
|
class RunRowExtraction
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@header_reader = Infrastructure::CSV::HeaderReader.new
|
|
26
|
-
@row_streamer = Infrastructure::CSV::RowStreamer.new
|
|
12
|
+
Result = Struct.new(:ok, :error, :data, keyword_init: true) do
|
|
13
|
+
def ok?
|
|
14
|
+
ok
|
|
15
|
+
end
|
|
27
16
|
end
|
|
28
17
|
|
|
29
|
-
def
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
start_row_input = @stdin.gets&.strip.to_s
|
|
39
|
-
@stdout.print "End row (1-based, inclusive): "
|
|
40
|
-
end_row_input = @stdin.gets&.strip.to_s
|
|
18
|
+
def initialize(
|
|
19
|
+
header_reader: Infrastructure::CSV::HeaderReader.new,
|
|
20
|
+
row_streamer: Infrastructure::CSV::RowStreamer.new,
|
|
21
|
+
csv_row_file_writer: nil
|
|
22
|
+
)
|
|
23
|
+
@header_reader = header_reader
|
|
24
|
+
@row_streamer = row_streamer
|
|
25
|
+
@csv_row_file_writer = csv_row_file_writer || Infrastructure::Output::CsvRowFileWriter.new(row_streamer: @row_streamer)
|
|
26
|
+
end
|
|
41
27
|
|
|
42
|
-
|
|
43
|
-
return
|
|
28
|
+
def read_headers(file_path:, col_sep:)
|
|
29
|
+
return failure(:file_not_found, path: file_path) unless File.file?(file_path)
|
|
44
30
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
end_row_input: end_row_input
|
|
48
|
-
)
|
|
49
|
-
session = Domain::RowSession::RowSession.start(source: source, row_range: row_range)
|
|
31
|
+
headers = @header_reader.call(file_path: file_path, col_sep: col_sep)
|
|
32
|
+
return failure(:no_headers) if headers.empty?
|
|
50
33
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
destination =
|
|
58
|
-
if output_destination[:mode] == :file
|
|
59
|
-
Domain::Shared::OutputDestination.file(path: output_destination[:path])
|
|
60
|
-
else
|
|
61
|
-
Domain::Shared::OutputDestination.console
|
|
62
|
-
end
|
|
63
|
-
session = session.with_output_destination(destination)
|
|
34
|
+
success(headers: headers)
|
|
35
|
+
rescue CSV::MalformedCSVError
|
|
36
|
+
failure(:could_not_parse_csv)
|
|
37
|
+
rescue Errno::EACCES
|
|
38
|
+
failure(:cannot_read_file, path: file_path)
|
|
39
|
+
end
|
|
64
40
|
|
|
65
|
-
|
|
41
|
+
def extract(session:, headers:, on_row: nil)
|
|
66
42
|
if session.output_destination.file?
|
|
67
|
-
|
|
68
|
-
stdout: @stdout,
|
|
69
|
-
errors: @errors,
|
|
70
|
-
row_streamer: @row_streamer
|
|
71
|
-
).call(
|
|
43
|
+
stats = @csv_row_file_writer.call(
|
|
72
44
|
output_path: session.output_destination.path,
|
|
73
45
|
file_path: session.source.path,
|
|
74
46
|
col_sep: session.source.separator,
|
|
@@ -76,35 +48,35 @@ module Csvtool
|
|
|
76
48
|
start_row: session.row_range.start_row,
|
|
77
49
|
end_row: session.row_range.end_row
|
|
78
50
|
)
|
|
51
|
+
success(stats.merge(output_path: session.output_destination.path))
|
|
79
52
|
else
|
|
80
|
-
|
|
53
|
+
stats = @row_streamer.each_in_range(
|
|
81
54
|
file_path: session.source.path,
|
|
82
55
|
col_sep: session.source.separator,
|
|
83
|
-
headers: headers,
|
|
84
56
|
start_row: session.row_range.start_row,
|
|
85
57
|
end_row: session.row_range.end_row
|
|
86
|
-
)
|
|
58
|
+
) { |fields| on_row.call(fields) if on_row }
|
|
59
|
+
success(stats)
|
|
87
60
|
end
|
|
88
|
-
return if stats.nil?
|
|
89
|
-
|
|
90
|
-
@errors.row_range_out_of_bounds(stats[:row_count]) unless stats[:matched]
|
|
91
|
-
rescue Domain::RowSession::InvalidStartRowError
|
|
92
|
-
@errors.invalid_start_row
|
|
93
|
-
rescue Domain::RowSession::InvalidEndRowError
|
|
94
|
-
@errors.invalid_end_row
|
|
95
|
-
rescue Domain::RowSession::InvalidRowRangeOrderError
|
|
96
|
-
@errors.invalid_row_range_order
|
|
97
|
-
rescue ArgumentError => e
|
|
98
|
-
return @errors.empty_output_path if e.message == "file output path cannot be empty"
|
|
99
|
-
|
|
100
|
-
raise e
|
|
101
61
|
rescue CSV::MalformedCSVError
|
|
102
|
-
|
|
103
|
-
rescue Errno::EACCES
|
|
104
|
-
|
|
62
|
+
failure(:could_not_parse_csv)
|
|
63
|
+
rescue Errno::EACCES, Errno::ENOENT => e
|
|
64
|
+
if session.output_destination.file?
|
|
65
|
+
failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
|
|
66
|
+
else
|
|
67
|
+
failure(:cannot_read_file, path: session.source.path)
|
|
68
|
+
end
|
|
105
69
|
end
|
|
106
|
-
|
|
70
|
+
|
|
107
71
|
private
|
|
72
|
+
|
|
73
|
+
def success(data)
|
|
74
|
+
Result.new(ok: true, error: nil, data: data)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def failure(code, data = {})
|
|
78
|
+
Result.new(ok: false, error: code, data: data)
|
|
79
|
+
end
|
|
108
80
|
end
|
|
109
81
|
end
|
|
110
82
|
end
|
|
@@ -1,103 +1,86 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "csv"
|
|
4
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
-
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
-
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
-
require "csvtool/interface/cli/prompts/headers_present_prompt"
|
|
8
|
-
require "csvtool/interface/cli/prompts/seed_prompt"
|
|
9
|
-
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
10
4
|
require "csvtool/infrastructure/csv/header_reader"
|
|
11
5
|
require "csvtool/infrastructure/csv/row_randomizer"
|
|
12
|
-
require "csvtool/
|
|
13
|
-
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
14
|
-
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
15
|
-
require "csvtool/domain/shared/output_destination"
|
|
6
|
+
require "csvtool/infrastructure/output/csv_randomized_row_file_writer"
|
|
16
7
|
|
|
17
8
|
module Csvtool
|
|
18
9
|
module Application
|
|
19
10
|
module UseCases
|
|
20
11
|
class RunRowRandomization
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@header_reader = Infrastructure::CSV::HeaderReader.new
|
|
26
|
-
@row_randomizer = Infrastructure::CSV::RowRandomizer.new
|
|
12
|
+
Result = Struct.new(:ok, :error, :data, keyword_init: true) do
|
|
13
|
+
def ok?
|
|
14
|
+
ok
|
|
15
|
+
end
|
|
27
16
|
end
|
|
28
17
|
|
|
29
|
-
def
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
path: file_path,
|
|
39
|
-
separator: col_sep,
|
|
40
|
-
headers_present: headers_present
|
|
18
|
+
def initialize(
|
|
19
|
+
header_reader: Infrastructure::CSV::HeaderReader.new,
|
|
20
|
+
row_randomizer: Infrastructure::CSV::RowRandomizer.new,
|
|
21
|
+
csv_randomized_row_file_writer: nil
|
|
22
|
+
)
|
|
23
|
+
@header_reader = header_reader
|
|
24
|
+
@row_randomizer = row_randomizer
|
|
25
|
+
@csv_randomized_row_file_writer = csv_randomized_row_file_writer || Infrastructure::Output::CsvRandomizedRowFileWriter.new(
|
|
26
|
+
row_randomizer: @row_randomizer
|
|
41
27
|
)
|
|
42
|
-
|
|
43
|
-
return @errors.no_headers if source.headers_present? && headers.empty?
|
|
28
|
+
end
|
|
44
29
|
|
|
45
|
-
|
|
46
|
-
return
|
|
47
|
-
options = Domain::RowRandomizationSession::RandomizationOptions.new(seed: seed)
|
|
48
|
-
session = Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)
|
|
30
|
+
def read_headers(file_path:, col_sep:, headers_present:)
|
|
31
|
+
return failure(:file_not_found, path: file_path) unless File.file?(file_path)
|
|
49
32
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
return if output_destination.nil?
|
|
56
|
-
destination =
|
|
57
|
-
if output_destination[:mode] == :file
|
|
58
|
-
Domain::Shared::OutputDestination.file(path: output_destination[:path])
|
|
59
|
-
else
|
|
60
|
-
Domain::Shared::OutputDestination.console
|
|
61
|
-
end
|
|
62
|
-
session = session.with_output_destination(destination)
|
|
33
|
+
headers = nil
|
|
34
|
+
if headers_present
|
|
35
|
+
headers = @header_reader.call(file_path: file_path, col_sep: col_sep)
|
|
36
|
+
return failure(:no_headers) if headers.empty?
|
|
37
|
+
end
|
|
63
38
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
39
|
+
success(headers: headers)
|
|
40
|
+
rescue CSV::MalformedCSVError
|
|
41
|
+
failure(:could_not_parse_csv)
|
|
42
|
+
rescue Errno::EACCES
|
|
43
|
+
failure(:cannot_read_file, path: file_path)
|
|
44
|
+
end
|
|
70
45
|
|
|
46
|
+
def randomize(session:, headers:, on_row: nil)
|
|
71
47
|
if session.output_destination.file?
|
|
72
|
-
|
|
48
|
+
@csv_randomized_row_file_writer.call(
|
|
49
|
+
path: session.output_destination.path,
|
|
50
|
+
headers: headers,
|
|
51
|
+
file_path: session.source.path,
|
|
52
|
+
col_sep: session.source.separator,
|
|
53
|
+
headers_present: session.source.headers_present?,
|
|
54
|
+
seed: session.options.seed
|
|
55
|
+
)
|
|
56
|
+
success(output_path: session.output_destination.path)
|
|
73
57
|
else
|
|
74
|
-
|
|
58
|
+
@row_randomizer.each(
|
|
59
|
+
file_path: session.source.path,
|
|
60
|
+
col_sep: session.source.separator,
|
|
61
|
+
headers: session.source.headers_present?,
|
|
62
|
+
seed: session.options.seed
|
|
63
|
+
) { |fields| on_row.call(fields) if on_row }
|
|
64
|
+
success({})
|
|
75
65
|
end
|
|
76
66
|
rescue CSV::MalformedCSVError
|
|
77
|
-
|
|
78
|
-
rescue
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
67
|
+
failure(:could_not_parse_csv)
|
|
68
|
+
rescue Errno::EACCES, Errno::ENOENT => e
|
|
69
|
+
if session.output_destination.file?
|
|
70
|
+
failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
|
|
71
|
+
else
|
|
72
|
+
failure(:cannot_read_file, path: session.source.path)
|
|
73
|
+
end
|
|
84
74
|
end
|
|
85
75
|
|
|
86
76
|
private
|
|
87
77
|
|
|
88
|
-
def
|
|
89
|
-
|
|
90
|
-
@stdout.puts ::CSV.generate_line(headers, row_sep: "", col_sep: col_sep).chomp if headers
|
|
91
|
-
rows.each { |fields| @stdout.puts ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep).chomp }
|
|
78
|
+
def success(data)
|
|
79
|
+
Result.new(ok: true, error: nil, data: data)
|
|
92
80
|
end
|
|
93
81
|
|
|
94
|
-
def
|
|
95
|
-
|
|
96
|
-
rows.each { |fields| csv << fields }
|
|
97
|
-
end
|
|
98
|
-
@stdout.puts "Wrote output to #{path}"
|
|
99
|
-
rescue Errno::EACCES, Errno::ENOENT => e
|
|
100
|
-
@errors.cannot_write_output_file(path, e.class)
|
|
82
|
+
def failure(code, data = {})
|
|
83
|
+
Result.new(ok: false, error: code, data: data)
|
|
101
84
|
end
|
|
102
85
|
end
|
|
103
86
|
end
|
data/lib/csvtool/cli.rb
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
require "csv"
|
|
4
4
|
require "csvtool/interface/cli/menu_loop"
|
|
5
|
-
require "csvtool/
|
|
6
|
-
require "csvtool/
|
|
7
|
-
require "csvtool/
|
|
5
|
+
require "csvtool/interface/cli/workflows/run_extraction_workflow"
|
|
6
|
+
require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
|
|
7
|
+
require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
|
|
8
8
|
require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
|
|
9
9
|
require "csvtool/interface/cli/errors/presenter"
|
|
10
10
|
require "csvtool/infrastructure/csv/header_reader"
|
|
@@ -47,9 +47,9 @@ module Csvtool
|
|
|
47
47
|
private
|
|
48
48
|
|
|
49
49
|
def run_menu_loop
|
|
50
|
-
extract_column_action = -> {
|
|
51
|
-
extract_rows_action = -> {
|
|
52
|
-
randomize_rows_action = -> {
|
|
50
|
+
extract_column_action = -> { Interface::CLI::Workflows::RunExtractionWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
51
|
+
extract_rows_action = -> { Interface::CLI::Workflows::RunRowExtractionWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
52
|
+
randomize_rows_action = -> { Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
53
53
|
dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
54
54
|
Interface::CLI::MenuLoop.new(
|
|
55
55
|
stdin: @stdin,
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Infrastructure
|
|
7
|
+
module Output
|
|
8
|
+
class CsvCrossCsvDedupeFileWriter
|
|
9
|
+
def initialize(deduper:)
|
|
10
|
+
@deduper = deduper
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def call(path:, headers:, col_sep:, dedupe_options:)
|
|
14
|
+
stats = nil
|
|
15
|
+
::CSV.open(path, "w", write_headers: !headers.nil?, headers: headers, col_sep: col_sep) do |csv|
|
|
16
|
+
stats = @deduper.each_retained(**dedupe_options) { |fields| csv << fields }
|
|
17
|
+
end
|
|
18
|
+
stats
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -6,9 +6,7 @@ module Csvtool
|
|
|
6
6
|
module Infrastructure
|
|
7
7
|
module Output
|
|
8
8
|
class CsvFileWriter
|
|
9
|
-
def initialize(
|
|
10
|
-
@stdout = stdout
|
|
11
|
-
@errors = errors
|
|
9
|
+
def initialize(value_streamer:)
|
|
12
10
|
@value_streamer = value_streamer
|
|
13
11
|
end
|
|
14
12
|
|
|
@@ -19,10 +17,6 @@ module Csvtool
|
|
|
19
17
|
csv << [value]
|
|
20
18
|
end
|
|
21
19
|
end
|
|
22
|
-
|
|
23
|
-
@stdout.puts "Wrote output to #{output_path}"
|
|
24
|
-
rescue Errno::EACCES, Errno::ENOENT => e
|
|
25
|
-
@errors.cannot_write_output_file(output_path, e.class)
|
|
26
20
|
end
|
|
27
21
|
end
|
|
28
22
|
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Infrastructure
|
|
7
|
+
module Output
|
|
8
|
+
class CsvRandomizedRowFileWriter
|
|
9
|
+
def initialize(row_randomizer:)
|
|
10
|
+
@row_randomizer = row_randomizer
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def call(path:, headers:, file_path:, col_sep:, headers_present:, seed:)
|
|
14
|
+
::CSV.open(path, "w", write_headers: !headers.nil?, headers: headers, col_sep: col_sep) do |csv|
|
|
15
|
+
@row_randomizer.each(file_path: file_path, col_sep: col_sep, headers: headers_present, seed: seed) do |fields|
|
|
16
|
+
csv << fields
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -6,9 +6,7 @@ module Csvtool
|
|
|
6
6
|
module Infrastructure
|
|
7
7
|
module Output
|
|
8
8
|
class CsvRowFileWriter
|
|
9
|
-
def initialize(
|
|
10
|
-
@stdout = stdout
|
|
11
|
-
@errors = errors
|
|
9
|
+
def initialize(row_streamer:)
|
|
12
10
|
@row_streamer = row_streamer
|
|
13
11
|
end
|
|
14
12
|
|
|
@@ -30,12 +28,7 @@ module Csvtool
|
|
|
30
28
|
csv << fields
|
|
31
29
|
end
|
|
32
30
|
|
|
33
|
-
|
|
34
|
-
@stdout.puts "Wrote output to #{output_path}" if wrote_rows
|
|
35
|
-
stats
|
|
36
|
-
rescue Errno::EACCES, Errno::ENOENT => e
|
|
37
|
-
@errors.cannot_write_output_file(output_path, e.class)
|
|
38
|
-
nil
|
|
31
|
+
stats.merge(wrote_rows: wrote_rows)
|
|
39
32
|
ensure
|
|
40
33
|
csv&.close unless csv&.closed?
|
|
41
34
|
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/cross_csv_dedupe_session/column_selector"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Interface
|
|
7
|
+
module CLI
|
|
8
|
+
module Prompts
|
|
9
|
+
class DedupeKeySelectorPrompt
|
|
10
|
+
def initialize(stdin:, stdout:)
|
|
11
|
+
@stdin = stdin
|
|
12
|
+
@stdout = stdout
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def call(label:, headers_present:)
|
|
16
|
+
if headers_present
|
|
17
|
+
@stdout.print "#{label} key column name: "
|
|
18
|
+
else
|
|
19
|
+
@stdout.print "#{label} key column index (1-based): "
|
|
20
|
+
end
|
|
21
|
+
input = @stdin.gets&.strip.to_s
|
|
22
|
+
Domain::CrossCsvDedupeSession::ColumnSelector.from_input(headers_present: headers_present, input: input)
|
|
23
|
+
rescue ArgumentError
|
|
24
|
+
nil
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|