csvops 0.6.0.alpha → 0.8.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +103 -24
- data/docs/architecture.md +121 -4
- data/docs/release-v0.7.0-alpha.md +87 -0
- data/docs/release-v0.8.0-alpha.md +88 -0
- data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
- data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
- data/lib/csvtool/cli.rb +9 -1
- data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
- data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
- data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
- data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
- data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
- data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
- data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
- data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
- data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
- data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
- data/lib/csvtool/interface/cli/errors/presenter.rb +8 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
- data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
- data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +77 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
- data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
- data/test/csvtool/cli_test.rb +139 -29
- data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
- data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
- data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
- data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +104 -130
- data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
- data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
- data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
- data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +146 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
- data/test/fixtures/split_people_25.csv +26 -0
- metadata +58 -1
data/lib/csvtool/cli.rb
CHANGED
|
@@ -7,6 +7,8 @@ require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
|
|
|
7
7
|
require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
|
|
8
8
|
require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
|
|
9
9
|
require "csvtool/interface/cli/workflows/run_csv_parity_workflow"
|
|
10
|
+
require "csvtool/interface/cli/workflows/run_csv_split_workflow"
|
|
11
|
+
require "csvtool/interface/cli/workflows/run_csv_stats_workflow"
|
|
10
12
|
require "csvtool/interface/cli/errors/presenter"
|
|
11
13
|
require "csvtool/infrastructure/csv/header_reader"
|
|
12
14
|
require "csvtool/infrastructure/csv/value_streamer"
|
|
@@ -20,6 +22,8 @@ module Csvtool
|
|
|
20
22
|
"Randomize rows",
|
|
21
23
|
"Dedupe using another CSV",
|
|
22
24
|
"Validate parity",
|
|
25
|
+
"Split CSV into chunks",
|
|
26
|
+
"CSV stats summary",
|
|
23
27
|
"Exit"
|
|
24
28
|
].freeze
|
|
25
29
|
|
|
@@ -54,6 +58,8 @@ module Csvtool
|
|
|
54
58
|
randomize_rows_action = -> { Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
55
59
|
dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
56
60
|
parity_action = -> { Interface::CLI::Workflows::RunCsvParityWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
61
|
+
split_action = -> { Interface::CLI::Workflows::RunCsvSplitWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
62
|
+
stats_action = -> { Interface::CLI::Workflows::RunCsvStatsWorkflow.new(stdin: @stdin, stdout: @stdout).call }
|
|
57
63
|
Interface::CLI::MenuLoop.new(
|
|
58
64
|
stdin: @stdin,
|
|
59
65
|
stdout: @stdout,
|
|
@@ -62,7 +68,9 @@ module Csvtool
|
|
|
62
68
|
extract_rows_action: extract_rows_action,
|
|
63
69
|
randomize_rows_action: randomize_rows_action,
|
|
64
70
|
dedupe_action: dedupe_action,
|
|
65
|
-
parity_action: parity_action
|
|
71
|
+
parity_action: parity_action,
|
|
72
|
+
split_action: split_action,
|
|
73
|
+
stats_action: stats_action
|
|
66
74
|
).run
|
|
67
75
|
end
|
|
68
76
|
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Domain
|
|
5
|
+
module CsvSplitSession
|
|
6
|
+
# Read-only bundle of the settings that control a CSV split run.
class SplitOptions
  attr_reader :chunk_size, :output_directory, :file_prefix, :overwrite_existing, :write_manifest, :manifest_path

  # chunk_size is coerced with Kernel#Integer, so input that cannot be
  # converted raises (ArgumentError for bad strings, TypeError for nil).
  # Every other option is stored exactly as given.
  def initialize(chunk_size:, output_directory: nil, file_prefix: nil,
                 overwrite_existing: false, write_manifest: false, manifest_path: nil)
    @chunk_size = Integer(chunk_size)
    @output_directory, @file_prefix = output_directory, file_prefix
    @overwrite_existing = overwrite_existing
    @write_manifest, @manifest_path = write_manifest, manifest_path
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Domain
|
|
5
|
+
module CsvSplitSession
|
|
6
|
+
# Pairs the CSV source being split with the options that govern the split.
class SplitSession
  attr_reader :source, :options

  class << self
    # Factory entry point; builds a session from a source and its options.
    def start(source:, options:)
      new(source: source, options: options)
    end
  end

  def initialize(source:, options:)
    @source, @options = source, options
  end
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Domain
|
|
5
|
+
module CsvSplitSession
|
|
6
|
+
# Describes the input CSV for a split: where it lives, its column
# separator, and whether its first row is a header row.
class SplitSource
  attr_reader :path, :separator, :headers_present

  def initialize(path:, separator:, headers_present:)
    @path, @separator, @headers_present = path, separator, headers_present
  end
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Domain
|
|
5
|
+
module CsvStatsSession
|
|
6
|
+
# Holds the source, options and (optional) output destination of a stats run.
class StatsSession
  attr_reader :source, :options, :output_destination

  class << self
    # Factory entry point; the session starts without an output destination.
    def start(source:, options:)
      new(source: source, options: options)
    end
  end

  def initialize(source:, options:, output_destination: nil)
    @source, @options = source, options
    @output_destination = output_destination
  end

  # Returns a new session with the same source and options but the given
  # destination; the receiver is left untouched.
  def with_output_destination(output_destination)
    self.class.new(
      source: source,
      options: options,
      output_destination: output_destination
    )
  end
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Domain
|
|
5
|
+
module CsvStatsSession
|
|
6
|
+
# Describes the input CSV for a stats scan: its path, column separator,
# and whether the first row is a header row.
class StatsSource
  attr_reader :path, :separator, :headers_present

  def initialize(path:, separator:, headers_present:)
    @path, @separator, @headers_present = path, separator, headers_present
  end
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Infrastructure
|
|
7
|
+
module CSV
|
|
8
|
+
# Streams a CSV file and writes it out as numbered chunk files.
class CsvSplitter
  # Raised when a chunk file is already on disk and overwriting was not requested.
  class OutputFileExistsError < StandardError
    attr_reader :path

    def initialize(path)
      @path = path
      super("output file exists: #{path}")
    end
  end

  # Reads file_path row by row and writes consecutive files named
  # "<file_prefix>_part_NNN<ext>" into output_directory, starting a new file
  # once the current one holds chunk_size rows. The extension is taken from
  # file_path (".csv" when it has none). When headers_present is true each
  # chunk starts with the header row. This method does not create
  # output_directory.
  #
  # Returns a hash with :chunk_paths, :chunk_count, :data_rows and
  # :chunk_row_counts. Raises OutputFileExistsError when a target file exists
  # and overwrite_existing is false.
  def call(file_path:, col_sep:, headers_present:, chunk_size:, output_directory:, file_prefix:, overwrite_existing:)
    extension = File.extname(file_path)
    extension = ".csv" if extension.empty?

    written_paths = []
    rows_per_chunk = []
    writer = nil

    ::CSV.foreach(file_path, headers: headers_present, col_sep: col_sep) do |record|
      # Roll over to a fresh chunk on the first row and whenever the current one is full.
      if writer.nil? || rows_per_chunk.last >= chunk_size
        writer&.close
        file_name = format(
          "%<prefix>s_part_%<num>03d%<ext>s",
          prefix: file_prefix,
          num: written_paths.length + 1,
          ext: extension
        )
        target = File.join(output_directory, file_name)
        raise OutputFileExistsError.new(target) if !overwrite_existing && File.exist?(target)

        written_paths << target
        rows_per_chunk << 0
        writer = ::CSV.open(
          target,
          "w",
          write_headers: headers_present,
          headers: (headers_present ? record.headers : nil),
          col_sep: col_sep
        )
      end

      writer << (headers_present ? record.fields : record)
      rows_per_chunk[-1] += 1
    end

    {
      chunk_paths: written_paths,
      chunk_count: written_paths.length,
      data_rows: rows_per_chunk.sum,
      chunk_row_counts: rows_per_chunk
    }
  ensure
    # Make sure the last chunk is flushed and closed even when an error escapes.
    writer&.close unless writer&.closed?
  end
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Infrastructure
|
|
7
|
+
module CSV
|
|
8
|
+
# Single-pass scanner that counts rows, columns and blank/non-blank cells per column.
class CsvStatsScanner
  # csv: object responding to .foreach(path, headers:, col_sep:) — the
  # standard CSV class by default; injectable for tests.
  def initialize(csv: ::CSV)
    @csv = csv
  end

  # Streams file_path and returns a hash with:
  #   :row_count    - number of data rows yielded by the reader
  #   :column_count - number of tracked columns (widest row seen)
  #   :headers      - header names from the first row, or nil in headerless mode
  #   :column_stats - array of { name:, blank_count:, non_blank_count: }
  #
  # Columns are named from the headers when headers_present is true. Any
  # column beyond the known ones (headerless input, or a data row wider than
  # the header) is named "column_N". Previously a wider-than-header row
  # raised NoMethodError because no stats entry existed for the extra cell.
  def call(file_path:, col_sep:, headers_present:)
    data_row_count = 0
    headers = nil
    column_stats = []

    # Streaming scan: memory grows with per-column metrics, not row count.
    @csv.foreach(file_path, headers: headers_present, col_sep: col_sep) do |row|
      fields = extract_fields(row, headers_present)

      if headers_present && headers.nil?
        headers = row.headers
        column_stats = headers.map { |name| new_column(name) }
      end

      # Track extra columns so every cell has a stats entry to land in.
      column_stats << new_column("column_#{column_stats.length + 1}") while column_stats.length < fields.length

      # Rows shorter than the tracked width count as blank in the missing columns.
      column_stats.each_with_index { |stats, index| apply_value(stats, fields[index]) }
      data_row_count += 1
    end

    {
      row_count: data_row_count,
      column_count: column_stats.length,
      headers: headers,
      column_stats: column_stats
    }
  end

  private

  # Header mode expects row objects exposing #fields; headerless rows may be
  # plain arrays or CSV::Row instances.
  def extract_fields(row, headers_present)
    return row.fields if headers_present

    row.is_a?(::CSV::Row) ? row.fields : row
  end

  # Fresh zeroed counters for one column.
  def new_column(name)
    { name: name, blank_count: 0, non_blank_count: 0 }
  end

  # A cell is blank when it is nil or contains only whitespace.
  def apply_value(stats, value)
    if value.nil? || value.strip.empty?
      stats[:blank_count] += 1
    else
      stats[:non_blank_count] += 1
    end
  end
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Infrastructure
|
|
7
|
+
module Output
|
|
8
|
+
# Writes a CSV manifest listing every chunk produced by a split.
class CsvSplitManifestWriter
  # path: manifest file to (over)write.
  # chunk_paths / chunk_row_counts: parallel arrays; entry i describes chunk i + 1.
  def call(path:, chunk_paths:, chunk_row_counts:)
    ::CSV.open(path, "w") do |manifest|
      manifest << ["chunk_index", "chunk_path", "row_count"]
      chunk_paths.each_with_index do |chunk_path, position|
        row_total = chunk_row_counts[position]
        manifest << [position.succ, chunk_path, row_total]
      end
    end
  end
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
|
|
6
|
+
module Infrastructure
|
|
7
|
+
module Output
|
|
8
|
+
# Writes a stats result hash to a two-column (metric, value) CSV file.
class CsvStatsFileWriter
  # path: file to (over)write.
  # data: hash with :row_count, :column_count, optional :headers (array) and
  #       optional :column_stats (array of { name:, blank_count:, non_blank_count: }).
  def call(path:, data:)
    ::CSV.open(path, "w") do |out|
      out << ["metric", "value"]
      out << ["row_count", data[:row_count]]
      out << ["column_count", data[:column_count]]

      # The headers row is skipped entirely when there are no header names.
      header_names = data[:headers]
      out << ["headers", header_names.join("|")] unless header_names.nil? || header_names.empty?

      data.fetch(:column_stats, []).each do |column|
        metric_prefix = "column.#{column[:name]}"
        out << ["#{metric_prefix}.non_blank", column[:non_blank_count]]
        out << ["#{metric_prefix}.blank", column[:blank_count]]
      end
    end
  end
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -33,6 +33,10 @@ module Csvtool
|
|
|
33
33
|
@stdout.puts "Cannot write output file: #{path} (#{error_class})"
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
+
# Reports to the user that the given output path is already taken.
def output_file_exists(path)
  message = "Output file already exists: #{path}"
  @stdout.puts(message)
end
|
|
39
|
+
|
|
36
40
|
def empty_output_path
|
|
37
41
|
@stdout.puts "Output file path cannot be empty."
|
|
38
42
|
end
|
|
@@ -53,6 +57,10 @@ module Csvtool
|
|
|
53
57
|
@stdout.puts "Seed must be an integer."
|
|
54
58
|
end
|
|
55
59
|
|
|
60
|
+
# Reports to the user that the entered chunk size was rejected.
def invalid_chunk_size
  message = "Chunk size must be a positive integer."
  @stdout.puts(message)
end
|
|
63
|
+
|
|
56
64
|
def canceled
|
|
57
65
|
@stdout.puts "Canceled."
|
|
58
66
|
end
|
|
@@ -4,7 +4,7 @@ module Csvtool
|
|
|
4
4
|
module Interface
|
|
5
5
|
module CLI
|
|
6
6
|
class MenuLoop
|
|
7
|
-
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:)
|
|
7
|
+
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:, split_action:, stats_action:)
|
|
8
8
|
@stdin = stdin
|
|
9
9
|
@stdout = stdout
|
|
10
10
|
@menu_options = menu_options
|
|
@@ -13,6 +13,8 @@ module Csvtool
|
|
|
13
13
|
@randomize_rows_action = randomize_rows_action
|
|
14
14
|
@dedupe_action = dedupe_action
|
|
15
15
|
@parity_action = parity_action
|
|
16
|
+
@split_action = split_action
|
|
17
|
+
@stats_action = stats_action
|
|
16
18
|
end
|
|
17
19
|
|
|
18
20
|
def run
|
|
@@ -34,9 +36,13 @@ module Csvtool
|
|
|
34
36
|
when "5"
|
|
35
37
|
@parity_action.call
|
|
36
38
|
when "6"
|
|
39
|
+
@split_action.call
|
|
40
|
+
when "7"
|
|
41
|
+
@stats_action.call
|
|
42
|
+
when "8"
|
|
37
43
|
return 0
|
|
38
44
|
else
|
|
39
|
-
@stdout.puts "Please choose 1, 2, 3, 4, 5, or
|
|
45
|
+
@stdout.puts "Please choose 1, 2, 3, 4, 5, 6, 7, or 8."
|
|
40
46
|
end
|
|
41
47
|
end
|
|
42
48
|
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Prompts
|
|
7
|
+
# Asks the user how many rows each chunk should contain.
class ChunkSizePrompt
  def initialize(stdin:, stdout:)
    @stdin, @stdout = stdin, stdout
  end

  # Prints the prompt and returns the trimmed answer as a String.
  # Returns "" when the input stream is exhausted. No numeric validation
  # happens here.
  def call
    @stdout.print "Rows per chunk: "
    answer = @stdin.gets
    answer.nil? ? "" : answer.strip
  end
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Prompts
|
|
7
|
+
# Asks whether a manifest should be written and, if so, where.
class SplitManifestPrompt
  # yes_no_prompt: callable accepting label: and default: and returning a boolean-ish answer.
  def initialize(stdin:, stdout:, yes_no_prompt:)
    @stdin, @stdout = stdin, stdout
    @yes_no_prompt = yes_no_prompt
  end

  # Returns { write_manifest:, manifest_path: }. The path prompt is only
  # shown after a "yes"; an empty answer (or end of input) selects default_path.
  def call(default_path:)
    wants_manifest = @yes_no_prompt.call(label: "Write manifest file? [y/N]: ", default: false)

    if wants_manifest
      @stdout.print "Manifest file path [#{default_path}]: "
      typed = @stdin.gets&.strip.to_s
      { write_manifest: true, manifest_path: typed.empty? ? default_path : typed }
    else
      { write_manifest: false, manifest_path: nil }
    end
  end
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Prompts
|
|
7
|
+
# Collects the output directory, file prefix and overwrite choice for a split.
class SplitOutputPrompt
  # yes_no_prompt: callable accepting label: and default: and returning a boolean-ish answer.
  def initialize(stdin:, stdout:, yes_no_prompt:)
    @stdin, @stdout = stdin, stdout
    @yes_no_prompt = yes_no_prompt
  end

  # Prompts in order: directory, prefix, overwrite. Empty answers (or end of
  # input) fall back to the supplied defaults.
  # Returns { output_directory:, file_prefix:, overwrite_existing: }.
  def call(default_directory:, default_prefix:)
    chosen_directory = read_with_default("Output directory", default_directory)
    chosen_prefix = read_with_default("Output file prefix", default_prefix)
    allow_overwrite = @yes_no_prompt.call(label: "Overwrite existing chunk files? [y/N]: ", default: false)

    {
      output_directory: chosen_directory,
      file_prefix: chosen_prefix,
      overwrite_existing: allow_overwrite
    }
  end

  private

  # Prints "<label> [<fallback>]: " and returns the trimmed reply, or fallback when blank.
  def read_with_default(label, fallback)
    @stdout.print "#{label} [#{fallback}]: "
    reply = @stdin.gets&.strip.to_s
    reply.empty? ? fallback : reply
  end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/csv_split_session/split_source"
|
|
4
|
+
require "csvtool/domain/csv_split_session/split_options"
|
|
5
|
+
require "csvtool/domain/csv_split_session/split_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
|
|
8
|
+
module Interface
|
|
9
|
+
module CLI
|
|
10
|
+
module Workflows
|
|
11
|
+
module Builders
|
|
12
|
+
# Assembles a split session (source + options) from flat keyword inputs.
class CsvSplitSessionBuilder
  # Wraps the file details in a SplitSource and the remaining settings in a
  # SplitOptions, then returns the result of SplitSession.start for the pair.
  def call(file_path:, col_sep:, headers_present:, chunk_size:,
           output_directory: nil, file_prefix: nil, overwrite_existing: false,
           write_manifest: false, manifest_path: nil)
    split_source = Domain::CsvSplitSession::SplitSource.new(
      path: file_path, separator: col_sep, headers_present: headers_present
    )
    split_options = Domain::CsvSplitSession::SplitOptions.new(
      chunk_size: chunk_size,
      output_directory: output_directory,
      file_prefix: file_prefix,
      overwrite_existing: overwrite_existing,
      write_manifest: write_manifest,
      manifest_path: manifest_path
    )

    Domain::CsvSplitSession::SplitSession.start(source: split_source, options: split_options)
  end
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/csv_stats_session/stats_source"
|
|
4
|
+
require "csvtool/domain/csv_stats_session/stats_options"
|
|
5
|
+
require "csvtool/domain/csv_stats_session/stats_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
|
|
8
|
+
module Interface
|
|
9
|
+
module CLI
|
|
10
|
+
module Workflows
|
|
11
|
+
module Builders
|
|
12
|
+
# Assembles a stats session from flat keyword inputs.
class CsvStatsSessionBuilder
  # Wraps the file details in a StatsSource, starts a StatsSession with fresh
  # StatsOptions, and returns that session with the destination attached.
  def call(file_path:, col_sep:, headers_present:, destination:)
    stats_source = Domain::CsvStatsSession::StatsSource.new(
      path: file_path, separator: col_sep, headers_present: headers_present
    )
    stats_options = Domain::CsvStatsSession::StatsOptions.new

    Domain::CsvStatsSession::StatsSession
      .start(source: stats_source, options: stats_options)
      .with_output_destination(destination)
  end
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Workflows
|
|
7
|
+
module Presenters
|
|
8
|
+
# Prints the outcome of a split run to the given output stream.
class CsvSplitPresenter
  def initialize(stdout:)
    @stdout = stdout
  end

  # data: hash with :chunk_size, :data_rows, :chunk_count, :chunk_paths and an
  # optional :manifest_path. The manifest line is omitted when that value is
  # nil/false. Each chunk path is printed on its own line at the end.
  def print_summary(data)
    @stdout.puts "Split complete."

    { "Chunk size" => :chunk_size, "Data rows" => :data_rows, "Chunks written" => :chunk_count }.each do |label, key|
      @stdout.puts "#{label}: #{data[key]}"
    end

    manifest = data[:manifest_path]
    @stdout.puts "Manifest: #{manifest}" if manifest

    data[:chunk_paths].each { |chunk_path| @stdout.puts chunk_path }
  end
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
|
|
4
|
+
module Interface
|
|
5
|
+
module CLI
|
|
6
|
+
module Workflows
|
|
7
|
+
module Presenters
|
|
8
|
+
# Prints CSV stats results and file-written notices to the given output stream.
class CsvStatsPresenter
  def initialize(stdout:)
    @stdout = stdout
  end

  # data: hash with :row_count, :column_count, optional :headers (array) and
  # optional :column_stats (array of { name:, blank_count:, non_blank_count: }).
  # The headers line and the completeness section are each skipped when their
  # data is nil or empty.
  def print_summary(data)
    @stdout.puts "CSV Stats Summary"
    @stdout.puts "Rows: #{data[:row_count]}"
    @stdout.puts "Columns: #{data[:column_count]}"

    header_names = data[:headers]
    unless header_names.nil? || header_names.empty?
      @stdout.puts "Headers: #{header_names.join(', ')}"
    end

    per_column = data[:column_stats]
    return if per_column.nil? || per_column.empty?

    @stdout.puts "Column completeness:"
    per_column.each do |entry|
      counts = "non_blank=#{entry[:non_blank_count]} blank=#{entry[:blank_count]}"
      @stdout.puts "  #{entry[:name]}: #{counts}"
    end
  end

  # Confirms where the stats output was written.
  def print_file_written(path)
    @stdout.puts("Wrote output to #{path}")
  end
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|