csvops 0.6.0.alpha → 0.8.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +103 -24
  3. data/docs/architecture.md +121 -4
  4. data/docs/release-v0.7.0-alpha.md +87 -0
  5. data/docs/release-v0.8.0-alpha.md +88 -0
  6. data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
  7. data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
  8. data/lib/csvtool/cli.rb +9 -1
  9. data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
  10. data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
  11. data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
  12. data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
  13. data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
  14. data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
  15. data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
  16. data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
  17. data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
  18. data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
  19. data/lib/csvtool/interface/cli/errors/presenter.rb +8 -0
  20. data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
  21. data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
  22. data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
  23. data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
  24. data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
  25. data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
  26. data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
  27. data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +34 -0
  28. data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
  29. data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +77 -0
  30. data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
  31. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
  32. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
  33. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
  34. data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
  35. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
  36. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
  37. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
  38. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
  39. data/lib/csvtool/version.rb +1 -1
  40. data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
  41. data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
  42. data/test/csvtool/cli_test.rb +139 -29
  43. data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
  44. data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
  45. data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
  46. data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
  47. data/test/csvtool/interface/cli/menu_loop_test.rb +104 -130
  48. data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
  49. data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
  50. data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
  51. data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
  52. data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
  53. data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
  54. data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +37 -0
  55. data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
  56. data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +146 -0
  57. data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
  58. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
  59. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
  60. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
  61. data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
  62. data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
  63. data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
  64. data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
  65. data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
  66. data/test/fixtures/split_people_25.csv +26 -0
  67. metadata +58 -1
data/lib/csvtool/cli.rb CHANGED
@@ -7,6 +7,8 @@ require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
7
7
  require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
8
8
  require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
9
9
  require "csvtool/interface/cli/workflows/run_csv_parity_workflow"
10
+ require "csvtool/interface/cli/workflows/run_csv_split_workflow"
11
+ require "csvtool/interface/cli/workflows/run_csv_stats_workflow"
10
12
  require "csvtool/interface/cli/errors/presenter"
11
13
  require "csvtool/infrastructure/csv/header_reader"
12
14
  require "csvtool/infrastructure/csv/value_streamer"
@@ -20,6 +22,8 @@ module Csvtool
20
22
  "Randomize rows",
21
23
  "Dedupe using another CSV",
22
24
  "Validate parity",
25
+ "Split CSV into chunks",
26
+ "CSV stats summary",
23
27
  "Exit"
24
28
  ].freeze
25
29
 
@@ -54,6 +58,8 @@ module Csvtool
54
58
  randomize_rows_action = -> { Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: @stdin, stdout: @stdout).call }
55
59
  dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: @stdout).call }
56
60
  parity_action = -> { Interface::CLI::Workflows::RunCsvParityWorkflow.new(stdin: @stdin, stdout: @stdout).call }
61
+ split_action = -> { Interface::CLI::Workflows::RunCsvSplitWorkflow.new(stdin: @stdin, stdout: @stdout).call }
62
+ stats_action = -> { Interface::CLI::Workflows::RunCsvStatsWorkflow.new(stdin: @stdin, stdout: @stdout).call }
57
63
  Interface::CLI::MenuLoop.new(
58
64
  stdin: @stdin,
59
65
  stdout: @stdout,
@@ -62,7 +68,9 @@ module Csvtool
62
68
  extract_rows_action: extract_rows_action,
63
69
  randomize_rows_action: randomize_rows_action,
64
70
  dedupe_action: dedupe_action,
65
- parity_action: parity_action
71
+ parity_action: parity_action,
72
+ split_action: split_action,
73
+ stats_action: stats_action
66
74
  ).run
67
75
  end
68
76
 
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvSplitSession
6
# Settings for one split run: chunk size, output location and naming,
# overwrite policy, and optional manifest output.
class SplitOptions
  attr_reader :chunk_size,
              :output_directory,
              :file_prefix,
              :overwrite_existing,
              :write_manifest,
              :manifest_path

  # chunk_size is passed through Kernel#Integer, so numeric strings are
  # accepted and anything Integer() rejects raises. Every other argument is
  # stored exactly as given.
  def initialize(chunk_size:, output_directory: nil, file_prefix: nil,
                 overwrite_existing: false, write_manifest: false, manifest_path: nil)
    @chunk_size = Integer(chunk_size)
    @output_directory, @file_prefix = output_directory, file_prefix
    @overwrite_existing = overwrite_existing
    @write_manifest, @manifest_path = write_manifest, manifest_path
  end
end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvSplitSession
6
# Pairs the CSV source being split with the options that control the split.
class SplitSession
  attr_reader :source, :options

  class << self
    # Named constructor; equivalent to calling .new with the same keywords.
    def start(source:, options:)
      new(source: source, options: options)
    end
  end

  def initialize(source:, options:)
    @source, @options = source, options
  end
end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvSplitSession
6
# Describes the CSV file to split: its path, its column separator, and
# whether it has a header row.
class SplitSource
  attr_reader :path, :separator, :headers_present

  def initialize(path:, separator:, headers_present:)
    @path, @separator, @headers_present = path, separator, headers_present
  end
end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvStatsSession
6
# Options holder for a stats run; it currently takes no arguments and stores
# no state.
class StatsOptions
  def initialize
  end
end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvStatsSession
6
# Holds the source, the options and (optionally) the output destination of a
# stats run.
class StatsSession
  attr_reader :source, :options, :output_destination

  class << self
    # Named constructor; the resulting session has no output destination.
    def start(source:, options:)
      new(source: source, options: options)
    end
  end

  def initialize(source:, options:, output_destination: nil)
    @source, @options = source, options
    @output_destination = output_destination
  end

  # Returns a new session with the same source and options and the given
  # destination; the receiver is left unchanged.
  def with_output_destination(output_destination)
    self.class.new(
      source: source,
      options: options,
      output_destination: output_destination
    )
  end
end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Domain
5
+ module CsvStatsSession
6
# Describes the CSV file to analyse: its path, its column separator, and
# whether it has a header row.
class StatsSource
  attr_reader :path, :separator, :headers_present

  def initialize(path:, separator:, headers_present:)
    @path, @separator, @headers_present = path, separator, headers_present
  end
end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Csvtool
6
+ module Infrastructure
7
+ module CSV
8
# Streams a CSV file and writes its data rows into sequentially numbered
# chunk files named "<prefix>_part_NNN<ext>" inside the output directory.
class CsvSplitter
  # Raised when a chunk file already exists and overwriting was not requested.
  class OutputFileExistsError < StandardError
    attr_reader :path

    def initialize(path)
      super("output file exists: #{path}")
      @path = path
    end
  end

  # @param file_path [String] CSV file to read
  # @param col_sep [String] column separator used for reading and writing
  # @param headers_present [Boolean] when true, the first row is treated as
  #   headers and repeated at the top of every chunk
  # @param chunk_size [Numeric] maximum number of data rows per chunk; must be positive
  # @param output_directory [String] directory the chunk files are written to
  # @param file_prefix [String] leading part of each chunk file name
  # @param overwrite_existing [Boolean] whether existing chunk files may be replaced
  # @return [Hash] :chunk_paths, :chunk_count, :data_rows, :chunk_row_counts
  # @raise [ArgumentError] when chunk_size is not a positive number
  # @raise [OutputFileExistsError] when a chunk file exists and
  #   overwrite_existing is false; chunks written before that point are kept
  def call(file_path:, col_sep:, headers_present:, chunk_size:, output_directory:, file_prefix:, overwrite_existing:)
    # A zero or negative size would make every row start a new chunk file.
    unless chunk_size.is_a?(Numeric) && chunk_size.positive?
      raise ArgumentError, "chunk_size must be a positive number, got #{chunk_size.inspect}"
    end

    ext = File.extname(file_path)
    ext = ".csv" if ext.empty?
    chunk_paths = []
    chunk_row_counts = []
    current_csv = nil

    ::CSV.foreach(file_path, headers: headers_present, col_sep: col_sep) do |row|
      if current_csv.nil? || chunk_row_counts.last >= chunk_size
        current_csv&.close
        current_csv = nil
        path = chunk_path(output_directory, file_prefix, chunk_paths.length + 1, ext)
        raise OutputFileExistsError, path if File.exist?(path) && !overwrite_existing

        chunk_paths << path
        chunk_row_counts << 0
        current_csv = ::CSV.open(
          path,
          "w",
          write_headers: headers_present,
          headers: headers_present ? row.headers : nil,
          col_sep: col_sep
        )
      end

      current_csv << (headers_present ? row.fields : row)
      chunk_row_counts[-1] += 1
    end

    {
      chunk_paths: chunk_paths,
      chunk_count: chunk_paths.length,
      data_rows: chunk_row_counts.sum,
      chunk_row_counts: chunk_row_counts
    }
  ensure
    # Only an open writer can be left here; finished chunks are closed in the loop.
    current_csv&.close
  end

  private

  # Builds the path of the chunk with the given 1-based sequence number.
  def chunk_path(output_directory, file_prefix, sequence, ext)
    File.join(
      output_directory,
      format("%<prefix>s_part_%<num>03d%<ext>s", prefix: file_prefix, num: sequence, ext: ext)
    )
  end
end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Csvtool
6
+ module Infrastructure
7
+ module CSV
8
# Streams a CSV file once and collects the row count, column count, headers
# and per-column blank / non-blank counts.
class CsvStatsScanner
  # @param csv [#foreach] CSV reader; defaults to the standard library CSV class
  def initialize(csv: ::CSV)
    @csv = csv
  end

  # Scans the file row by row, so memory grows with the number of columns,
  # not the number of rows.
  #
  # Every column's blank_count + non_blank_count equals the number of data
  # rows: a field missing from a row counts as blank, including for columns
  # that first appear in a later, wider row. Columns without a header (all
  # columns of a headerless file, or fields beyond the header row) are named
  # "column_<position>".
  #
  # @param file_path [String] CSV file to read
  # @param col_sep [String] column separator
  # @param headers_present [Boolean] whether the first row holds headers
  # @return [Hash] :row_count, :column_count, :headers (nil when the file is
  #   headerless or has no data rows), :column_stats (array of hashes with
  #   :name, :blank_count and :non_blank_count)
  def call(file_path:, col_sep:, headers_present:)
    data_row_count = 0
    headers = nil
    column_stats = []

    @csv.foreach(file_path, headers: headers_present, col_sep: col_sep) do |row|
      if headers_present
        headers ||= row.headers
        column_stats = headers.map { |name| new_stats(name, 0) } if column_stats.empty?
      end
      fields = headers_present || row.is_a?(::CSV::Row) ? row.fields : row

      # A row wider than anything seen so far adds columns; rows already
      # counted had no value there, so they are back-filled as blanks.
      while column_stats.length < fields.length
        column_stats << new_stats("column_#{column_stats.length + 1}", data_row_count)
      end

      # Indexing past the end of a short row yields nil, which counts as blank.
      column_stats.each_with_index { |stats, index| apply_value(stats, fields[index]) }
      data_row_count += 1
    end

    {
      row_count: data_row_count,
      column_count: column_stats.length,
      headers: headers,
      column_stats: column_stats
    }
  end

  private

  # Fresh counters for one column, optionally pre-loaded with blank rows.
  def new_stats(name, blank_count)
    { name: name, blank_count: blank_count, non_blank_count: 0 }
  end

  # nil and whitespace-only values count as blank.
  def apply_value(stats, value)
    if value.nil? || value.strip.empty?
      stats[:blank_count] += 1
    else
      stats[:non_blank_count] += 1
    end
  end
end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Csvtool
6
+ module Infrastructure
7
+ module Output
8
# Writes a CSV manifest listing each chunk's 1-based index, path and row count.
class CsvSplitManifestWriter
  # chunk_row_counts is read by position, so entry i describes chunk_paths[i].
  def call(path:, chunk_paths:, chunk_row_counts:)
    ::CSV.open(path, "w") do |manifest|
      manifest << ["chunk_index", "chunk_path", "row_count"]
      chunk_paths.each.with_index(1) do |chunk_path, position|
        manifest << [position, chunk_path, chunk_row_counts[position - 1]]
      end
    end
  end
end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Csvtool
6
+ module Infrastructure
7
+ module Output
8
# Writes a stats summary to a two-column ("metric", "value") CSV file.
class CsvStatsFileWriter
  # @param path [String] file to write (opened in "w" mode)
  # @param data [Hash] summary with :row_count, :column_count and optionally
  #   :headers (array of names) and :column_stats (array of hashes with
  #   :name, :non_blank_count and :blank_count)
  #
  # The "headers" line is omitted when :headers is nil or empty. A missing
  # or nil :column_stats writes no per-column lines.
  def call(path:, data:)
    headers = data[:headers]
    # `|| []` also covers an explicit nil value, which Hash#fetch's default does not.
    column_stats = data[:column_stats] || []

    ::CSV.open(path, "w") do |csv|
      csv << %w[metric value]
      csv << ["row_count", data[:row_count]]
      csv << ["column_count", data[:column_count]]
      csv << ["headers", headers.join("|")] unless headers.nil? || headers.empty?
      column_stats.each do |stats|
        csv << ["column.#{stats[:name]}.non_blank", stats[:non_blank_count]]
        csv << ["column.#{stats[:name]}.blank", stats[:blank_count]]
      end
    end
  end
end
24
+ end
25
+ end
26
+ end
@@ -33,6 +33,10 @@ module Csvtool
33
33
  @stdout.puts "Cannot write output file: #{path} (#{error_class})"
34
34
  end
35
35
 
36
# Reports that the given output path is already taken.
def output_file_exists(path)
  @stdout.puts("Output file already exists: #{path}")
end
39
+
36
40
  def empty_output_path
37
41
  @stdout.puts "Output file path cannot be empty."
38
42
  end
@@ -53,6 +57,10 @@ module Csvtool
53
57
  @stdout.puts "Seed must be an integer."
54
58
  end
55
59
 
60
# Reports that the entered chunk size was rejected.
def invalid_chunk_size
  @stdout.puts("Chunk size must be a positive integer.")
end
63
+
56
64
  def canceled
57
65
  @stdout.puts "Canceled."
58
66
  end
@@ -4,7 +4,7 @@ module Csvtool
4
4
  module Interface
5
5
  module CLI
6
6
  class MenuLoop
7
- def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:)
7
+ def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:, split_action:, stats_action:)
8
8
  @stdin = stdin
9
9
  @stdout = stdout
10
10
  @menu_options = menu_options
@@ -13,6 +13,8 @@ module Csvtool
13
13
  @randomize_rows_action = randomize_rows_action
14
14
  @dedupe_action = dedupe_action
15
15
  @parity_action = parity_action
16
+ @split_action = split_action
17
+ @stats_action = stats_action
16
18
  end
17
19
 
18
20
  def run
@@ -34,9 +36,13 @@ module Csvtool
34
36
  when "5"
35
37
  @parity_action.call
36
38
  when "6"
39
+ @split_action.call
40
+ when "7"
41
+ @stats_action.call
42
+ when "8"
37
43
  return 0
38
44
  else
39
- @stdout.puts "Please choose 1, 2, 3, 4, 5, or 6."
45
+ @stdout.puts "Please choose 1, 2, 3, 4, 5, 6, 7, or 8."
40
46
  end
41
47
  end
42
48
  end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Interface
5
+ module CLI
6
+ module Prompts
7
# Asks for the number of rows per chunk and returns the raw, stripped answer.
class ChunkSizePrompt
  def initialize(stdin:, stdout:)
    @stdin, @stdout = stdin, stdout
  end

  # Returns the entered text without surrounding whitespace, or an empty
  # string when input has ended. The value is not validated or converted here.
  def call
    @stdout.print "Rows per chunk: "
    answer = @stdin.gets
    answer = answer.strip if answer
    answer.to_s
  end
end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Interface
5
+ module CLI
6
+ module Prompts
7
# Asks whether a manifest should be written and, if so, where.
class SplitManifestPrompt
  def initialize(stdin:, stdout:, yes_no_prompt:)
    @stdin, @stdout = stdin, stdout
    @yes_no_prompt = yes_no_prompt
  end

  # Returns a hash with :write_manifest and :manifest_path. The path is only
  # requested after a "yes" answer; an empty answer (or end of input) falls
  # back to default_path.
  def call(default_path:)
    wants_manifest = @yes_no_prompt.call(label: "Write manifest file? [y/N]: ", default: false)

    if wants_manifest
      @stdout.print "Manifest file path [#{default_path}]: "
      entered = @stdin.gets
      entered = entered.strip if entered
      chosen = entered.to_s
      chosen = default_path if chosen.empty?
      { write_manifest: true, manifest_path: chosen }
    else
      { write_manifest: false, manifest_path: nil }
    end
  end
end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Interface
5
+ module CLI
6
+ module Prompts
7
# Collects the output directory, the chunk file prefix and the overwrite
# choice for a split run.
class SplitOutputPrompt
  def initialize(stdin:, stdout:, yes_no_prompt:)
    @stdin, @stdout = stdin, stdout
    @yes_no_prompt = yes_no_prompt
  end

  # Asks for directory, prefix and overwrite in that order and returns a hash
  # with :output_directory, :file_prefix and :overwrite_existing.
  def call(default_directory:, default_prefix:)
    directory = ask("Output directory [#{default_directory}]: ", default_directory)
    prefix = ask("Output file prefix [#{default_prefix}]: ", default_prefix)
    overwrite = @yes_no_prompt.call(label: "Overwrite existing chunk files? [y/N]: ", default: false)

    { output_directory: directory, file_prefix: prefix, overwrite_existing: overwrite }
  end

  private

  # Prints the label, reads one line and returns it stripped; an empty answer
  # or end of input yields the fallback.
  def ask(label, fallback)
    @stdout.print label
    answer = @stdin.gets
    answer = answer.strip if answer
    answer = answer.to_s
    answer.empty? ? fallback : answer
  end
end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/csv_split_session/split_source"
4
+ require "csvtool/domain/csv_split_session/split_options"
5
+ require "csvtool/domain/csv_split_session/split_session"
6
+
7
+ module Csvtool
8
+ module Interface
9
+ module CLI
10
+ module Workflows
11
+ module Builders
12
# Turns the values collected by the CLI into a split session made of a
# source and an options object.
class CsvSplitSessionBuilder
  # Builds the source first, then the options, and starts a session with both.
  def call(file_path:, col_sep:, headers_present:, chunk_size:, output_directory: nil, file_prefix: nil,
           overwrite_existing: false, write_manifest: false, manifest_path: nil)
    split_source = Domain::CsvSplitSession::SplitSource.new(
      path: file_path, separator: col_sep, headers_present: headers_present
    )
    split_options = Domain::CsvSplitSession::SplitOptions.new(
      chunk_size: chunk_size,
      output_directory: output_directory,
      file_prefix: file_prefix,
      overwrite_existing: overwrite_existing,
      write_manifest: write_manifest,
      manifest_path: manifest_path
    )

    Domain::CsvSplitSession::SplitSession.start(source: split_source, options: split_options)
  end
end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/csv_stats_session/stats_source"
4
+ require "csvtool/domain/csv_stats_session/stats_options"
5
+ require "csvtool/domain/csv_stats_session/stats_session"
6
+
7
+ module Csvtool
8
+ module Interface
9
+ module CLI
10
+ module Workflows
11
+ module Builders
12
# Turns the values collected by the CLI into a stats session that carries
# its output destination.
class CsvStatsSessionBuilder
  # Starts a session from a new source and new options, then returns the
  # copy produced by with_output_destination(destination).
  def call(file_path:, col_sep:, headers_present:, destination:)
    stats_source = Domain::CsvStatsSession::StatsSource.new(
      path: file_path, separator: col_sep, headers_present: headers_present
    )
    stats_options = Domain::CsvStatsSession::StatsOptions.new

    Domain::CsvStatsSession::StatsSession
      .start(source: stats_source, options: stats_options)
      .with_output_destination(destination)
  end
end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Interface
5
+ module CLI
6
+ module Workflows
7
+ module Presenters
8
# Prints the outcome of a split run.
class CsvSplitPresenter
  def initialize(stdout:)
    @stdout = stdout
  end

  # Prints the fixed summary lines, the manifest line when a manifest path is
  # present, and then one line per chunk path.
  def print_summary(data)
    [
      "Split complete.",
      "Chunk size: #{data[:chunk_size]}",
      "Data rows: #{data[:data_rows]}",
      "Chunks written: #{data[:chunk_count]}"
    ].each { |line| @stdout.puts line }

    manifest = data[:manifest_path]
    @stdout.puts "Manifest: #{manifest}" if manifest

    data[:chunk_paths].each { |chunk| @stdout.puts chunk }
  end
end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Interface
5
+ module CLI
6
+ module Workflows
7
+ module Presenters
8
# Prints a stats summary and file-written confirmations.
class CsvStatsPresenter
  def initialize(stdout:)
    @stdout = stdout
  end

  # Prints row and column counts, the header list when there is one, and a
  # completeness line per column when column stats are present.
  def print_summary(data)
    header_names = data[:headers]
    per_column = data[:column_stats]

    @stdout.puts "CSV Stats Summary"
    @stdout.puts "Rows: #{data[:row_count]}"
    @stdout.puts "Columns: #{data[:column_count]}"
    if header_names && !header_names.empty?
      @stdout.puts "Headers: #{header_names.join(', ')}"
    end
    return if per_column.nil? || per_column.empty?

    @stdout.puts "Column completeness:"
    per_column.each do |column|
      @stdout.puts "  #{column[:name]}: non_blank=#{column[:non_blank_count]} blank=#{column[:blank_count]}"
    end
  end

  # Confirms that the summary was written to the given path.
  def print_file_written(path)
    @stdout.puts "Wrote output to #{path}"
  end
end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end