csvops 0.7.0.alpha → 0.9.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/README.md +80 -20
  3. data/docs/architecture.md +67 -4
  4. data/docs/cli-output-conventions.md +49 -0
  5. data/docs/release-v0.8.0-alpha.md +88 -0
  6. data/docs/release-v0.9.0-alpha.md +80 -0
  7. data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
  8. data/lib/csvtool/cli.rb +136 -12
  9. data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
  10. data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
  11. data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
  12. data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
  13. data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
  14. data/lib/csvtool/interface/cli/menu_loop.rb +9 -5
  15. data/lib/csvtool/interface/cli/output/color_policy.rb +25 -0
  16. data/lib/csvtool/interface/cli/output/colorizer.rb +27 -0
  17. data/lib/csvtool/interface/cli/output/formatters/csv_row_formatter.rb +19 -0
  18. data/lib/csvtool/interface/cli/output/formatters/stats_formatter.rb +57 -0
  19. data/lib/csvtool/interface/cli/output/streams.rb +22 -0
  20. data/lib/csvtool/interface/cli/output/table_renderer.rb +70 -0
  21. data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
  22. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +17 -5
  23. data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +15 -4
  24. data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +15 -6
  25. data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +43 -0
  26. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +5 -4
  27. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +5 -4
  28. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +9 -8
  29. data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +6 -5
  30. data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +11 -10
  31. data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +78 -0
  32. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +9 -8
  33. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +7 -6
  34. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +8 -7
  35. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
  36. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
  37. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
  38. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
  39. data/lib/csvtool/version.rb +1 -1
  40. data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
  41. data/test/csvtool/cli_test.rb +376 -68
  42. data/test/csvtool/cli_unit_test.rb +5 -5
  43. data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
  44. data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
  45. data/test/csvtool/interface/cli/menu_loop_test.rb +34 -11
  46. data/test/csvtool/interface/cli/output/color_policy_test.rb +40 -0
  47. data/test/csvtool/interface/cli/output/colorizer_test.rb +28 -0
  48. data/test/csvtool/interface/cli/output/formatters/csv_row_formatter_test.rb +22 -0
  49. data/test/csvtool/interface/cli/output/formatters/stats_formatter_test.rb +51 -0
  50. data/test/csvtool/interface/cli/output/streams_test.rb +25 -0
  51. data/test/csvtool/interface/cli/output/table_renderer_test.rb +36 -0
  52. data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
  53. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +4 -1
  54. data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +5 -1
  55. data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +22 -4
  56. data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +39 -0
  57. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +10 -7
  58. data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +3 -1
  59. data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +5 -3
  60. data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +151 -0
  61. data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
  62. data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
  63. data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
  64. data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
  65. metadata +39 -1
data/lib/csvtool/cli.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "csv"
4
+ require "json"
4
5
  require "csvtool/interface/cli/menu_loop"
5
6
  require "csvtool/interface/cli/workflows/run_extraction_workflow"
6
7
  require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
@@ -8,10 +9,21 @@ require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
8
9
  require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
9
10
  require "csvtool/interface/cli/workflows/run_csv_parity_workflow"
10
11
  require "csvtool/interface/cli/workflows/run_csv_split_workflow"
12
+ require "csvtool/interface/cli/workflows/run_csv_stats_workflow"
11
13
  require "csvtool/interface/cli/errors/presenter"
14
+ require "csvtool/interface/cli/output/table_renderer"
15
+ require "csvtool/interface/cli/output/streams"
16
+ require "csvtool/interface/cli/output/color_policy"
17
+ require "csvtool/interface/cli/output/colorizer"
18
+ require "csvtool/interface/cli/output/formatters/stats_formatter"
12
19
  require "csvtool/infrastructure/csv/header_reader"
13
20
  require "csvtool/infrastructure/csv/value_streamer"
14
21
  require "csvtool/infrastructure/output/console_writer"
22
+ require "csvtool/application/use_cases/run_csv_stats"
23
+ require "csvtool/domain/csv_stats_session/stats_source"
24
+ require "csvtool/domain/csv_stats_session/stats_options"
25
+ require "csvtool/domain/csv_stats_session/stats_session"
26
+ require "csvtool/domain/shared/output_destination"
15
27
 
16
28
  module Csvtool
17
29
  class CLI
@@ -22,18 +34,20 @@ module Csvtool
22
34
  "Dedupe using another CSV",
23
35
  "Validate parity",
24
36
  "Split CSV into chunks",
37
+ "CSV stats summary",
25
38
  "Exit"
26
39
  ].freeze
27
40
 
28
- def self.start(argv, stdin:, stdout:, stderr:)
29
- new(argv, stdin: stdin, stdout: stdout, stderr: stderr).run
41
+ def self.start(argv, stdin:, stdout:, stderr:, env: ENV)
42
+ new(argv, stdin: stdin, stdout: stdout, stderr: stderr, env: env).run
30
43
  end
31
44
 
32
- def initialize(argv, stdin:, stdout:, stderr:)
45
+ def initialize(argv, stdin:, stdout:, stderr:, env: ENV)
33
46
  @argv = argv
34
47
  @stdin = stdin
35
48
  @stdout = stdout
36
49
  @stderr = stderr
50
+ @env = env
37
51
  end
38
52
 
39
53
  def run
@@ -42,6 +56,8 @@ module Csvtool
42
56
  run_menu_loop
43
57
  when "column"
44
58
  run_column_command
59
+ when "stats"
60
+ run_stats_command
45
61
  else
46
62
  print_usage
47
63
  1
@@ -51,22 +67,26 @@ module Csvtool
51
67
  private
52
68
 
53
69
  def run_menu_loop
54
- extract_column_action = -> { Interface::CLI::Workflows::RunExtractionWorkflow.new(stdin: @stdin, stdout: @stdout).call }
55
- extract_rows_action = -> { Interface::CLI::Workflows::RunRowExtractionWorkflow.new(stdin: @stdin, stdout: @stdout).call }
56
- randomize_rows_action = -> { Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: @stdin, stdout: @stdout).call }
57
- dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: @stdout).call }
58
- parity_action = -> { Interface::CLI::Workflows::RunCsvParityWorkflow.new(stdin: @stdin, stdout: @stdout).call }
59
- split_action = -> { Interface::CLI::Workflows::RunCsvSplitWorkflow.new(stdin: @stdin, stdout: @stdout).call }
70
+ streams = Interface::CLI::Output::Streams.build(data: @stdout, ui: @stderr)
71
+ extract_column_action = -> { Interface::CLI::Workflows::RunExtractionWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
72
+ extract_rows_action = -> { Interface::CLI::Workflows::RunRowExtractionWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
73
+ randomize_rows_action = -> { Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
74
+ dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
75
+ parity_action = -> { Interface::CLI::Workflows::RunCsvParityWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
76
+ split_action = -> { Interface::CLI::Workflows::RunCsvSplitWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
77
+ stats_action = -> { Interface::CLI::Workflows::RunCsvStatsWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
60
78
  Interface::CLI::MenuLoop.new(
61
79
  stdin: @stdin,
62
- stdout: @stdout,
80
+ stdout: streams.data,
81
+ stderr: streams.ui,
63
82
  menu_options: MENU_OPTIONS,
64
83
  extract_column_action: extract_column_action,
65
84
  extract_rows_action: extract_rows_action,
66
85
  randomize_rows_action: randomize_rows_action,
67
86
  dedupe_action: dedupe_action,
68
87
  parity_action: parity_action,
69
- split_action: split_action
88
+ split_action: split_action,
89
+ stats_action: stats_action
70
90
  ).run
71
91
  end
72
92
 
@@ -74,6 +94,7 @@ module Csvtool
74
94
  @stderr.puts "Usage:"
75
95
  @stderr.puts " csvtool menu"
76
96
  @stderr.puts " csvtool column <file> <column>"
97
+ @stderr.puts " csvtool stats <file> [--format text|json|csv] [--color auto|always|never]"
77
98
  end
78
99
 
79
100
  def run_column_command
@@ -84,7 +105,7 @@ module Csvtool
84
105
  return 1
85
106
  end
86
107
 
87
- errors = Interface::CLI::Errors::Presenter.new(stdout: @stdout)
108
+ errors = Interface::CLI::Errors::Presenter.new(stdout: @stderr)
88
109
  return errors.file_not_found(file_path) || 1 unless File.file?(file_path)
89
110
 
90
111
  header_reader = Infrastructure::CSV::HeaderReader.new
@@ -103,5 +124,108 @@ module Csvtool
103
124
  errors.cannot_read_file(file_path)
104
125
  1
105
126
  end
127
+
128
# Handles the `stats` subcommand: parses the arguments after the command
# name, validates the requested format and color mode, runs the stats use
# case against the file and prints the formatted result to @stdout.
#
# Usage and all error messages go to @stderr.
#
# @return [Integer] 0 on success, 1 on any usage, validation or use-case error
def run_stats_command
  path, format, color_mode = parse_stats_args(@argv[1..])
  unless path
    print_usage
    return 1
  end

  presenter = Interface::CLI::Errors::Presenter.new(stdout: @stderr)
  if !%w[text json csv].include?(format)
    @stderr.puts "Invalid format: #{format}"
    return 1
  elsif !%w[auto always never].include?(color_mode)
    @stderr.puts "Invalid color mode: #{color_mode}"
    return 1
  end

  # The command-line entry point always scans a comma-separated file with a header row.
  stats_source = Domain::CsvStatsSession::StatsSource.new(path: path, separator: ",", headers_present: true)
  stats_options = Domain::CsvStatsSession::StatsOptions.new
  console = Domain::Shared::OutputDestination.console
  session = Domain::CsvStatsSession::StatsSession
    .start(source: stats_source, options: stats_options)
    .with_output_destination(console)
  outcome = Application::UseCases::RunCsvStats.new.call(session: session)

  if outcome.ok?
    renderer = Interface::CLI::Output::TableRenderer.new
    formatter = Interface::CLI::Output::Formatters::StatsFormatter.new(table_renderer: renderer)
    rendered = formatter.call(data: outcome.data, format: format, max_width: terminal_width)
    print_stats_output(rendered, format: format, color_mode: color_mode)
    return 0
  end

  case outcome.error
  when :file_not_found then presenter.file_not_found(outcome.data[:path])
  when :could_not_parse_csv then presenter.could_not_parse_csv
  when :cannot_read_file then presenter.cannot_read_file(outcome.data[:path])
  else @stderr.puts "Unknown error."
  end
  1
end
172
+
173
# Parses the arguments that follow `csvtool stats`.
#
# Recognized options are `--format VALUE`, `--format=VALUE`, `--color VALUE`
# and `--color=VALUE`; when an option is repeated the last occurrence wins.
# The file path is the first argument that is not an option, so options may
# appear before or after it. Unrecognized `--` options and additional
# positional arguments are ignored. An option given without a value yields
# an empty string, which the caller rejects as invalid.
#
# @param args [Array<String>, nil] arguments after the subcommand name
# @return [Array(String, String, String)] file path (nil when absent),
#   format (default "text") and color mode (default "auto")
def parse_stats_args(args)
  file_path = nil
  format = "text"
  color_mode = "auto"
  pending = Array(args).dup

  until pending.empty?
    arg = pending.shift.to_s
    case arg
    when "--format"
      format = pending.shift.to_s
    when "--color"
      color_mode = pending.shift.to_s
    when /\A--format=/
      format = arg.split("=", 2)[1]
    when /\A--color=/
      color_mode = arg.split("=", 2)[1]
    when /\A--/
      # Unknown option: ignored rather than being mistaken for the file path.
      next
    else
      file_path ||= arg
    end
  end

  [file_path, format, color_mode]
end
195
+
196
# Writes the formatted stats to @stdout.
#
# Only the "text" format is passed through the colorizer; every other
# format is printed exactly as produced by the formatter.
#
# @param output [String] formatted stats
# @param format [String] output format name
# @param color_mode [String] color mode handed to the color policy
# @return [nil]
def print_stats_output(output, format:, color_mode:)
  unless format == "text"
    @stdout.puts output
    return
  end

  color_policy = Interface::CLI::Output::ColorPolicy.new(mode: color_mode, io: @stdout, env: @env)
  painter = Interface::CLI::Output::Colorizer.new(policy: color_policy)
  @stdout.puts apply_text_color(output, colorizer: painter)
end
206
+
207
# Applies emphasis to the structural lines of the text stats report.
#
# The report title is colored with code "1;36". The "Column completeness:"
# section title and every table header row are colored with code "1".
# A header row is recognized by position: it is the line immediately
# followed by a dash separator line such as "-------+------". This keeps
# data rows whose first cell happens to read "Value" or "Column" from
# being emphasized, and still catches headers that were truncated.
#
# @param text [String] report text, lines separated by "\n"
# @param colorizer [#call] receives (line, code:) and returns the styled line
# @return [String] the report with styled lines, joined by "\n" and
#   without a trailing newline
def apply_text_color(text, colorizer:)
  separator = /\A-+(?:\+-+)*\z/
  lines = text.lines.map(&:chomp)

  lines.each_with_index.map do |line, index|
    if line == "CSV Stats Summary"
      colorizer.call(line, code: "1;36")
    elsif line == "Column completeness:" || separator.match?(lines[index + 1].to_s)
      colorizer.call(line, code: "1")
    else
      line
    end
  end.join("\n")
end
222
+
223
# Determines the width used to lay out text tables.
#
# Resolution order:
# 1. a positive integer in the COLUMNS entry of @env;
# 2. the column count reported by @stdout.winsize, when @stdout offers it;
# 3. 80 as the fallback.
#
# winsize raises a system-call error when the stream is not attached to a
# terminal (for example when output is piped), and may report 0 columns;
# both cases fall through to the default instead of aborting the command.
#
# @return [Integer] a positive column count
def terminal_width
  columns = @env["COLUMNS"].to_i
  return columns if columns.positive?

  if @stdout.respond_to?(:winsize)
    begin
      reported = @stdout.winsize[1].to_i
      return reported if reported.positive?
    rescue SystemCallError, IOError
      # Not a terminal (or already closed): use the default below.
    end
  end

  80
end
106
230
  end
107
231
  end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvStatsSession
      # Options object for a stats session. It currently accepts no
      # arguments and stores no state.
      class StatsOptions
        def initialize
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvStatsSession
      # Bundles the source, options and (optional) output destination of
      # one stats run. Instances are not modified after construction;
      # changing the destination produces a new session.
      class StatsSession
        attr_reader :source, :options, :output_destination

        class << self
          # Starts a session that has no output destination yet.
          def start(source:, options:)
            new(source: source, options: options)
          end
        end

        def initialize(source:, options:, output_destination: nil)
          @source, @options, @output_destination = source, options, output_destination
        end

        # Returns a copy of this session that targets +output_destination+;
        # the receiver is left untouched.
        def with_output_destination(output_destination)
          attributes = { source: source, options: options, output_destination: output_destination }
          self.class.new(**attributes)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Domain
    module CsvStatsSession
      # Describes the CSV input of a stats run: where it lives, which
      # separator it uses and whether its first row is a header.
      class StatsSource
        attr_reader :path, :separator, :headers_present

        def initialize(path:, separator:, headers_present:)
          @path, @separator, @headers_present = path, separator, headers_present
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
module Csvtool
  module Infrastructure
    module CSV
      # Streams a CSV file once and collects the row count, the column
      # count and per-column blank / non-blank counts.
      class CsvStatsScanner
        # @param csv [#foreach] CSV reader; defaults to the standard library CSV
        def initialize(csv: ::CSV)
          @csv = csv
        end

        # Scans +file_path+ row by row.
        #
        # With +headers_present+ the column names come from the headers of
        # the first data row; otherwise columns are named "column_1",
        # "column_2", ... as they are discovered. A row that is wider than
        # every row seen so far adds generated "column_N" entries instead of
        # failing, and such late columns are credited with one blank per row
        # already scanned, so blank + non-blank always equals the row count.
        # Missing trailing fields of a short row count as blank.
        #
        # @param file_path [String] path handed to the reader's foreach
        # @param col_sep [String] column separator
        # @param headers_present [Boolean] whether the first row is a header
        # @return [Hash] :row_count, :column_count, :headers (nil without
        #   headers or when no data row was read) and :column_stats, an
        #   array of { name:, blank_count:, non_blank_count: } hashes
        def call(file_path:, col_sep:, headers_present:)
          row_count = 0
          headers = nil
          column_stats = []

          # Streaming scan: memory grows with per-column metrics, not row count.
          @csv.foreach(file_path, headers: headers_present, col_sep: col_sep) do |row|
            if headers_present && headers.nil?
              headers = row.headers
              column_stats = headers.map { |name| new_stats(name, 0) }
            end

            fields = row.respond_to?(:fields) ? row.fields : row
            while column_stats.length < fields.length
              column_stats << new_stats("column_#{column_stats.length + 1}", row_count)
            end

            # Iterating over the stats (not the fields) treats missing
            # trailing fields as nil without mutating the row.
            column_stats.each_with_index { |stats, index| apply_value(stats, fields[index]) }
            row_count += 1
          end

          {
            row_count: row_count,
            column_count: column_stats.length,
            headers: headers,
            column_stats: column_stats
          }
        end

        private

        # Builds the counter hash for one column; +rows_already_seen+ is the
        # number of earlier rows in which the column was absent.
        def new_stats(name, rows_already_seen)
          { name: name, blank_count: rows_already_seen, non_blank_count: 0 }
        end

        # Counts nil and whitespace-only values as blank.
        def apply_value(stats, value)
          if value.nil? || value.to_s.strip.empty?
            stats[:blank_count] += 1
          else
            stats[:non_blank_count] += 1
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
module Csvtool
  module Infrastructure
    module Output
      # Writes a stats result to a CSV file as metric/value pairs.
      class CsvStatsFileWriter
        # Creates (or overwrites) the file at +path+ with a "metric,value"
        # header row followed by row_count, column_count, an optional
        # pipe-joined headers row and two rows per column (non-blank and
        # blank counts).
        #
        # @param path [String] destination file
        # @param data [Hash] stats with :row_count, :column_count,
        #   :headers and optionally :column_stats
        def call(path:, data:)
          ::CSV.open(path, "w") do |csv|
            csv << %w[metric value]
            csv << ["row_count", data[:row_count]]
            csv << ["column_count", data[:column_count]]

            header_names = data[:headers]
            csv << ["headers", header_names.join("|")] if !header_names.nil? && !header_names.empty?

            data.fetch(:column_stats, []).each do |column|
              prefix = "column.#{column[:name]}"
              csv << ["#{prefix}.non_blank", column[:non_blank_count]]
              csv << ["#{prefix}.blank", column[:blank_count]]
            end
          end
        end
      end
    end
  end
end
@@ -4,9 +4,10 @@ module Csvtool
4
4
  module Interface
5
5
  module CLI
6
6
  class MenuLoop
7
- def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:, split_action:)
7
+ def initialize(stdin:, stdout:, stderr: stdout, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:, split_action:, stats_action:)
8
8
  @stdin = stdin
9
9
  @stdout = stdout
10
+ @stderr = stderr
10
11
  @menu_options = menu_options
11
12
  @extract_column_action = extract_column_action
12
13
  @extract_rows_action = extract_rows_action
@@ -14,12 +15,13 @@ module Csvtool
14
15
  @dedupe_action = dedupe_action
15
16
  @parity_action = parity_action
16
17
  @split_action = split_action
18
+ @stats_action = stats_action
17
19
  end
18
20
 
19
21
  def run
20
22
  loop do
21
23
  print_menu
22
- @stdout.print "> "
24
+ @stderr.print "> "
23
25
  choice = @stdin.gets
24
26
  return 0 if choice.nil?
25
27
 
@@ -37,9 +39,11 @@ module Csvtool
37
39
  when "6"
38
40
  @split_action.call
39
41
  when "7"
42
+ @stats_action.call
43
+ when "8"
40
44
  return 0
41
45
  else
42
- @stdout.puts "Please choose 1, 2, 3, 4, 5, 6, or 7."
46
+ @stderr.puts "Please choose 1, 2, 3, 4, 5, 6, 7, or 8."
43
47
  end
44
48
  end
45
49
  end
@@ -47,9 +51,9 @@ module Csvtool
47
51
  private
48
52
 
49
53
  def print_menu
50
- @stdout.puts "CSV Tool Menu"
54
+ @stderr.puts "CSV Tool Menu"
51
55
  @menu_options.each_with_index do |option, index|
52
- @stdout.puts "#{index + 1}. #{option}"
56
+ @stderr.puts "#{index + 1}. #{option}"
53
57
  end
54
58
  end
55
59
  end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Output
        # Decides whether ANSI color should be emitted for a given stream.
        class ColorPolicy
          # @param mode [String] "always", "never", or any other value for automatic detection
          # @param io [Object] output stream; consulted through tty? when it responds to it
          # @param env [#[]] environment lookup, ENV by default
          def initialize(mode:, io:, env: ENV)
            @mode = mode
            @io = io
            @env = env
          end

          # "always" and "never" are explicit and win over everything else.
          # Otherwise color is off when NO_COLOR is set to a non-empty value
          # (an empty NO_COLOR does not count as a request), and on only
          # when the stream reports itself as a terminal.
          #
          # @return [Boolean]
          def enabled?
            return true if @mode == "always"
            return false if @mode == "never"
            return false if no_color_requested?

            @io.respond_to?(:tty?) && @io.tty?
          end

          private

          # True when NO_COLOR is present and not an empty string.
          def no_color_requested?
            !@env["NO_COLOR"].to_s.empty?
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/interface/cli/output/color_policy"
4
+
5
module Csvtool
  module Interface
    module CLI
      module Output
        # Wraps text in ANSI escape sequences when its policy allows color.
        class Colorizer
          # Builds a colorizer whose policy runs in "auto" mode for +io+.
          def self.auto(io:, env: ENV)
            auto_policy = ColorPolicy.new(mode: "auto", io: io, env: env)
            new(policy: auto_policy)
          end

          # @param policy [#enabled?] decides whether escape codes are emitted
          def initialize(policy:)
            @policy = policy
          end

          # @param text [String] text to style
          # @param code [String] SGR parameter list, e.g. "1" or "1;36"
          # @return [String] the styled text, or +text+ itself when color is disabled
          def call(text, code:)
            @policy.enabled? ? "\e[#{code}m#{text}\e[0m" : text
          end
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
module Csvtool
  module Interface
    module CLI
      module Output
        module Formatters
          # Renders one row of fields as a single CSV-encoded line.
          class CsvRowFormatter
            # @param fields [Array] cell values of the row
            # @param col_sep [String] column separator
            # @return [String] the encoded row without a line terminator
            def call(fields:, col_sep:)
              encoded = ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep)
              encoded.chomp
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
require "csv"
require "json"
4
+
5
module Csvtool
  module Interface
    module CLI
      module Output
        module Formatters
          # Turns a stats result hash into JSON, CSV or a plain-text report.
          class StatsFormatter
            # @param table_renderer [#render] renders headers and rows as a
            #   text table; only used for the text format
            def initialize(table_renderer:)
              @table_renderer = table_renderer
            end

            # @param data [Hash] stats with :row_count, :column_count,
            #   :headers and :column_stats
            # @param format [String] "json" or "csv"; any other value
            #   produces the text report
            # @param max_width [Integer] width limit passed to the table renderer
            # @return [String] formatted output without a trailing newline
            def call(data:, format:, max_width: 80)
              case format
              when "json"
                JSON.generate(data)
              when "csv"
                csv_lines(data).join("\n")
              else
                text_lines(data, max_width: max_width).join("\n")
              end
            end

            private

            # Builds the metric/value rows and encodes each one with the CSV
            # library, so column names containing separators, quotes or
            # line breaks are quoted instead of corrupting the output.
            def csv_lines(data)
              rows = [
                %w[metric value],
                ["row_count", data[:row_count]],
                ["column_count", data[:column_count]]
              ]
              headers = data[:headers]
              rows << ["headers", headers.join("|")] unless headers.nil? || headers.empty?
              data.fetch(:column_stats, []).each do |stats|
                rows << ["column.#{stats[:name]}.non_blank", stats[:non_blank_count]]
                rows << ["column.#{stats[:name]}.blank", stats[:blank_count]]
              end
              rows.map { |row| ::CSV.generate_line(row).chomp }
            end

            # Title, summary table and, when column stats exist, a blank
            # line followed by the "Column completeness:" table.
            def text_lines(data, max_width:)
              lines = ["CSV Stats Summary"]
              summary_rows = [["Rows", data[:row_count].to_s], ["Columns", data[:column_count].to_s]]
              summary_rows << ["Headers", data[:headers].join(", ")] unless data[:headers].nil? || data[:headers].empty?
              lines << @table_renderer.render(headers: ["Metric", "Value"], rows: summary_rows, max_width: max_width)

              return lines if data[:column_stats].nil? || data[:column_stats].empty?

              lines << ""
              lines << "Column completeness:"
              rows = data[:column_stats].map { |stats| [stats[:name], stats[:non_blank_count].to_s, stats[:blank_count].to_s] }
              lines << @table_renderer.render(headers: ["Column", "Non-blank", "Blank"], rows: rows, max_width: max_width)
              lines
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Output
        # Pairs the stream that carries data output with the stream used
        # for interactive UI text.
        class Streams
          attr_reader :data, :ui

          class << self
            # Convenience constructor; +ui+ falls back to the data stream.
            def build(data:, ui: data)
              new(data: data, ui: ui)
            end
          end

          def initialize(data:, ui:)
            @data, @ui = data, ui
          end
        end
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Output
        # Renders headers and rows as a plain-text table whose columns are
        # separated by " | " and whose header is underlined with dashes.
        class TableRenderer
          # Columns are never shrunk below this many characters.
          MIN_COLUMN_WIDTH = 4

          # @param headers [Array] column titles
          # @param rows [Array<Array>] cell values, one array per row
          # @param max_width [Integer] target total width; the widest
          #   columns are narrowed until the table fits or every column is
          #   at the minimum width
          # @return [String] table lines joined by "\n"
          def render(headers:, rows:, max_width: 80)
            column_widths = fit_widths(compute_widths(headers, rows), max_width)
            body = rows.map { |row| format_row(row, column_widths) }
            [format_row(headers, column_widths), separator(column_widths), *body].join("\n")
          end

          private

          # Natural width of each column: the longest header or cell text.
          def compute_widths(headers, rows)
            initial = headers.map { |header| header.to_s.length }
            rows.each_with_object(initial) do |row, widths|
              row.each_with_index do |cell, position|
                widths[position] = [widths[position], cell.to_s.length].max
              end
            end
          end

          # Narrows the currently widest column one character at a time.
          def fit_widths(widths, max_width)
            shrunk = widths.dup
            until total_width(shrunk) <= max_width
              widest = shrunk.each_with_index.max_by { |width, _position| width }.last
              break if shrunk[widest] <= MIN_COLUMN_WIDTH

              shrunk[widest] -= 1
            end
            shrunk
          end

          # Sum of the column widths plus three characters per column gap.
          def total_width(widths)
            gaps = widths.length - 1
            widths.sum + (3 * gaps)
          end

          def separator(widths)
            dashes = widths.map { |width| "-" * width }
            dashes.join("-+-")
          end

          def format_row(row, widths)
            cells = row.each_with_index.map do |cell, position|
              width = widths[position]
              truncate(cell.to_s, width).ljust(width)
            end
            cells.join(" | ")
          end

          # Shortens text to +width+ characters, ending in "..." when the
          # column is wide enough to hold an ellipsis.
          def truncate(text, width)
            if text.length <= width
              text
            elsif width < MIN_COLUMN_WIDTH
              text[0, width]
            else
              "#{text[0, width - 3]}..."
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/domain/csv_stats_session/stats_source"
4
+ require "csvtool/domain/csv_stats_session/stats_options"
5
+ require "csvtool/domain/csv_stats_session/stats_session"
6
+
7
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a stats session from the values collected by the
          # interactive workflow.
          class CsvStatsSessionBuilder
            # @param file_path [String] path of the CSV file
            # @param col_sep [String] column separator
            # @param headers_present [Boolean] whether the first row is a header
            # @param destination [Object] output destination attached to the session
            # @return [Domain::CsvStatsSession::StatsSession] the value
            #   returned by with_output_destination
            def call(file_path:, col_sep:, headers_present:, destination:)
              stats_source = build_source(file_path, col_sep, headers_present)
              stats_options = Domain::CsvStatsSession::StatsOptions.new
              Domain::CsvStatsSession::StatsSession
                .start(source: stats_source, options: stats_options)
                .with_output_destination(destination)
            end

            private

            # Wraps the raw input description in a StatsSource.
            def build_source(file_path, col_sep, headers_present)
              Domain::CsvStatsSession::StatsSource.new(
                path: file_path,
                separator: col_sep,
                headers_present: headers_present
              )
            end
          end
        end
      end
    end
  end
end