csvops 0.7.0.alpha → 0.9.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +80 -20
- data/docs/architecture.md +67 -4
- data/docs/cli-output-conventions.md +49 -0
- data/docs/release-v0.8.0-alpha.md +88 -0
- data/docs/release-v0.9.0-alpha.md +80 -0
- data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
- data/lib/csvtool/cli.rb +136 -12
- data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
- data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
- data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
- data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
- data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +9 -5
- data/lib/csvtool/interface/cli/output/color_policy.rb +25 -0
- data/lib/csvtool/interface/cli/output/colorizer.rb +27 -0
- data/lib/csvtool/interface/cli/output/formatters/csv_row_formatter.rb +19 -0
- data/lib/csvtool/interface/cli/output/formatters/stats_formatter.rb +57 -0
- data/lib/csvtool/interface/cli/output/streams.rb +22 -0
- data/lib/csvtool/interface/cli/output/table_renderer.rb +70 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +17 -5
- data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +15 -4
- data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +15 -6
- data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +5 -4
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +5 -4
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +9 -8
- data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +6 -5
- data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +11 -10
- data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +78 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +9 -8
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +7 -6
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +8 -7
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
- data/test/csvtool/cli_test.rb +376 -68
- data/test/csvtool/cli_unit_test.rb +5 -5
- data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
- data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +34 -11
- data/test/csvtool/interface/cli/output/color_policy_test.rb +40 -0
- data/test/csvtool/interface/cli/output/colorizer_test.rb +28 -0
- data/test/csvtool/interface/cli/output/formatters/csv_row_formatter_test.rb +22 -0
- data/test/csvtool/interface/cli/output/formatters/stats_formatter_test.rb +51 -0
- data/test/csvtool/interface/cli/output/streams_test.rb +25 -0
- data/test/csvtool/interface/cli/output/table_renderer_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +4 -1
- data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +5 -1
- data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +22 -4
- data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +10 -7
- data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +3 -1
- data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +5 -3
- data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +151 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
- metadata +39 -1
data/lib/csvtool/cli.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "csv"
|
|
4
|
+
require "json"
|
|
4
5
|
require "csvtool/interface/cli/menu_loop"
|
|
5
6
|
require "csvtool/interface/cli/workflows/run_extraction_workflow"
|
|
6
7
|
require "csvtool/interface/cli/workflows/run_row_extraction_workflow"
|
|
@@ -8,10 +9,21 @@ require "csvtool/interface/cli/workflows/run_row_randomization_workflow"
|
|
|
8
9
|
require "csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow"
|
|
9
10
|
require "csvtool/interface/cli/workflows/run_csv_parity_workflow"
|
|
10
11
|
require "csvtool/interface/cli/workflows/run_csv_split_workflow"
|
|
12
|
+
require "csvtool/interface/cli/workflows/run_csv_stats_workflow"
|
|
11
13
|
require "csvtool/interface/cli/errors/presenter"
|
|
14
|
+
require "csvtool/interface/cli/output/table_renderer"
|
|
15
|
+
require "csvtool/interface/cli/output/streams"
|
|
16
|
+
require "csvtool/interface/cli/output/color_policy"
|
|
17
|
+
require "csvtool/interface/cli/output/colorizer"
|
|
18
|
+
require "csvtool/interface/cli/output/formatters/stats_formatter"
|
|
12
19
|
require "csvtool/infrastructure/csv/header_reader"
|
|
13
20
|
require "csvtool/infrastructure/csv/value_streamer"
|
|
14
21
|
require "csvtool/infrastructure/output/console_writer"
|
|
22
|
+
require "csvtool/application/use_cases/run_csv_stats"
|
|
23
|
+
require "csvtool/domain/csv_stats_session/stats_source"
|
|
24
|
+
require "csvtool/domain/csv_stats_session/stats_options"
|
|
25
|
+
require "csvtool/domain/csv_stats_session/stats_session"
|
|
26
|
+
require "csvtool/domain/shared/output_destination"
|
|
15
27
|
|
|
16
28
|
module Csvtool
|
|
17
29
|
class CLI
|
|
@@ -22,18 +34,20 @@ module Csvtool
|
|
|
22
34
|
"Dedupe using another CSV",
|
|
23
35
|
"Validate parity",
|
|
24
36
|
"Split CSV into chunks",
|
|
37
|
+
"CSV stats summary",
|
|
25
38
|
"Exit"
|
|
26
39
|
].freeze
|
|
27
40
|
|
|
28
|
-
def self.start(argv, stdin:, stdout:, stderr:)
|
|
29
|
-
new(argv, stdin: stdin, stdout: stdout, stderr: stderr).run
|
|
41
|
+
def self.start(argv, stdin:, stdout:, stderr:, env: ENV)
|
|
42
|
+
new(argv, stdin: stdin, stdout: stdout, stderr: stderr, env: env).run
|
|
30
43
|
end
|
|
31
44
|
|
|
32
|
-
def initialize(argv, stdin:, stdout:, stderr:)
|
|
45
|
+
def initialize(argv, stdin:, stdout:, stderr:, env: ENV)
|
|
33
46
|
@argv = argv
|
|
34
47
|
@stdin = stdin
|
|
35
48
|
@stdout = stdout
|
|
36
49
|
@stderr = stderr
|
|
50
|
+
@env = env
|
|
37
51
|
end
|
|
38
52
|
|
|
39
53
|
def run
|
|
@@ -42,6 +56,8 @@ module Csvtool
|
|
|
42
56
|
run_menu_loop
|
|
43
57
|
when "column"
|
|
44
58
|
run_column_command
|
|
59
|
+
when "stats"
|
|
60
|
+
run_stats_command
|
|
45
61
|
else
|
|
46
62
|
print_usage
|
|
47
63
|
1
|
|
@@ -51,22 +67,26 @@ module Csvtool
|
|
|
51
67
|
private
|
|
52
68
|
|
|
53
69
|
def run_menu_loop
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
70
|
+
streams = Interface::CLI::Output::Streams.build(data: @stdout, ui: @stderr)
|
|
71
|
+
extract_column_action = -> { Interface::CLI::Workflows::RunExtractionWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
|
|
72
|
+
extract_rows_action = -> { Interface::CLI::Workflows::RunRowExtractionWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
|
|
73
|
+
randomize_rows_action = -> { Interface::CLI::Workflows::RunRowRandomizationWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
|
|
74
|
+
dedupe_action = -> { Interface::CLI::Workflows::RunCrossCsvDedupeWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
|
|
75
|
+
parity_action = -> { Interface::CLI::Workflows::RunCsvParityWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
|
|
76
|
+
split_action = -> { Interface::CLI::Workflows::RunCsvSplitWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
|
|
77
|
+
stats_action = -> { Interface::CLI::Workflows::RunCsvStatsWorkflow.new(stdin: @stdin, stdout: streams.data, stderr: streams.ui).call }
|
|
60
78
|
Interface::CLI::MenuLoop.new(
|
|
61
79
|
stdin: @stdin,
|
|
62
|
-
stdout:
|
|
80
|
+
stdout: streams.data,
|
|
81
|
+
stderr: streams.ui,
|
|
63
82
|
menu_options: MENU_OPTIONS,
|
|
64
83
|
extract_column_action: extract_column_action,
|
|
65
84
|
extract_rows_action: extract_rows_action,
|
|
66
85
|
randomize_rows_action: randomize_rows_action,
|
|
67
86
|
dedupe_action: dedupe_action,
|
|
68
87
|
parity_action: parity_action,
|
|
69
|
-
split_action: split_action
|
|
88
|
+
split_action: split_action,
|
|
89
|
+
stats_action: stats_action
|
|
70
90
|
).run
|
|
71
91
|
end
|
|
72
92
|
|
|
@@ -74,6 +94,7 @@ module Csvtool
|
|
|
74
94
|
@stderr.puts "Usage:"
|
|
75
95
|
@stderr.puts " csvtool menu"
|
|
76
96
|
@stderr.puts " csvtool column <file> <column>"
|
|
97
|
+
@stderr.puts " csvtool stats <file> [--format text|json|csv] [--color auto|always|never]"
|
|
77
98
|
end
|
|
78
99
|
|
|
79
100
|
def run_column_command
|
|
@@ -84,7 +105,7 @@ module Csvtool
|
|
|
84
105
|
return 1
|
|
85
106
|
end
|
|
86
107
|
|
|
87
|
-
errors = Interface::CLI::Errors::Presenter.new(stdout: @
|
|
108
|
+
errors = Interface::CLI::Errors::Presenter.new(stdout: @stderr)
|
|
88
109
|
return errors.file_not_found(file_path) || 1 unless File.file?(file_path)
|
|
89
110
|
|
|
90
111
|
header_reader = Infrastructure::CSV::HeaderReader.new
|
|
@@ -103,5 +124,108 @@ module Csvtool
|
|
|
103
124
|
errors.cannot_read_file(file_path)
|
|
104
125
|
1
|
|
105
126
|
end
|
|
127
|
+
|
|
128
|
+
# Runs the non-interactive `stats` subcommand.
#
# Reads the file path and the --format/--color options from @argv, validates
# them, runs the stats use case against a comma-separated file with a header
# row, and prints the formatted result to @stdout. Diagnostics go to @stderr.
#
# @return [Integer] 0 on success, 1 on usage, validation or use-case failure
def run_stats_command
  file_path, format, color_mode = parse_stats_args(@argv[1..])
  unless file_path
    print_usage
    return 1
  end

  errors = Interface::CLI::Errors::Presenter.new(stdout: @stderr)

  # Report the first invalid option only; format is checked before colour.
  complaint =
    if !%w[text json csv].include?(format)
      "Invalid format: #{format}"
    elsif !%w[auto always never].include?(color_mode)
      "Invalid color mode: #{color_mode}"
    end
  if complaint
    @stderr.puts complaint
    return 1
  end

  stats_source = Domain::CsvStatsSession::StatsSource.new(path: file_path, separator: ",", headers_present: true)
  stats_options = Domain::CsvStatsSession::StatsOptions.new
  console = Domain::Shared::OutputDestination.console
  session = Domain::CsvStatsSession::StatsSession
            .start(source: stats_source, options: stats_options)
            .with_output_destination(console)
  outcome = Application::UseCases::RunCsvStats.new.call(session: session)

  if outcome.ok?
    formatter = Interface::CLI::Output::Formatters::StatsFormatter.new(
      table_renderer: Interface::CLI::Output::TableRenderer.new
    )
    rendered = formatter.call(data: outcome.data, format: format, max_width: terminal_width)
    print_stats_output(rendered, format: format, color_mode: color_mode)
    return 0
  end

  # Translate the use case's error symbol into a user-facing message.
  case outcome.error
  when :file_not_found then errors.file_not_found(outcome.data[:path])
  when :could_not_parse_csv then errors.could_not_parse_csv
  when :cannot_read_file then errors.cannot_read_file(outcome.data[:path])
  else @stderr.puts "Unknown error."
  end
  1
end
|
|
172
|
+
|
|
173
|
+
# Parses the arguments that follow the `stats` subcommand.
#
# The first argument is taken as the file path as-is. Every later argument is
# scanned for `--format` and `--color`, written either as `--flag=value` or as
# `--flag value`. Unrecognised arguments are ignored and a later occurrence of
# a flag overrides an earlier one.
#
# @param args [Array<String>] arguments after the subcommand name
# @return [Array] `[file_path, format, color_mode]`; `file_path` is nil when
#   `args` is empty, `format` defaults to "text" and `color_mode` to "auto"
def parse_stats_args(args)
  selected = { "--format" => "text", "--color" => "auto" }
  position = 1
  while position < args.length
    flag, inline_value = args[position].split("=", 2)
    if selected.key?(flag)
      if inline_value
        selected[flag] = inline_value
      else
        # Space-separated form: consume the next argument ("" when it is missing).
        position += 1
        selected[flag] = args[position].to_s
      end
    end
    position += 1
  end
  [args[0], selected["--format"], selected["--color"]]
end
|
|
195
|
+
|
|
196
|
+
# Writes already-formatted stats output to @stdout.
#
# Only the "text" format is passed through the colorizer; any other format is
# written untouched.
#
# @param output [String] formatted stats
# @param format [String] output format name
# @param color_mode [String] colour mode handed to ColorPolicy
def print_stats_output(output, format:, color_mode:)
  return @stdout.puts(output) unless format == "text"

  colorizer = Interface::CLI::Output::Colorizer.new(
    policy: Interface::CLI::Output::ColorPolicy.new(mode: color_mode, io: @stdout, env: @env)
  )
  @stdout.puts apply_text_color(output, colorizer: colorizer)
end
|
|
206
|
+
|
|
207
|
+
# Applies ANSI styling to the heading lines of the text stats report.
#
# The report title gets code "1;36"; the "Column completeness:" heading and any
# line that starts with a known table header cell get code "1". All other lines
# pass through unchanged. Line endings are stripped and the lines are re-joined
# with "\n", so a trailing newline in the input is not preserved.
#
# @param text [String] multi-line report
# @param colorizer [#call] invoked as `call(line, code:)`
# @return [String] the report with styled heading lines
def apply_text_color(text, colorizer:)
  styled = text.each_line.map do |raw|
    row = raw.chomp
    code =
      if row == "CSV Stats Summary"
        "1;36"
      elsif row == "Column completeness:" ||
            row.match?(/\A(Metric|Value|Column|Non-blank|Blank)(\s+\|.*)?\z/)
        "1"
      end
    code ? colorizer.call(row, code: code) : row
  end
  styled.join("\n")
end
|
|
222
|
+
|
|
223
|
+
# Determines the maximum width to use when rendering tables.
#
# Resolution order:
#   1. a positive integer in the COLUMNS entry of @env;
#   2. the column count reported by @stdout.winsize, when available;
#   3. a fallback of 80.
#
# `winsize` raises (for example Errno::ENOTTY) when the stream is not attached
# to a terminal, such as when output is piped, and may report 0 columns. Both
# cases fall back to 80 instead of crashing or producing a zero-width table.
#
# @return [Integer] a positive column count
def terminal_width
  columns = @env["COLUMNS"].to_i
  return columns if columns.positive?

  if @stdout.respond_to?(:winsize)
    begin
      reported = @stdout.winsize[1].to_i
      return reported if reported.positive?
    rescue SystemCallError, IOError
      # Not a terminal, or the stream is closed: use the default below.
    end
  end

  80
end
|
|
106
230
|
end
|
|
107
231
|
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvStatsSession
      # Groups what a stats run needs: the source to scan, the run options
      # and, optionally, where the result should go. Only readers are exposed;
      # changing the destination yields a new session.
      class StatsSession
        attr_reader :source, :options, :output_destination

        class << self
          # Starts a session that has no output destination yet.
          def start(source:, options:)
            new(source: source, options: options)
          end
        end

        # @param source the stats source to scan
        # @param options the run options
        # @param output_destination destination for the result; nil until chosen
        def initialize(source:, options:, output_destination: nil)
          @source, @options, @output_destination = source, options, output_destination
        end

        # Returns a copy of this session that targets +output_destination+;
        # the receiver is left unchanged.
        def with_output_destination(output_destination)
          self.class.new(
            source: source,
            options: options,
            output_destination: output_destination
          )
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module CsvStatsSession
      # Describes the CSV input of a stats run: the file path, the column
      # separator and whether the first row holds headers.
      class StatsSource
        attr_reader :path, :separator, :headers_present

        # @param path path of the CSV file
        # @param separator column separator
        # @param headers_present whether the first row is a header row
        def initialize(path:, separator:, headers_present:)
          @path, @separator, @headers_present = path, separator, headers_present
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Infrastructure
    module CSV
      # Streams a CSV file once and collects the row count, the column count
      # and blank/non-blank counts for each column.
      class CsvStatsScanner
        # @param csv object responding to `foreach` like the stdlib CSV class
        def initialize(csv: ::CSV)
          @csv = csv
        end

        # Scans +file_path+ and returns the collected statistics.
        #
        # With headers, columns are named after the headers of the first data
        # row. Without headers they are named "column_1", "column_2", and so
        # on. Rows shorter than the widest row seen so far count the missing
        # cells as blank. Rows wider than anything seen so far add
        # "column_N" entries, which start counting from the row that
        # introduced them.
        #
        # @param file_path [String] CSV file to read
        # @param col_sep [String] column separator
        # @param headers_present [Boolean] whether the first row is a header
        # @return [Hash] :row_count, :column_count, :headers (nil when
        #   headers are absent or no data row was read) and :column_stats
        #   (hashes with :name, :blank_count, :non_blank_count)
        def call(file_path:, col_sep:, headers_present:)
          data_row_count = 0
          headers = nil
          column_stats = []

          # Streaming scan: memory grows with per-column metrics, not row count.
          @csv.foreach(file_path, headers: headers_present, col_sep: col_sep) do |row|
            if headers_present
              headers ||= row.headers
              column_stats = headers.map { |name| new_stats(name) } if column_stats.empty?
              fields = row.fields
            else
              fields = row.is_a?(::CSV::Row) ? row.fields : row
            end

            # A row wider than every previous row (or than the header) gets
            # generated column entries instead of hitting a missing stats slot.
            column_stats << new_stats("column_#{column_stats.length + 1}") while column_stats.length < fields.length

            # Iterating the stats (not the fields) treats missing trailing
            # cells of short rows as blank.
            column_stats.each_with_index { |stats, index| apply_value(stats, fields[index]) }
            data_row_count += 1
          end

          {
            row_count: data_row_count,
            column_count: column_stats.length,
            headers: headers,
            column_stats: column_stats
          }
        end

        private

        # Builds an empty counter entry for one column.
        def new_stats(name)
          { name: name, blank_count: 0, non_blank_count: 0 }
        end

        # Counts +value+ as blank when it is nil or whitespace-only.
        def apply_value(stats, value)
          if value.to_s.strip.empty?
            stats[:blank_count] += 1
          else
            stats[:non_blank_count] += 1
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Infrastructure
    module Output
      # Writes a stats result to a CSV file as metric/value pairs.
      class CsvStatsFileWriter
        # Writes +data+ to +path+, replacing any existing file.
        #
        # The file has a "metric,value" header, the row and column counts, an
        # optional "headers" row (names joined with "|") and two rows per
        # column holding its non-blank and blank counts.
        #
        # @param path [String] destination file
        # @param data [Hash] stats with :row_count, :column_count, :headers
        #   and :column_stats
        def call(path:, data:)
          ::CSV.open(path, "w") do |out|
            out << ["metric", "value"]
            out << ["row_count", data[:row_count]]
            out << ["column_count", data[:column_count]]

            header_names = data[:headers]
            if !header_names.nil? && !header_names.empty?
              out << ["headers", header_names.join("|")]
            end

            data.fetch(:column_stats, []).each do |column|
              prefix = "column.#{column[:name]}"
              out << ["#{prefix}.non_blank", column[:non_blank_count]]
              out << ["#{prefix}.blank", column[:blank_count]]
            end
          end
        end
      end
    end
  end
end
|
|
@@ -4,9 +4,10 @@ module Csvtool
|
|
|
4
4
|
module Interface
|
|
5
5
|
module CLI
|
|
6
6
|
class MenuLoop
|
|
7
|
-
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:, split_action:)
|
|
7
|
+
def initialize(stdin:, stdout:, stderr: stdout, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:, dedupe_action:, parity_action:, split_action:, stats_action:)
|
|
8
8
|
@stdin = stdin
|
|
9
9
|
@stdout = stdout
|
|
10
|
+
@stderr = stderr
|
|
10
11
|
@menu_options = menu_options
|
|
11
12
|
@extract_column_action = extract_column_action
|
|
12
13
|
@extract_rows_action = extract_rows_action
|
|
@@ -14,12 +15,13 @@ module Csvtool
|
|
|
14
15
|
@dedupe_action = dedupe_action
|
|
15
16
|
@parity_action = parity_action
|
|
16
17
|
@split_action = split_action
|
|
18
|
+
@stats_action = stats_action
|
|
17
19
|
end
|
|
18
20
|
|
|
19
21
|
def run
|
|
20
22
|
loop do
|
|
21
23
|
print_menu
|
|
22
|
-
@
|
|
24
|
+
@stderr.print "> "
|
|
23
25
|
choice = @stdin.gets
|
|
24
26
|
return 0 if choice.nil?
|
|
25
27
|
|
|
@@ -37,9 +39,11 @@ module Csvtool
|
|
|
37
39
|
when "6"
|
|
38
40
|
@split_action.call
|
|
39
41
|
when "7"
|
|
42
|
+
@stats_action.call
|
|
43
|
+
when "8"
|
|
40
44
|
return 0
|
|
41
45
|
else
|
|
42
|
-
@
|
|
46
|
+
@stderr.puts "Please choose 1, 2, 3, 4, 5, 6, 7, or 8."
|
|
43
47
|
end
|
|
44
48
|
end
|
|
45
49
|
end
|
|
@@ -47,9 +51,9 @@ module Csvtool
|
|
|
47
51
|
private
|
|
48
52
|
|
|
49
53
|
def print_menu
|
|
50
|
-
@
|
|
54
|
+
@stderr.puts "CSV Tool Menu"
|
|
51
55
|
@menu_options.each_with_index do |option, index|
|
|
52
|
-
@
|
|
56
|
+
@stderr.puts "#{index + 1}. #{option}"
|
|
53
57
|
end
|
|
54
58
|
end
|
|
55
59
|
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Output
        # Decides whether ANSI colour should be emitted on a given stream.
        class ColorPolicy
          # @param mode [String] "always", "never", or anything else for
          #   automatic detection
          # @param io stream the output is written to
          # @param env [#[]] environment lookup, ENV by default
          def initialize(mode:, io:, env: ENV)
            @mode = mode
            @io = io
            @env = env
          end

          # Returns whether colour should be used.
          #
          # "always" and "never" are absolute. In automatic mode colour is
          # off when NO_COLOR is set to a non-empty value (an empty NO_COLOR
          # is treated as unset, per the NO_COLOR convention). Otherwise it
          # is on only when the stream reports that it is a TTY.
          def enabled?
            return true if @mode == "always"
            return false if @mode == "never"
            return false if no_color_requested?

            @io.respond_to?(:tty?) && @io.tty?
          end

          private

          # True when the NO_COLOR entry is present and not empty.
          def no_color_requested?
            !@env["NO_COLOR"].to_s.empty?
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/interface/cli/output/color_policy"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Output
        # Wraps text in ANSI escape sequences when its policy allows colour.
        class Colorizer
          # Builds a colorizer whose policy uses automatic detection for +io+.
          def self.auto(io:, env: ENV)
            new(policy: ColorPolicy.new(mode: "auto", io: io, env: env))
          end

          # @param policy [#enabled?] decides whether styling is applied
          def initialize(policy:)
            @policy = policy
          end

          # Returns +text+ wrapped in the SGR sequence for +code+ followed by
          # a reset, or +text+ itself when the policy disables colour.
          def call(text, code:)
            @policy.enabled? ? "\e[#{code}m#{text}\e[0m" : text
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Output
        module Formatters
          # Serialises one row of fields as a single CSV line.
          class CsvRowFormatter
            # @param fields [Array] cell values of the row
            # @param col_sep [String] column separator to use
            # @return [String] the CSV-encoded row without a line terminator
            def call(fields:, col_sep:)
              encoded = ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep)
              encoded.chomp
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
require "json"
|
|
4
|
+
|
|
5
|
+
module Csvtool
  module Interface
    module CLI
      module Output
        module Formatters
          # Renders a stats result hash as text, JSON or CSV.
          class StatsFormatter
            # @param table_renderer [#render] used for the text format; called
            #   as `render(headers:, rows:, max_width:)`
            def initialize(table_renderer:)
              @table_renderer = table_renderer
            end

            # Formats +data+ in the requested +format+.
            #
            # @param data [Hash] stats with :row_count, :column_count,
            #   :headers and :column_stats
            # @param format [String] "json" or "csv"; anything else is text
            # @param max_width [Integer] table width limit (text format only)
            # @return [String] the formatted output, without trailing newline
            def call(data:, format:, max_width: 80)
              case format
              when "json"
                JSON.generate(data)
              when "csv"
                csv_lines(data).join("\n")
              else
                text_lines(data, max_width: max_width).join("\n")
              end
            end

            private

            # Encodes the metric/value rows as CSV lines. The CSV library does
            # the quoting, so names containing commas, quotes or newlines stay
            # parseable. Names without such characters are emitted unquoted.
            def csv_lines(data)
              csv_rows(data).map { |row| ::CSV.generate_line(row, row_sep: "\n").chomp }
            end

            # Builds the metric/value pairs in output order.
            def csv_rows(data)
              rows = [%w[metric value], ["row_count", data[:row_count]], ["column_count", data[:column_count]]]
              rows << ["headers", data[:headers].join("|")] unless data[:headers].nil? || data[:headers].empty?
              (data[:column_stats] || []).each do |stats|
                rows << ["column.#{stats[:name]}.non_blank", stats[:non_blank_count]]
                rows << ["column.#{stats[:name]}.blank", stats[:blank_count]]
              end
              rows
            end

            # Builds the text report: a title, a summary table and, when
            # column stats are present, a per-column completeness table.
            def text_lines(data, max_width:)
              lines = ["CSV Stats Summary"]
              summary_rows = [["Rows", data[:row_count].to_s], ["Columns", data[:column_count].to_s]]
              summary_rows << ["Headers", data[:headers].join(", ")] unless data[:headers].nil? || data[:headers].empty?
              lines << @table_renderer.render(headers: ["Metric", "Value"], rows: summary_rows, max_width: max_width)

              return lines if data[:column_stats].nil? || data[:column_stats].empty?

              lines << ""
              lines << "Column completeness:"
              rows = data[:column_stats].map { |stats| [stats[:name], stats[:non_blank_count].to_s, stats[:blank_count].to_s] }
              lines << @table_renderer.render(headers: ["Column", "Non-blank", "Blank"], rows: rows, max_width: max_width)
              lines
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Output
        # Pairs the stream that carries data output with the stream that
        # carries user-interface text.
        class Streams
          attr_reader :data, :ui

          class << self
            # Builds a pair; +ui+ falls back to the data stream when omitted.
            def build(data:, ui: data)
              new(data: data, ui: ui)
            end
          end

          # @param data stream for data output
          # @param ui stream for user-interface text
          def initialize(data:, ui:)
            @data, @ui = data, ui
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Output
        # Renders headers and rows as a plain-text table with " | " column
        # separators and a dashed rule under the header.
        class TableRenderer
          # Columns are never shrunk below this width when fitting the table.
          MIN_COLUMN_WIDTH = 4

          # Renders the table as a single string with "\n" between lines.
          #
          # Column widths start at the widest cell of each column and are
          # shrunk, widest first, until the table fits +max_width+ or every
          # column has reached MIN_COLUMN_WIDTH. Over-long cells are
          # truncated. A row with more cells than there are headers adds
          # columns; the header row is then padded with empty cells so the
          # rule and the rows stay aligned.
          #
          # @param headers [Array] header cells
          # @param rows [Array<Array>] data rows
          # @param max_width [Integer] target maximum line width
          # @return [String] the rendered table
          def render(headers:, rows:, max_width: 80)
            widths = fit_widths(compute_widths(headers, rows), max_width)

            lines = [format_row(pad_cells(headers, widths.length), widths), separator(widths)]
            rows.each { |row| lines << format_row(row, widths) }
            lines.join("\n")
          end

          private

          # Widest cell per column, counting headers and all rows.
          def compute_widths(headers, rows)
            widths = headers.map { |header| header.to_s.length }
            rows.each do |row|
              row.each_with_index do |cell, index|
                # widths[index] is nil for a cell beyond the header count.
                widths[index] = [widths[index] || 0, cell.to_s.length].max
              end
            end
            widths
          end

          # Shrinks the widest column one character at a time until the
          # table fits, stopping once the widest column is at the minimum.
          def fit_widths(widths, max_width)
            return widths if total_width(widths) <= max_width

            adjusted = widths.dup
            while total_width(adjusted) > max_width
              widest = adjusted.max
              break if widest.nil? || widest <= MIN_COLUMN_WIDTH

              # On ties the first widest column is shrunk.
              adjusted[adjusted.index(widest)] -= 1
            end
            adjusted
          end

          # Line width: the cells plus three characters per column separator.
          def total_width(widths)
            widths.sum + (3 * (widths.length - 1))
          end

          def separator(widths)
            widths.map { |width| "-" * width }.join("-+-")
          end

          def format_row(row, widths)
            row.each_with_index.map do |cell, index|
              truncate(cell.to_s, widths[index]).ljust(widths[index])
            end.join(" | ")
          end

          # Appends empty cells so +cells+ has at least +count+ entries.
          def pad_cells(cells, count)
            list = cells.to_a
            list + Array.new([count - list.length, 0].max, "")
          end

          # Cuts +text+ to +width+, ending in "..." when there is room for it.
          def truncate(text, width)
            return text if text.length <= width
            return text[0, width] if width < MIN_COLUMN_WIDTH

            "#{text[0, width - 3]}..."
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/domain/csv_stats_session/stats_source"
|
|
4
|
+
require "csvtool/domain/csv_stats_session/stats_options"
|
|
5
|
+
require "csvtool/domain/csv_stats_session/stats_session"
|
|
6
|
+
|
|
7
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Builders
          # Assembles a stats session from the values collected by the CLI.
          class CsvStatsSessionBuilder
            # Creates the source and default options, starts a session with
            # them and attaches the output destination.
            #
            # @param file_path path of the CSV file
            # @param col_sep column separator
            # @param headers_present whether the first row is a header row
            # @param destination where the result should be written
            # @return the session returned by `with_output_destination`
            def call(file_path:, col_sep:, headers_present:, destination:)
              stats = Domain::CsvStatsSession
              input = stats::StatsSource.new(path: file_path, separator: col_sep, headers_present: headers_present)
              defaults = stats::StatsOptions.new

              stats::StatsSession
                .start(source: input, options: defaults)
                .with_output_destination(destination)
            end
          end
        end
      end
    end
  end
end
|