csvops 0.7.0.alpha → 0.8.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +61 -21
- data/docs/architecture.md +64 -4
- data/docs/release-v0.8.0-alpha.md +88 -0
- data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
- data/lib/csvtool/cli.rb +5 -1
- data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
- data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
- data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
- data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
- data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +77 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
- data/test/csvtool/cli_test.rb +93 -30
- data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
- data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +34 -11
- data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +146 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
- metadata +25 -1
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Presenters
          # Prints CSV stats results as plain-text lines on an injected stream.
          class CsvStatsPresenter
            # @param stdout [#puts] stream that receives every line printed by
            #   this presenter
            def initialize(stdout:)
              @stdout = stdout
            end

            # Prints the stats summary: a title line, the row and column
            # counts, the header names (only when present and non-empty) and
            # one completeness line per column (only when column stats are
            # present and non-empty).
            #
            # @param data [Hash] read with the keys :row_count, :column_count,
            #   :headers and :column_stats; each :column_stats entry is read
            #   with :name, :non_blank_count and :blank_count
            def print_summary(data)
              @stdout.puts "CSV Stats Summary"
              @stdout.puts "Rows: #{data[:row_count]}"
              @stdout.puts "Columns: #{data[:column_count]}"

              headers = data[:headers]
              @stdout.puts "Headers: #{headers.join(', ')}" unless headers.nil? || headers.empty?

              column_stats = data[:column_stats]
              # Nothing more to print when no per-column stats were supplied.
              return if column_stats.nil? || column_stats.empty?

              @stdout.puts "Column completeness:"
              column_stats.each do |stats|
                @stdout.puts " #{stats[:name]}: non_blank=#{stats[:non_blank_count]} blank=#{stats[:blank_count]}"
              end
            end

            # Prints a confirmation line naming the path that was written.
            #
            # @param path [#to_s] output path interpolated into the message
            def print_file_written(path)
              @stdout.puts "Wrote output to #{path}"
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csvtool/application/use_cases/run_csv_stats"
|
|
4
|
+
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
+
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
+
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
+
require "csvtool/interface/cli/prompts/headers_present_prompt"
|
|
8
|
+
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
9
|
+
require "csvtool/interface/cli/workflows/builders/csv_stats_session_builder"
|
|
10
|
+
require "csvtool/interface/cli/workflows/presenters/csv_stats_presenter"
|
|
11
|
+
require "csvtool/interface/cli/workflows/support/result_error_handler"
|
|
12
|
+
require "csvtool/interface/cli/workflows/support/output_destination_mapper"
|
|
13
|
+
require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
|
|
14
|
+
require "csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step"
|
|
15
|
+
require "csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step"
|
|
16
|
+
require "csvtool/interface/cli/workflows/steps/csv_stats/build_session_step"
|
|
17
|
+
require "csvtool/interface/cli/workflows/steps/csv_stats/execute_step"
|
|
18
|
+
|
|
19
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        # Interactive CSV stats workflow. Wires the prompts, session builder,
        # presenter and error handling into a four-step pipeline (collect
        # inputs, collect destination, build session, execute) and runs it.
        class RunCsvStatsWorkflow
          # @param stdin [Object] input stream handed to every prompt
          # @param stdout [Object] output stream handed to the prompts, the
          #   error presenter and the stats presenter
          # @param use_case [#call] stats use case placed in the pipeline
          #   context; defaults to a new Application::UseCases::RunCsvStats
          def initialize(stdin:, stdout:, use_case: Application::UseCases::RunCsvStats.new)
            @stdin = stdin
            @stdout = stdout
            @use_case = use_case
            @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
            @session_builder = Builders::CsvStatsSessionBuilder.new
            @presenter = Presenters::CsvStatsPresenter.new(stdout: stdout)
            @output_destination_mapper = Support::OutputDestinationMapper.new
            @result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
          end

          # Builds a fresh context and pipeline, then runs the pipeline.
          #
          # @return [nil] always; the pipeline's own return value is discarded
          def call
            context = build_context
            pipeline = build_pipeline
            pipeline.call(context)
            nil
          end

          private

          # Shared state handed to every pipeline step.
          def build_context
            {
              use_case: @use_case,
              session_builder: @session_builder,
              output_destination_mapper: @output_destination_mapper,
              presenter: @presenter,
              handle_error: method(:handle_error)
            }
          end

          # Assembles the step pipeline; prompts are created anew on each call.
          def build_pipeline
            Steps::WorkflowStepPipeline.new(steps: [
              Steps::CsvStats::CollectInputsStep.new(
                file_path_prompt: Interface::CLI::Prompts::FilePathPrompt.new(stdin: @stdin, stdout: @stdout),
                separator_prompt: Interface::CLI::Prompts::SeparatorPrompt.new(
                  stdin: @stdin,
                  stdout: @stdout,
                  errors: @errors
                ),
                headers_present_prompt: Interface::CLI::Prompts::HeadersPresentPrompt.new(
                  stdin: @stdin,
                  stdout: @stdout
                )
              ),
              Steps::CsvStats::CollectDestinationStep.new(
                output_destination_prompt: Interface::CLI::Prompts::OutputDestinationPrompt.new(
                  stdin: @stdin,
                  stdout: @stdout,
                  errors: @errors
                )
              ),
              Steps::CsvStats::BuildSessionStep.new,
              Steps::CsvStats::ExecuteStep.new
            ])
          end

          # Forwards a failed result to the result error handler together with
          # one callback per failure code. Each callback receives the result
          # and the error presenter.
          # NOTE(review): presumably the handler picks the callback matching
          # the result's error code - confirm against ResultErrorHandler.
          def handle_error(result)
            # Kept as an explicit positional Hash (not keyword arguments) so
            # the handler receives exactly what it received before.
            handlers = {
              file_not_found: ->(r, errors) { errors.file_not_found(r.data[:path]) },
              could_not_parse_csv: ->(_r, errors) { errors.could_not_parse_csv },
              cannot_read_file: ->(r, errors) { errors.cannot_read_file(r.data[:path]) },
              cannot_write_output_file: lambda { |r, errors|
                errors.cannot_write_output_file(r.data[:path], r.data[:error_class])
              }
            }
            @result_error_handler.call(result, handlers)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvStats
            # Pipeline step that turns the collected inputs into a session by
            # calling the session builder found in the context.
            class BuildSessionStep
              # Reads :session_builder, :file_path, :col_sep, :headers_present
              # and :output_destination from the context and stores the
              # builder's result under :session.
              #
              # @param context [Hash] mutable pipeline state
              # @return [nil]
              # @raise [KeyError] when any of the required keys is missing
              #   (Hash#fetch without a default)
              def call(context)
                builder = context.fetch(:session_builder)
                context[:session] = builder.call(
                  file_path: context.fetch(:file_path),
                  col_sep: context.fetch(:col_sep),
                  headers_present: context.fetch(:headers_present),
                  # The context key differs from the builder's keyword name.
                  destination: context.fetch(:output_destination)
                )
                nil
              end
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvStats
            # Pipeline step that asks where the output should go and stores the
            # mapped destination in the context.
            class CollectDestinationStep
              # @param output_destination_prompt [#call] invoked without
              #   arguments to obtain the user's choice
              def initialize(output_destination_prompt:)
                @output_destination_prompt = output_destination_prompt
              end

              # Runs the prompt; a nil answer returns :halt and leaves the
              # context untouched. Otherwise the answer is passed through the
              # context's :output_destination_mapper and stored under
              # :output_destination.
              #
              # @param context [Hash] mutable pipeline state
              # @return [:halt, nil] :halt when the prompt returned nil
              #   (presumably the pipeline's stop signal - confirm against
              #   WorkflowStepPipeline), nil otherwise
              def call(context)
                choice = @output_destination_prompt.call
                return :halt if choice.nil?

                mapper = context.fetch(:output_destination_mapper)
                context[:output_destination] = mapper.call(choice)
                nil
              end
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvStats
            # Pipeline step that collects the CSV path, the column separator
            # and the headers-present flag from three injected prompts.
            class CollectInputsStep
              # @param file_path_prompt [#call] called with a label: keyword
              # @param separator_prompt [#call] called without arguments
              # @param headers_present_prompt [#call] called without arguments
              def initialize(file_path_prompt:, separator_prompt:, headers_present_prompt:)
                @file_path_prompt = file_path_prompt
                @separator_prompt = separator_prompt
                @headers_present_prompt = headers_present_prompt
              end

              # Runs the prompts in order and stores their answers under
              # :file_path, :col_sep and :headers_present.
              #
              # @param context [Hash] mutable pipeline state
              # @return [:halt, nil] :halt when the separator prompt returned
              #   nil (presumably the pipeline's stop signal - confirm against
              #   WorkflowStepPipeline), nil otherwise
              def call(context)
                context[:file_path] = @file_path_prompt.call(label: "CSV file path: ")

                separator = @separator_prompt.call
                # On halt :file_path stays in the context; the headers prompt
                # is never shown.
                return :halt if separator.nil?

                context[:col_sep] = separator
                context[:headers_present] = @headers_present_prompt.call
                nil
              end
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvStats
            # Pipeline step that runs the stats use case for the built session
            # and either reports the failure or prints the results.
            class ExecuteStep
              # Calls the context's :use_case with the :session. A result whose
              # ok? is falsy is handed to :handle_error and the step returns
              # :halt. Otherwise the :presenter prints the summary and, when
              # the result data carries an :output_path, the written-file line.
              #
              # @param context [Hash] pipeline state; reads :use_case,
              #   :session, :handle_error and :presenter
              # @return [:halt, nil] :halt after a failed result, nil otherwise
              def call(context)
                result = context.fetch(:use_case).call(session: context.fetch(:session))
                unless result.ok?
                  context.fetch(:handle_error).call(result)
                  return :halt
                end

                presenter = context.fetch(:presenter)
                presenter.print_summary(result.data)
                # :output_path is only reported when it is set (truthy).
                presenter.print_file_written(result.data[:output_path]) if result.data[:output_path]
                nil
              end
            end
          end
        end
      end
    end
  end
end
|
data/lib/csvtool/version.rb
CHANGED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/application/use_cases/run_csv_stats"
|
|
5
|
+
require "csvtool/domain/csv_stats_session/stats_source"
|
|
6
|
+
require "csvtool/domain/csv_stats_session/stats_options"
|
|
7
|
+
require "csvtool/domain/csv_stats_session/stats_session"
|
|
8
|
+
require "csvtool/domain/shared/output_destination"
|
|
9
|
+
require "tmpdir"
|
|
10
|
+
|
|
11
|
+
# Tests for the RunCsvStats use case: the reported summary for comma, tab and
# custom separators, headerless input, blank counting, file output and the
# failure result when the output writer raises.
class RunCsvStatsTest < Minitest::Test
  # Absolute path of a fixture file, resolved relative to this test's directory.
  def fixture_path(name)
    File.expand_path("../../../fixtures/#{name}", __dir__)
  end

  def test_returns_core_stats_summary
    result = run_stats(build_session("sample_people.csv"))

    assert result.ok?
    assert_equal 3, result.data[:row_count]
    assert_equal 2, result.data[:column_count]
    assert_equal ["name", "city"], result.data[:headers]
    assert_equal [
      { name: "name", blank_count: 0, non_blank_count: 3 },
      { name: "city", blank_count: 0, non_blank_count: 3 }
    ], result.data[:column_stats]
  end

  def test_supports_tsv_separator
    result = run_stats(build_session("sample_people.tsv", separator: "\t"))

    assert result.ok?
    assert_equal 3, result.data[:row_count]
    assert_equal 2, result.data[:column_count]
    assert_equal ["name", "city"], result.data[:headers]
  end

  def test_supports_headerless_mode
    result = run_stats(build_session("sample_people_no_headers.csv", headers_present: false))

    assert result.ok?
    assert_equal 3, result.data[:row_count]
    assert_equal 2, result.data[:column_count]
    assert_nil result.data[:headers]
    assert_equal [
      { name: "column_1", blank_count: 0, non_blank_count: 3 },
      { name: "column_2", blank_count: 0, non_blank_count: 3 }
    ], result.data[:column_stats]
  end

  def test_supports_custom_separator
    result = run_stats(build_session("sample_people_colon.txt", separator: ":"))

    assert result.ok?
    assert_equal 3, result.data[:row_count]
    assert_equal 2, result.data[:column_count]
    assert_equal ["name", "city"], result.data[:headers]
  end

  def test_computes_blank_and_non_blank_counts
    result = run_stats(build_session("sample_people_blanks.csv"))

    assert result.ok?
    assert_equal [
      { name: "name", blank_count: 2, non_blank_count: 3 },
      { name: "city", blank_count: 1, non_blank_count: 4 }
    ], result.data[:column_stats]
  end

  def test_writes_stats_to_file_when_file_output_selected
    Dir.mktmpdir do |dir|
      destination = Csvtool::Domain::Shared::OutputDestination.file(path: File.join(dir, "stats.csv"))
      session = build_session("sample_people.csv").with_output_destination(destination)

      result = run_stats(session)

      assert result.ok?
      assert_equal session.output_destination.path, result.data[:output_path]
      csv_text = File.read(session.output_destination.path)
      assert_includes csv_text, "metric,value"
      assert_includes csv_text, "row_count,3"
      assert_includes csv_text, "column_count,2"
    end
  end

  def test_returns_cannot_write_output_file_when_writer_fails
    destination = Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv")
    session = build_session("sample_people.csv").with_output_destination(destination)
    # Stub writer that always fails, so nothing is written to /tmp/out.csv by it.
    writer = Object.new
    def writer.call(path:, data:)
      raise Errno::EACCES, path
    end

    result = run_stats(session, csv_stats_file_writer: writer)

    refute result.ok?
    assert_equal :cannot_write_output_file, result.error
    assert_equal "/tmp/out.csv", result.data[:path]
    assert_equal Errno::EACCES, result.data[:error_class]
  end

  private

  # Starts a stats session for the named fixture with default StatsOptions.
  def build_session(fixture_name, separator: ",", headers_present: true)
    source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
      path: fixture_path(fixture_name),
      separator: separator,
      headers_present: headers_present
    )
    Csvtool::Domain::CsvStatsSession::StatsSession.start(
      source: source,
      options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
    )
  end

  # Runs the use case for the session; keyword arguments are forwarded to the
  # use case constructor (none by default).
  def run_stats(session, **use_case_dependencies)
    Csvtool::Application::UseCases::RunCsvStats.new(**use_case_dependencies).call(session: session)
  end
end
|