csvops 0.7.0.alpha → 0.9.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +80 -20
- data/docs/architecture.md +67 -4
- data/docs/cli-output-conventions.md +49 -0
- data/docs/release-v0.8.0-alpha.md +88 -0
- data/docs/release-v0.9.0-alpha.md +80 -0
- data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
- data/lib/csvtool/cli.rb +136 -12
- data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
- data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
- data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
- data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
- data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +9 -5
- data/lib/csvtool/interface/cli/output/color_policy.rb +25 -0
- data/lib/csvtool/interface/cli/output/colorizer.rb +27 -0
- data/lib/csvtool/interface/cli/output/formatters/csv_row_formatter.rb +19 -0
- data/lib/csvtool/interface/cli/output/formatters/stats_formatter.rb +57 -0
- data/lib/csvtool/interface/cli/output/streams.rb +22 -0
- data/lib/csvtool/interface/cli/output/table_renderer.rb +70 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +17 -5
- data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +15 -4
- data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +15 -6
- data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +5 -4
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +5 -4
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +9 -8
- data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +6 -5
- data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +11 -10
- data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +78 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +9 -8
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +7 -6
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +8 -7
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
- data/test/csvtool/cli_test.rb +376 -68
- data/test/csvtool/cli_unit_test.rb +5 -5
- data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
- data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +34 -11
- data/test/csvtool/interface/cli/output/color_policy_test.rb +40 -0
- data/test/csvtool/interface/cli/output/colorizer_test.rb +28 -0
- data/test/csvtool/interface/cli/output/formatters/csv_row_formatter_test.rb +22 -0
- data/test/csvtool/interface/cli/output/formatters/stats_formatter_test.rb +51 -0
- data/test/csvtool/interface/cli/output/streams_test.rb +25 -0
- data/test/csvtool/interface/cli/output/table_renderer_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +4 -1
- data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +5 -1
- data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +22 -4
- data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +10 -7
- data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +3 -1
- data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +5 -3
- data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +151 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
- metadata +39 -1
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvStats
            # Workflow step that runs the CSV stats use case and reports its outcome.
            #
            # Everything the step needs is read from the +context+ hash:
            # * +:use_case+     - object responding to +call(session:)+
            # * +:session+      - session handed to the use case
            # * +:handle_error+ - callable invoked with a non-ok result
            # * +:presenter+    - object responding to +print_summary+ and
            #   +print_file_written+
            class ExecuteStep
              # Runs the use case and prints either the error or the stats summary.
              #
              # @param context [Hash] step context holding the keys listed on the class
              # @return [Symbol, nil] +:halt+ when the use case result is not ok
              #   (presumably a stop signal for the surrounding workflow - confirm
              #   against the caller), +nil+ otherwise
              # @raise [KeyError] when a required context key is missing
              def call(context)
                result = context.fetch(:use_case).call(session: context.fetch(:session))

                unless result.ok?
                  context.fetch(:handle_error).call(result)
                  return :halt
                end

                # The presenter is looked up only on the success path, so a failed
                # run never requires :presenter to be present in the context.
                presenter = context.fetch(:presenter)
                data = result.data

                presenter.print_summary(data)

                # :output_path is only reported when the result carries one.
                output_path = data[:output_path]
                presenter.print_file_written(output_path) if output_path

                nil
              end
            end
          end
        end
      end
    end
  end
end
|
data/lib/csvtool/version.rb
CHANGED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/application/use_cases/run_csv_stats"
|
|
5
|
+
require "csvtool/domain/csv_stats_session/stats_source"
|
|
6
|
+
require "csvtool/domain/csv_stats_session/stats_options"
|
|
7
|
+
require "csvtool/domain/csv_stats_session/stats_session"
|
|
8
|
+
require "csvtool/domain/shared/output_destination"
|
|
9
|
+
require "tmpdir"
|
|
10
|
+
|
|
11
|
+
# Exercises Csvtool::Application::UseCases::RunCsvStats against fixture files:
# separator handling, headerless input, blank/non-blank counting, file output,
# and the error result returned when the file writer raises.
class RunCsvStatsTest < Minitest::Test
  # Resolves a fixture file name relative to this test file's directory.
  def fixture_path(name)
    File.expand_path("../../../fixtures/#{name}", __dir__)
  end

  def test_returns_core_stats_summary
    result = run_stats(build_session("sample_people.csv"))

    assert result.ok?
    assert_equal 3, result.data[:row_count]
    assert_equal 2, result.data[:column_count]
    assert_equal ["name", "city"], result.data[:headers]
    assert_equal [
      { name: "name", blank_count: 0, non_blank_count: 3 },
      { name: "city", blank_count: 0, non_blank_count: 3 }
    ], result.data[:column_stats]
  end

  def test_supports_tsv_separator
    result = run_stats(build_session("sample_people.tsv", separator: "\t"))

    assert result.ok?
    assert_equal 3, result.data[:row_count]
    assert_equal 2, result.data[:column_count]
    assert_equal ["name", "city"], result.data[:headers]
  end

  def test_supports_headerless_mode
    result = run_stats(build_session("sample_people_no_headers.csv", headers_present: false))

    assert result.ok?
    assert_equal 3, result.data[:row_count]
    assert_equal 2, result.data[:column_count]
    assert_nil result.data[:headers]
    # Without a header row the use case is expected to report positional names.
    assert_equal [
      { name: "column_1", blank_count: 0, non_blank_count: 3 },
      { name: "column_2", blank_count: 0, non_blank_count: 3 }
    ], result.data[:column_stats]
  end

  def test_supports_custom_separator
    result = run_stats(build_session("sample_people_colon.txt", separator: ":"))

    assert result.ok?
    assert_equal 3, result.data[:row_count]
    assert_equal 2, result.data[:column_count]
    assert_equal ["name", "city"], result.data[:headers]
  end

  def test_computes_blank_and_non_blank_counts
    result = run_stats(build_session("sample_people_blanks.csv"))

    assert result.ok?
    assert_equal [
      { name: "name", blank_count: 2, non_blank_count: 3 },
      { name: "city", blank_count: 1, non_blank_count: 4 }
    ], result.data[:column_stats]
  end

  def test_writes_stats_to_file_when_file_output_selected
    Dir.mktmpdir do |dir|
      destination = Csvtool::Domain::Shared::OutputDestination.file(path: File.join(dir, "stats.csv"))
      session = build_session("sample_people.csv").with_output_destination(destination)

      result = run_stats(session)

      assert result.ok?
      assert_equal session.output_destination.path, result.data[:output_path]
      csv_text = File.read(session.output_destination.path)
      assert_includes csv_text, "metric,value"
      assert_includes csv_text, "row_count,3"
      assert_includes csv_text, "column_count,2"
    end
  end

  def test_returns_cannot_write_output_file_when_writer_fails
    destination = Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv")
    session = build_session("sample_people.csv").with_output_destination(destination)

    # Stub writer that always fails, so nothing is written to the path above.
    writer = Object.new
    def writer.call(path:, data:)
      raise Errno::EACCES, path
    end
    use_case = Csvtool::Application::UseCases::RunCsvStats.new(csv_stats_file_writer: writer)

    result = run_stats(session, use_case: use_case)

    refute result.ok?
    assert_equal :cannot_write_output_file, result.error
    assert_equal "/tmp/out.csv", result.data[:path]
    assert_equal Errno::EACCES, result.data[:error_class]
  end

  private

  # Builds a stats session for the named fixture with default StatsOptions.
  def build_session(fixture, separator: ",", headers_present: true)
    source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
      path: fixture_path(fixture),
      separator: separator,
      headers_present: headers_present
    )
    Csvtool::Domain::CsvStatsSession::StatsSession.start(
      source: source,
      options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
    )
  end

  # Runs the use case for +session+; pass +use_case:+ to inject a customised one.
  def run_stats(session, use_case: Csvtool::Application::UseCases::RunCsvStats.new)
    use_case.call(session: session)
  end
end
|