csvops 0.7.0.alpha → 0.9.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/README.md +80 -20
  3. data/docs/architecture.md +67 -4
  4. data/docs/cli-output-conventions.md +49 -0
  5. data/docs/release-v0.8.0-alpha.md +88 -0
  6. data/docs/release-v0.9.0-alpha.md +80 -0
  7. data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
  8. data/lib/csvtool/cli.rb +136 -12
  9. data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
  10. data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
  11. data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
  12. data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
  13. data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
  14. data/lib/csvtool/interface/cli/menu_loop.rb +9 -5
  15. data/lib/csvtool/interface/cli/output/color_policy.rb +25 -0
  16. data/lib/csvtool/interface/cli/output/colorizer.rb +27 -0
  17. data/lib/csvtool/interface/cli/output/formatters/csv_row_formatter.rb +19 -0
  18. data/lib/csvtool/interface/cli/output/formatters/stats_formatter.rb +57 -0
  19. data/lib/csvtool/interface/cli/output/streams.rb +22 -0
  20. data/lib/csvtool/interface/cli/output/table_renderer.rb +70 -0
  21. data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
  22. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +17 -5
  23. data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +15 -4
  24. data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +15 -6
  25. data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +43 -0
  26. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +5 -4
  27. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +5 -4
  28. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +9 -8
  29. data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +6 -5
  30. data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +11 -10
  31. data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +78 -0
  32. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +9 -8
  33. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +7 -6
  34. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +8 -7
  35. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
  36. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
  37. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
  38. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
  39. data/lib/csvtool/version.rb +1 -1
  40. data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
  41. data/test/csvtool/cli_test.rb +376 -68
  42. data/test/csvtool/cli_unit_test.rb +5 -5
  43. data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
  44. data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
  45. data/test/csvtool/interface/cli/menu_loop_test.rb +34 -11
  46. data/test/csvtool/interface/cli/output/color_policy_test.rb +40 -0
  47. data/test/csvtool/interface/cli/output/colorizer_test.rb +28 -0
  48. data/test/csvtool/interface/cli/output/formatters/csv_row_formatter_test.rb +22 -0
  49. data/test/csvtool/interface/cli/output/formatters/stats_formatter_test.rb +51 -0
  50. data/test/csvtool/interface/cli/output/streams_test.rb +25 -0
  51. data/test/csvtool/interface/cli/output/table_renderer_test.rb +36 -0
  52. data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
  53. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +4 -1
  54. data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +5 -1
  55. data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +22 -4
  56. data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +39 -0
  57. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +10 -7
  58. data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +3 -1
  59. data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +5 -3
  60. data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +151 -0
  61. data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
  62. data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
  63. data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
  64. data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
  65. metadata +39 -1
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Csvtool
4
+ module Interface
5
+ module CLI
6
+ module Workflows
7
+ module Steps
8
+ module CsvStats
9
+ class ExecuteStep # Workflow step: runs the stats use case and reports the outcome through the presenter.
10
+ def call(context) # context is read via fetch for :use_case, :session, :handle_error and :presenter; returns :halt on failure, nil on success.
11
+ result = context.fetch(:use_case).call(session: context.fetch(:session))
12
+ unless result.ok? # failure: hand the result to the error handler and stop
13
+ context.fetch(:handle_error).call(result)
14
+ return :halt
15
+ end
16
+
17
+ context.fetch(:presenter).print_summary(result.data)
18
+ context.fetch(:presenter).print_file_written(result.data[:output_path]) if result.data[:output_path] # only when the result data carries an :output_path
19
+ nil
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Csvtool
4
- VERSION = "0.7.0.alpha"
4
+ VERSION = "0.9.0.alpha"
5
5
  end
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/application/use_cases/run_csv_stats"
5
+ require "csvtool/domain/csv_stats_session/stats_source"
6
+ require "csvtool/domain/csv_stats_session/stats_options"
7
+ require "csvtool/domain/csv_stats_session/stats_session"
8
+ require "csvtool/domain/shared/output_destination"
9
+ require "tmpdir"
10
+
11
+ class RunCsvStatsTest < Minitest::Test # Exercises RunCsvStats#call against fixture files and an injected failing file writer.
12
+ def fixture_path(name) # Absolute path of fixtures/<name>, resolved three directories above this file's directory.
13
+ File.expand_path("../../../fixtures/#{name}", __dir__)
14
+ end
15
+
16
+ def test_returns_core_stats_summary # Comma-separated fixture with headers: checks row/column counts, headers and per-column stats.
17
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
18
+ path: fixture_path("sample_people.csv"),
19
+ separator: ",",
20
+ headers_present: true
21
+ )
22
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
23
+ source: source,
24
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
25
+ )
26
+
27
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
28
+
29
+ assert result.ok?
30
+ assert_equal 3, result.data[:row_count]
31
+ assert_equal 2, result.data[:column_count]
32
+ assert_equal ["name", "city"], result.data[:headers]
33
+ assert_equal [
34
+ { name: "name", blank_count: 0, non_blank_count: 3 },
35
+ { name: "city", blank_count: 0, non_blank_count: 3 }
36
+ ], result.data[:column_stats]
37
+ end
38
+
39
+ def test_supports_tsv_separator # Tab separator: expects the same counts and headers from the .tsv fixture.
40
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
41
+ path: fixture_path("sample_people.tsv"),
42
+ separator: "\t",
43
+ headers_present: true
44
+ )
45
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
46
+ source: source,
47
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
48
+ )
49
+
50
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
51
+
52
+ assert result.ok?
53
+ assert_equal 3, result.data[:row_count]
54
+ assert_equal 2, result.data[:column_count]
55
+ assert_equal ["name", "city"], result.data[:headers]
56
+ end
57
+
58
+ def test_supports_headerless_mode # headers_present: false: expects nil headers and columns named column_1, column_2.
59
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
60
+ path: fixture_path("sample_people_no_headers.csv"),
61
+ separator: ",",
62
+ headers_present: false
63
+ )
64
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
65
+ source: source,
66
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
67
+ )
68
+
69
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
70
+
71
+ assert result.ok?
72
+ assert_equal 3, result.data[:row_count]
73
+ assert_equal 2, result.data[:column_count]
74
+ assert_nil result.data[:headers]
75
+ assert_equal [
76
+ { name: "column_1", blank_count: 0, non_blank_count: 3 },
77
+ { name: "column_2", blank_count: 0, non_blank_count: 3 }
78
+ ], result.data[:column_stats]
79
+ end
80
+
81
+ def test_supports_custom_separator # Colon separator on a .txt fixture: expects the same counts and headers.
82
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
83
+ path: fixture_path("sample_people_colon.txt"),
84
+ separator: ":",
85
+ headers_present: true
86
+ )
87
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
88
+ source: source,
89
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
90
+ )
91
+
92
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
93
+
94
+ assert result.ok?
95
+ assert_equal 3, result.data[:row_count]
96
+ assert_equal 2, result.data[:column_count]
97
+ assert_equal ["name", "city"], result.data[:headers]
98
+ end
99
+
100
+ def test_computes_blank_and_non_blank_counts # Fixture with blank cells: expects per-column blank and non-blank tallies.
101
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
102
+ path: fixture_path("sample_people_blanks.csv"),
103
+ separator: ",",
104
+ headers_present: true
105
+ )
106
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
107
+ source: source,
108
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
109
+ )
110
+
111
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
112
+
113
+ assert result.ok?
114
+ assert_equal [
115
+ { name: "name", blank_count: 2, non_blank_count: 3 },
116
+ { name: "city", blank_count: 1, non_blank_count: 4 }
117
+ ], result.data[:column_stats]
118
+ end
119
+
120
+ def test_writes_stats_to_file_when_file_output_selected # File destination: expects :output_path in the result and metric,value rows in the written file.
121
+ Dir.mktmpdir do |dir| # block form of Dir.mktmpdir; the output file is placed inside this directory
122
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
123
+ path: fixture_path("sample_people.csv"),
124
+ separator: ",",
125
+ headers_present: true
126
+ )
127
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
128
+ source: source,
129
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
130
+ ).with_output_destination(Csvtool::Domain::Shared::OutputDestination.file(path: File.join(dir, "stats.csv")))
131
+
132
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
133
+
134
+ assert result.ok?
135
+ assert_equal session.output_destination.path, result.data[:output_path]
136
+ csv_text = File.read(session.output_destination.path)
137
+ assert_includes csv_text, "metric,value"
138
+ assert_includes csv_text, "row_count,3"
139
+ assert_includes csv_text, "column_count,2"
140
+ end
141
+ end
142
+
143
+ def test_returns_cannot_write_output_file_when_writer_fails # Injected writer raises Errno::EACCES: expects a failed result with :cannot_write_output_file.
144
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
145
+ path: fixture_path("sample_people.csv"),
146
+ separator: ",",
147
+ headers_present: true
148
+ )
149
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
150
+ source: source,
151
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
152
+ ).with_output_destination(Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv"))
153
+ writer = Object.new # stand-in writer; its singleton #call below always raises
154
+ def writer.call(path:, data:)
155
+ raise Errno::EACCES, path
156
+ end
157
+
158
+ result = Csvtool::Application::UseCases::RunCsvStats.new(csv_stats_file_writer: writer).call(session: session)
159
+
160
+ refute result.ok?
161
+ assert_equal :cannot_write_output_file, result.error
162
+ assert_equal "/tmp/out.csv", result.data[:path]
163
+ assert_equal Errno::EACCES, result.data[:error_class]
164
+ end
165
+ end