csvops 0.6.0.alpha → 0.8.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +103 -24
- data/docs/architecture.md +121 -4
- data/docs/release-v0.7.0-alpha.md +87 -0
- data/docs/release-v0.8.0-alpha.md +88 -0
- data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
- data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
- data/lib/csvtool/cli.rb +9 -1
- data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
- data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
- data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
- data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
- data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
- data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
- data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
- data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
- data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
- data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
- data/lib/csvtool/interface/cli/errors/presenter.rb +8 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
- data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
- data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
- data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
- data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +77 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
- data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
- data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
- data/test/csvtool/cli_test.rb +139 -29
- data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
- data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
- data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
- data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +104 -130
- data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
- data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
- data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
- data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +146 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
- data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
- data/test/fixtures/split_people_25.csv +26 -0
- metadata +58 -1
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/application/use_cases/run_csv_stats"
|
|
5
|
+
require "csvtool/domain/csv_stats_session/stats_source"
|
|
6
|
+
require "csvtool/domain/csv_stats_session/stats_options"
|
|
7
|
+
require "csvtool/domain/csv_stats_session/stats_session"
|
|
8
|
+
require "csvtool/domain/shared/output_destination"
|
|
9
|
+
require "tmpdir"
|
|
10
|
+
|
|
11
|
+
class RunCsvStatsTest < Minitest::Test
|
|
12
|
+
def fixture_path(name)
|
|
13
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_returns_core_stats_summary
|
|
17
|
+
source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
|
|
18
|
+
path: fixture_path("sample_people.csv"),
|
|
19
|
+
separator: ",",
|
|
20
|
+
headers_present: true
|
|
21
|
+
)
|
|
22
|
+
session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
|
|
23
|
+
source: source,
|
|
24
|
+
options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
|
|
28
|
+
|
|
29
|
+
assert result.ok?
|
|
30
|
+
assert_equal 3, result.data[:row_count]
|
|
31
|
+
assert_equal 2, result.data[:column_count]
|
|
32
|
+
assert_equal ["name", "city"], result.data[:headers]
|
|
33
|
+
assert_equal [
|
|
34
|
+
{ name: "name", blank_count: 0, non_blank_count: 3 },
|
|
35
|
+
{ name: "city", blank_count: 0, non_blank_count: 3 }
|
|
36
|
+
], result.data[:column_stats]
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def test_supports_tsv_separator
|
|
40
|
+
source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
|
|
41
|
+
path: fixture_path("sample_people.tsv"),
|
|
42
|
+
separator: "\t",
|
|
43
|
+
headers_present: true
|
|
44
|
+
)
|
|
45
|
+
session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
|
|
46
|
+
source: source,
|
|
47
|
+
options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
|
|
51
|
+
|
|
52
|
+
assert result.ok?
|
|
53
|
+
assert_equal 3, result.data[:row_count]
|
|
54
|
+
assert_equal 2, result.data[:column_count]
|
|
55
|
+
assert_equal ["name", "city"], result.data[:headers]
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def test_supports_headerless_mode
|
|
59
|
+
source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
|
|
60
|
+
path: fixture_path("sample_people_no_headers.csv"),
|
|
61
|
+
separator: ",",
|
|
62
|
+
headers_present: false
|
|
63
|
+
)
|
|
64
|
+
session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
|
|
65
|
+
source: source,
|
|
66
|
+
options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
|
|
70
|
+
|
|
71
|
+
assert result.ok?
|
|
72
|
+
assert_equal 3, result.data[:row_count]
|
|
73
|
+
assert_equal 2, result.data[:column_count]
|
|
74
|
+
assert_nil result.data[:headers]
|
|
75
|
+
assert_equal [
|
|
76
|
+
{ name: "column_1", blank_count: 0, non_blank_count: 3 },
|
|
77
|
+
{ name: "column_2", blank_count: 0, non_blank_count: 3 }
|
|
78
|
+
], result.data[:column_stats]
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def test_supports_custom_separator
|
|
82
|
+
source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
|
|
83
|
+
path: fixture_path("sample_people_colon.txt"),
|
|
84
|
+
separator: ":",
|
|
85
|
+
headers_present: true
|
|
86
|
+
)
|
|
87
|
+
session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
|
|
88
|
+
source: source,
|
|
89
|
+
options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
|
|
93
|
+
|
|
94
|
+
assert result.ok?
|
|
95
|
+
assert_equal 3, result.data[:row_count]
|
|
96
|
+
assert_equal 2, result.data[:column_count]
|
|
97
|
+
assert_equal ["name", "city"], result.data[:headers]
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def test_computes_blank_and_non_blank_counts
|
|
101
|
+
source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
|
|
102
|
+
path: fixture_path("sample_people_blanks.csv"),
|
|
103
|
+
separator: ",",
|
|
104
|
+
headers_present: true
|
|
105
|
+
)
|
|
106
|
+
session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
|
|
107
|
+
source: source,
|
|
108
|
+
options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
|
|
112
|
+
|
|
113
|
+
assert result.ok?
|
|
114
|
+
assert_equal [
|
|
115
|
+
{ name: "name", blank_count: 2, non_blank_count: 3 },
|
|
116
|
+
{ name: "city", blank_count: 1, non_blank_count: 4 }
|
|
117
|
+
], result.data[:column_stats]
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def test_writes_stats_to_file_when_file_output_selected
|
|
121
|
+
Dir.mktmpdir do |dir|
|
|
122
|
+
source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
|
|
123
|
+
path: fixture_path("sample_people.csv"),
|
|
124
|
+
separator: ",",
|
|
125
|
+
headers_present: true
|
|
126
|
+
)
|
|
127
|
+
session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
|
|
128
|
+
source: source,
|
|
129
|
+
options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
|
|
130
|
+
).with_output_destination(Csvtool::Domain::Shared::OutputDestination.file(path: File.join(dir, "stats.csv")))
|
|
131
|
+
|
|
132
|
+
result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
|
|
133
|
+
|
|
134
|
+
assert result.ok?
|
|
135
|
+
assert_equal session.output_destination.path, result.data[:output_path]
|
|
136
|
+
csv_text = File.read(session.output_destination.path)
|
|
137
|
+
assert_includes csv_text, "metric,value"
|
|
138
|
+
assert_includes csv_text, "row_count,3"
|
|
139
|
+
assert_includes csv_text, "column_count,2"
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def test_returns_cannot_write_output_file_when_writer_fails
|
|
144
|
+
source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
|
|
145
|
+
path: fixture_path("sample_people.csv"),
|
|
146
|
+
separator: ",",
|
|
147
|
+
headers_present: true
|
|
148
|
+
)
|
|
149
|
+
session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
|
|
150
|
+
source: source,
|
|
151
|
+
options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
|
|
152
|
+
).with_output_destination(Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv"))
|
|
153
|
+
writer = Object.new
|
|
154
|
+
def writer.call(path:, data:)
|
|
155
|
+
raise Errno::EACCES, path
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
result = Csvtool::Application::UseCases::RunCsvStats.new(csv_stats_file_writer: writer).call(session: session)
|
|
159
|
+
|
|
160
|
+
refute result.ok?
|
|
161
|
+
assert_equal :cannot_write_output_file, result.error
|
|
162
|
+
assert_equal "/tmp/out.csv", result.data[:path]
|
|
163
|
+
assert_equal Errno::EACCES, result.data[:error_class]
|
|
164
|
+
end
|
|
165
|
+
end
|
data/test/csvtool/cli_test.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require_relative "../test_helper"
|
|
4
4
|
require "csvtool/cli"
|
|
5
5
|
require "tmpdir"
|
|
6
|
+
require "fileutils"
|
|
6
7
|
|
|
7
8
|
class TestCli < Minitest::Test
|
|
8
9
|
def fixture_path(name)
|
|
@@ -11,11 +12,120 @@ class TestCli < Minitest::Test
|
|
|
11
12
|
|
|
12
13
|
def test_menu_can_exit_cleanly
|
|
13
14
|
output = StringIO.new
|
|
14
|
-
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("
|
|
15
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("8\n"), stdout: output, stderr: StringIO.new)
|
|
15
16
|
assert_equal 0, status
|
|
16
17
|
assert_includes output.string, "CSV Tool Menu"
|
|
17
18
|
end
|
|
18
19
|
|
|
20
|
+
def test_stats_workflow_shell_can_run_and_return_to_menu
|
|
21
|
+
output = StringIO.new
|
|
22
|
+
input = [
|
|
23
|
+
"7",
|
|
24
|
+
fixture_path("sample_people.csv"),
|
|
25
|
+
"",
|
|
26
|
+
"",
|
|
27
|
+
"",
|
|
28
|
+
"8"
|
|
29
|
+
].join("\n") + "\n"
|
|
30
|
+
|
|
31
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
32
|
+
|
|
33
|
+
assert_equal 0, status
|
|
34
|
+
assert_includes output.string, "CSV Stats Summary"
|
|
35
|
+
assert_includes output.string, "Rows: 3"
|
|
36
|
+
assert_includes output.string, "Columns: 2"
|
|
37
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def test_stats_workflow_missing_file_returns_to_menu
|
|
41
|
+
output = StringIO.new
|
|
42
|
+
input = [
|
|
43
|
+
"7",
|
|
44
|
+
"/tmp/does-not-exist.csv",
|
|
45
|
+
"",
|
|
46
|
+
"",
|
|
47
|
+
"",
|
|
48
|
+
"8"
|
|
49
|
+
].join("\n") + "\n"
|
|
50
|
+
|
|
51
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
52
|
+
|
|
53
|
+
assert_equal 0, status
|
|
54
|
+
assert_includes output.string, "File not found: /tmp/does-not-exist.csv"
|
|
55
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def test_stats_workflow_can_write_output_to_file
|
|
59
|
+
output = StringIO.new
|
|
60
|
+
|
|
61
|
+
Dir.mktmpdir do |dir|
|
|
62
|
+
output_path = File.join(dir, "stats.csv")
|
|
63
|
+
input = [
|
|
64
|
+
"7",
|
|
65
|
+
fixture_path("sample_people.csv"),
|
|
66
|
+
"",
|
|
67
|
+
"",
|
|
68
|
+
"2",
|
|
69
|
+
output_path,
|
|
70
|
+
"8"
|
|
71
|
+
].join("\n") + "\n"
|
|
72
|
+
|
|
73
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
74
|
+
|
|
75
|
+
assert_equal 0, status
|
|
76
|
+
assert_includes output.string, "Wrote output to #{output_path}"
|
|
77
|
+
csv_text = File.read(output_path)
|
|
78
|
+
assert_includes csv_text, "metric,value"
|
|
79
|
+
assert_includes csv_text, "row_count,3"
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def test_split_workflow_splits_csv_in_menu_flow
|
|
84
|
+
output = StringIO.new
|
|
85
|
+
Dir.mktmpdir do |dir|
|
|
86
|
+
source_path = File.join(dir, "people.csv")
|
|
87
|
+
FileUtils.cp(fixture_path("split_people_25.csv"), source_path)
|
|
88
|
+
input = [
|
|
89
|
+
"6",
|
|
90
|
+
source_path,
|
|
91
|
+
"",
|
|
92
|
+
"",
|
|
93
|
+
"10",
|
|
94
|
+
"",
|
|
95
|
+
"",
|
|
96
|
+
"",
|
|
97
|
+
"",
|
|
98
|
+
"8"
|
|
99
|
+
].join("\n") + "\n"
|
|
100
|
+
|
|
101
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
102
|
+
|
|
103
|
+
assert_equal 0, status
|
|
104
|
+
assert_includes output.string, "Chunks written: 3"
|
|
105
|
+
assert File.file?(File.join(dir, "people_part_001.csv"))
|
|
106
|
+
assert File.file?(File.join(dir, "people_part_002.csv"))
|
|
107
|
+
assert File.file?(File.join(dir, "people_part_003.csv"))
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def test_split_workflow_invalid_chunk_size_returns_to_menu
|
|
112
|
+
output = StringIO.new
|
|
113
|
+
input = [
|
|
114
|
+
"6",
|
|
115
|
+
fixture_path("sample_people.csv"),
|
|
116
|
+
"",
|
|
117
|
+
"",
|
|
118
|
+
"0",
|
|
119
|
+
"8"
|
|
120
|
+
].join("\n") + "\n"
|
|
121
|
+
|
|
122
|
+
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
123
|
+
|
|
124
|
+
assert_equal 0, status
|
|
125
|
+
assert_includes output.string, "Chunk size must be a positive integer."
|
|
126
|
+
assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
|
|
127
|
+
end
|
|
128
|
+
|
|
19
129
|
def test_end_to_end_console_happy_path_prints_expected_values
|
|
20
130
|
input = [
|
|
21
131
|
"1",
|
|
@@ -26,7 +136,7 @@ class TestCli < Minitest::Test
|
|
|
26
136
|
"",
|
|
27
137
|
"y",
|
|
28
138
|
"",
|
|
29
|
-
"
|
|
139
|
+
"8"
|
|
30
140
|
].join("\n") + "\n"
|
|
31
141
|
|
|
32
142
|
output = StringIO.new
|
|
@@ -58,7 +168,7 @@ class TestCli < Minitest::Test
|
|
|
58
168
|
"2",
|
|
59
169
|
"3",
|
|
60
170
|
"",
|
|
61
|
-
"
|
|
171
|
+
"8"
|
|
62
172
|
].join("\n") + "\n"
|
|
63
173
|
|
|
64
174
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -79,7 +189,7 @@ class TestCli < Minitest::Test
|
|
|
79
189
|
"0",
|
|
80
190
|
"3",
|
|
81
191
|
"",
|
|
82
|
-
"
|
|
192
|
+
"8"
|
|
83
193
|
].join("\n") + "\n"
|
|
84
194
|
|
|
85
195
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -98,7 +208,7 @@ class TestCli < Minitest::Test
|
|
|
98
208
|
"2",
|
|
99
209
|
"3",
|
|
100
210
|
"",
|
|
101
|
-
"
|
|
211
|
+
"8"
|
|
102
212
|
].join("\n") + "\n"
|
|
103
213
|
|
|
104
214
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -119,7 +229,7 @@ class TestCli < Minitest::Test
|
|
|
119
229
|
"2",
|
|
120
230
|
"3",
|
|
121
231
|
"",
|
|
122
|
-
"
|
|
232
|
+
"8"
|
|
123
233
|
].join("\n") + "\n"
|
|
124
234
|
|
|
125
235
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -144,7 +254,7 @@ class TestCli < Minitest::Test
|
|
|
144
254
|
"3",
|
|
145
255
|
"2",
|
|
146
256
|
output_path,
|
|
147
|
-
"
|
|
257
|
+
"8"
|
|
148
258
|
].join("\n") + "\n"
|
|
149
259
|
|
|
150
260
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -164,7 +274,7 @@ class TestCli < Minitest::Test
|
|
|
164
274
|
"1",
|
|
165
275
|
"2",
|
|
166
276
|
"",
|
|
167
|
-
"
|
|
277
|
+
"8"
|
|
168
278
|
].join("\n") + "\n"
|
|
169
279
|
|
|
170
280
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -184,7 +294,7 @@ class TestCli < Minitest::Test
|
|
|
184
294
|
"",
|
|
185
295
|
"",
|
|
186
296
|
"",
|
|
187
|
-
"
|
|
297
|
+
"8"
|
|
188
298
|
].join("\n") + "\n"
|
|
189
299
|
|
|
190
300
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -209,7 +319,7 @@ class TestCli < Minitest::Test
|
|
|
209
319
|
"",
|
|
210
320
|
"2",
|
|
211
321
|
output_path,
|
|
212
|
-
"
|
|
322
|
+
"8"
|
|
213
323
|
].join("\n") + "\n"
|
|
214
324
|
|
|
215
325
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -231,7 +341,7 @@ class TestCli < Minitest::Test
|
|
|
231
341
|
"",
|
|
232
342
|
"",
|
|
233
343
|
"",
|
|
234
|
-
"
|
|
344
|
+
"8"
|
|
235
345
|
].join("\n") + "\n"
|
|
236
346
|
|
|
237
347
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -250,7 +360,7 @@ class TestCli < Minitest::Test
|
|
|
250
360
|
"n",
|
|
251
361
|
"",
|
|
252
362
|
"",
|
|
253
|
-
"
|
|
363
|
+
"8"
|
|
254
364
|
].join("\n") + "\n"
|
|
255
365
|
|
|
256
366
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -270,7 +380,7 @@ class TestCli < Minitest::Test
|
|
|
270
380
|
"",
|
|
271
381
|
"",
|
|
272
382
|
"abc",
|
|
273
|
-
"
|
|
383
|
+
"8"
|
|
274
384
|
].join("\n") + "\n"
|
|
275
385
|
|
|
276
386
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -295,7 +405,7 @@ class TestCli < Minitest::Test
|
|
|
295
405
|
"",
|
|
296
406
|
"",
|
|
297
407
|
"",
|
|
298
|
-
"
|
|
408
|
+
"8"
|
|
299
409
|
].join("\n") + "\n"
|
|
300
410
|
|
|
301
411
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -329,7 +439,7 @@ class TestCli < Minitest::Test
|
|
|
329
439
|
"",
|
|
330
440
|
"2",
|
|
331
441
|
output_path,
|
|
332
|
-
"
|
|
442
|
+
"8"
|
|
333
443
|
].join("\n") + "\n"
|
|
334
444
|
|
|
335
445
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -356,7 +466,7 @@ class TestCli < Minitest::Test
|
|
|
356
466
|
"",
|
|
357
467
|
"",
|
|
358
468
|
"",
|
|
359
|
-
"
|
|
469
|
+
"8"
|
|
360
470
|
].join("\n") + "\n"
|
|
361
471
|
|
|
362
472
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -382,7 +492,7 @@ class TestCli < Minitest::Test
|
|
|
382
492
|
"",
|
|
383
493
|
"",
|
|
384
494
|
"",
|
|
385
|
-
"
|
|
495
|
+
"8"
|
|
386
496
|
].join("\n") + "\n"
|
|
387
497
|
|
|
388
498
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -402,7 +512,7 @@ class TestCli < Minitest::Test
|
|
|
402
512
|
fixture_path("sample_people.csv"),
|
|
403
513
|
"",
|
|
404
514
|
"",
|
|
405
|
-
"
|
|
515
|
+
"8"
|
|
406
516
|
].join("\n") + "\n"
|
|
407
517
|
|
|
408
518
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -423,7 +533,7 @@ class TestCli < Minitest::Test
|
|
|
423
533
|
fixture_path("parity_people_reordered.tsv"),
|
|
424
534
|
"2",
|
|
425
535
|
"",
|
|
426
|
-
"
|
|
536
|
+
"8"
|
|
427
537
|
].join("\n") + "\n"
|
|
428
538
|
|
|
429
539
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -441,7 +551,7 @@ class TestCli < Minitest::Test
|
|
|
441
551
|
fixture_path("sample_people_no_headers.csv"),
|
|
442
552
|
"",
|
|
443
553
|
"n",
|
|
444
|
-
"
|
|
554
|
+
"8"
|
|
445
555
|
].join("\n") + "\n"
|
|
446
556
|
|
|
447
557
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -459,7 +569,7 @@ class TestCli < Minitest::Test
|
|
|
459
569
|
fixture_path("parity_people_header_mismatch.csv"),
|
|
460
570
|
"",
|
|
461
571
|
"",
|
|
462
|
-
"
|
|
572
|
+
"8"
|
|
463
573
|
].join("\n") + "\n"
|
|
464
574
|
|
|
465
575
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -477,7 +587,7 @@ class TestCli < Minitest::Test
|
|
|
477
587
|
fixture_path("parity_people_mismatch.csv"),
|
|
478
588
|
"",
|
|
479
589
|
"",
|
|
480
|
-
"
|
|
590
|
+
"8"
|
|
481
591
|
].join("\n") + "\n"
|
|
482
592
|
|
|
483
593
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -499,7 +609,7 @@ class TestCli < Minitest::Test
|
|
|
499
609
|
fixture_path("sample_people.csv"),
|
|
500
610
|
"",
|
|
501
611
|
"",
|
|
502
|
-
"
|
|
612
|
+
"8"
|
|
503
613
|
].join("\n") + "\n"
|
|
504
614
|
|
|
505
615
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -518,7 +628,7 @@ class TestCli < Minitest::Test
|
|
|
518
628
|
"/tmp/not-there-right.csv",
|
|
519
629
|
"",
|
|
520
630
|
"",
|
|
521
|
-
"
|
|
631
|
+
"8"
|
|
522
632
|
].join("\n") + "\n"
|
|
523
633
|
|
|
524
634
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -537,7 +647,7 @@ class TestCli < Minitest::Test
|
|
|
537
647
|
fixture_path("sample_people_bad_tail.csv"),
|
|
538
648
|
"",
|
|
539
649
|
"",
|
|
540
|
-
"
|
|
650
|
+
"8"
|
|
541
651
|
].join("\n") + "\n"
|
|
542
652
|
|
|
543
653
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -564,7 +674,7 @@ class TestCli < Minitest::Test
|
|
|
564
674
|
"y",
|
|
565
675
|
"2",
|
|
566
676
|
output_path,
|
|
567
|
-
"
|
|
677
|
+
"8"
|
|
568
678
|
].join("\n") + "\n"
|
|
569
679
|
|
|
570
680
|
status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
|
|
@@ -584,7 +694,7 @@ class TestCli < Minitest::Test
|
|
|
584
694
|
"1",
|
|
585
695
|
"",
|
|
586
696
|
"n",
|
|
587
|
-
"
|
|
697
|
+
"8"
|
|
588
698
|
].join("\n") + "\n"
|
|
589
699
|
|
|
590
700
|
output = StringIO.new
|
|
@@ -599,7 +709,7 @@ class TestCli < Minitest::Test
|
|
|
599
709
|
output = StringIO.new
|
|
600
710
|
status = Csvtool::CLI.start(
|
|
601
711
|
["menu"],
|
|
602
|
-
stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\
|
|
712
|
+
stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n7\n"),
|
|
603
713
|
stdout: output,
|
|
604
714
|
stderr: StringIO.new
|
|
605
715
|
)
|
|
@@ -620,7 +730,7 @@ class TestCli < Minitest::Test
|
|
|
620
730
|
"y",
|
|
621
731
|
"2",
|
|
622
732
|
"/tmp/not-a-dir/out.csv",
|
|
623
|
-
"
|
|
733
|
+
"8"
|
|
624
734
|
].join("\n") + "\n"
|
|
625
735
|
|
|
626
736
|
output = StringIO.new
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/csv/csv_splitter"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
class CsvSplitterTest < Minitest::Test
|
|
8
|
+
def test_splits_large_file_in_order
|
|
9
|
+
splitter = Csvtool::Infrastructure::CSV::CsvSplitter.new
|
|
10
|
+
|
|
11
|
+
Dir.mktmpdir do |dir|
|
|
12
|
+
source_path = File.join(dir, "large.csv")
|
|
13
|
+
File.open(source_path, "w") do |f|
|
|
14
|
+
f.puts "id,value"
|
|
15
|
+
5_000.times { |i| f.puts "#{i + 1},v#{i + 1}" }
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
stats = splitter.call(
|
|
19
|
+
file_path: source_path,
|
|
20
|
+
col_sep: ",",
|
|
21
|
+
headers_present: true,
|
|
22
|
+
chunk_size: 1_000,
|
|
23
|
+
output_directory: dir,
|
|
24
|
+
file_prefix: "large",
|
|
25
|
+
overwrite_existing: false
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
assert_equal 5, stats[:chunk_count]
|
|
29
|
+
assert_equal 5_000, stats[:data_rows]
|
|
30
|
+
assert_equal [1_000, 1_000, 1_000, 1_000, 1_000], stats[:chunk_row_counts]
|
|
31
|
+
|
|
32
|
+
first_chunk = File.read(File.join(dir, "large_part_001.csv")).lines.map(&:strip)
|
|
33
|
+
last_chunk = File.read(File.join(dir, "large_part_005.csv")).lines.map(&:strip)
|
|
34
|
+
assert_equal "id,value", first_chunk.first
|
|
35
|
+
assert_equal "1,v1", first_chunk[1]
|
|
36
|
+
assert_equal "1000,v1000", first_chunk[1000]
|
|
37
|
+
assert_equal "4001,v4001", last_chunk[1]
|
|
38
|
+
assert_equal "5000,v5000", last_chunk[1000]
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def test_streaming_split_handles_headerless_file
|
|
43
|
+
splitter = Csvtool::Infrastructure::CSV::CsvSplitter.new
|
|
44
|
+
|
|
45
|
+
Dir.mktmpdir do |dir|
|
|
46
|
+
source_path = File.join(dir, "large_no_headers.csv")
|
|
47
|
+
File.open(source_path, "w") do |f|
|
|
48
|
+
2_500.times { |i| f.puts "#{i + 1},v#{i + 1}" }
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
stats = splitter.call(
|
|
52
|
+
file_path: source_path,
|
|
53
|
+
col_sep: ",",
|
|
54
|
+
headers_present: false,
|
|
55
|
+
chunk_size: 1_000,
|
|
56
|
+
output_directory: dir,
|
|
57
|
+
file_prefix: "large_no_headers",
|
|
58
|
+
overwrite_existing: false
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
assert_equal 3, stats[:chunk_count]
|
|
62
|
+
assert_equal 2_500, stats[:data_rows]
|
|
63
|
+
assert_equal [1_000, 1_000, 500], stats[:chunk_row_counts]
|
|
64
|
+
first_line = File.read(File.join(dir, "large_no_headers_part_001.csv")).lines.first.strip
|
|
65
|
+
assert_equal "1,v1", first_line
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csv"
|
|
5
|
+
require "csvtool/infrastructure/csv/csv_stats_scanner"
|
|
6
|
+
require "tmpdir"
|
|
7
|
+
|
|
8
|
+
class CsvStatsScannerTest < Minitest::Test
|
|
9
|
+
def fixture_path(name)
|
|
10
|
+
File.expand_path("../../../fixtures/#{name}", __dir__)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def test_scans_headers_mode_with_streaming_foreach
|
|
14
|
+
source = fixture_path("sample_people_blanks.csv")
|
|
15
|
+
csv = Object.new
|
|
16
|
+
received = nil
|
|
17
|
+
|
|
18
|
+
define_singleton_foreach(csv) do |path, headers:, col_sep:, &block|
|
|
19
|
+
received = { path: path, headers: headers, col_sep: col_sep }
|
|
20
|
+
::CSV.foreach(path, headers: headers, col_sep: col_sep, &block)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
result = Csvtool::Infrastructure::CSV::CsvStatsScanner.new(csv: csv).call(
|
|
24
|
+
file_path: source,
|
|
25
|
+
col_sep: ",",
|
|
26
|
+
headers_present: true
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
assert_equal({ path: source, headers: true, col_sep: "," }, received)
|
|
30
|
+
assert_equal 5, result[:row_count]
|
|
31
|
+
assert_equal 2, result[:column_count]
|
|
32
|
+
assert_equal ["name", "city"], result[:headers]
|
|
33
|
+
assert_equal [
|
|
34
|
+
{ name: "name", blank_count: 2, non_blank_count: 3 },
|
|
35
|
+
{ name: "city", blank_count: 1, non_blank_count: 4 }
|
|
36
|
+
], result[:column_stats]
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def test_scans_large_file_in_single_pass_shape
|
|
40
|
+
Dir.mktmpdir do |dir|
|
|
41
|
+
path = File.join(dir, "large.csv")
|
|
42
|
+
File.open(path, "w") do |f|
|
|
43
|
+
f.puts("id,value")
|
|
44
|
+
20_000.times { |i| f.puts("#{i},v#{i}") }
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
result = Csvtool::Infrastructure::CSV::CsvStatsScanner.new.call(
|
|
48
|
+
file_path: path,
|
|
49
|
+
col_sep: ",",
|
|
50
|
+
headers_present: true
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
assert_equal 20_000, result[:row_count]
|
|
54
|
+
assert_equal 2, result[:column_count]
|
|
55
|
+
assert_equal ["id", "value"], result[:headers]
|
|
56
|
+
assert_equal [
|
|
57
|
+
{ name: "id", blank_count: 0, non_blank_count: 20_000 },
|
|
58
|
+
{ name: "value", blank_count: 0, non_blank_count: 20_000 }
|
|
59
|
+
], result[:column_stats]
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
private
|
|
64
|
+
|
|
65
|
+
def define_singleton_foreach(obj, &implementation)
|
|
66
|
+
obj.define_singleton_method(:foreach, &implementation)
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../test_helper"
|
|
4
|
+
require "csvtool/infrastructure/output/csv_split_manifest_writer"
|
|
5
|
+
require "tmpdir"
|
|
6
|
+
|
|
7
|
+
class CsvSplitManifestWriterTest < Minitest::Test
|
|
8
|
+
def test_writes_manifest_csv
|
|
9
|
+
writer = Csvtool::Infrastructure::Output::CsvSplitManifestWriter.new
|
|
10
|
+
|
|
11
|
+
Dir.mktmpdir do |dir|
|
|
12
|
+
path = File.join(dir, "manifest.csv")
|
|
13
|
+
writer.call(
|
|
14
|
+
path: path,
|
|
15
|
+
chunk_paths: ["/tmp/a.csv", "/tmp/b.csv"],
|
|
16
|
+
chunk_row_counts: [10, 5]
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
lines = File.read(path).lines.map(&:strip)
|
|
20
|
+
assert_equal "chunk_index,chunk_path,row_count", lines[0]
|
|
21
|
+
assert_equal "1,/tmp/a.csv,10", lines[1]
|
|
22
|
+
assert_equal "2,/tmp/b.csv,5", lines[2]
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|