csvops 0.6.0.alpha → 0.8.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/README.md +103 -24
  3. data/docs/architecture.md +121 -4
  4. data/docs/release-v0.7.0-alpha.md +87 -0
  5. data/docs/release-v0.8.0-alpha.md +88 -0
  6. data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
  7. data/lib/csvtool/application/use_cases/run_csv_stats.rb +64 -0
  8. data/lib/csvtool/cli.rb +9 -1
  9. data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
  10. data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
  11. data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
  12. data/lib/csvtool/domain/csv_stats_session/stats_options.rb +11 -0
  13. data/lib/csvtool/domain/csv_stats_session/stats_session.rb +25 -0
  14. data/lib/csvtool/domain/csv_stats_session/stats_source.rb +17 -0
  15. data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
  16. data/lib/csvtool/infrastructure/csv/csv_stats_scanner.rb +67 -0
  17. data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
  18. data/lib/csvtool/infrastructure/output/csv_stats_file_writer.rb +26 -0
  19. data/lib/csvtool/interface/cli/errors/presenter.rb +8 -0
  20. data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
  21. data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
  22. data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
  23. data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
  24. data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
  25. data/lib/csvtool/interface/cli/workflows/builders/csv_stats_session_builder.rb +28 -0
  26. data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
  27. data/lib/csvtool/interface/cli/workflows/presenters/csv_stats_presenter.rb +34 -0
  28. data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
  29. data/lib/csvtool/interface/cli/workflows/run_csv_stats_workflow.rb +77 -0
  30. data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
  31. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
  32. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
  33. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
  34. data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
  35. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step.rb +25 -0
  36. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step.rb +27 -0
  37. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step.rb +31 -0
  38. data/lib/csvtool/interface/cli/workflows/steps/csv_stats/execute_step.rb +27 -0
  39. data/lib/csvtool/version.rb +1 -1
  40. data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
  41. data/test/csvtool/application/use_cases/run_csv_stats_test.rb +165 -0
  42. data/test/csvtool/cli_test.rb +139 -29
  43. data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
  44. data/test/csvtool/infrastructure/csv/csv_stats_scanner_test.rb +68 -0
  45. data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
  46. data/test/csvtool/infrastructure/output/csv_stats_file_writer_test.rb +38 -0
  47. data/test/csvtool/interface/cli/menu_loop_test.rb +104 -130
  48. data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
  49. data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
  50. data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
  51. data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
  52. data/test/csvtool/interface/cli/workflows/builders/csv_stats_session_builder_test.rb +19 -0
  53. data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
  54. data/test/csvtool/interface/cli/workflows/presenters/csv_stats_presenter_test.rb +37 -0
  55. data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
  56. data/test/csvtool/interface/cli/workflows/run_csv_stats_workflow_test.rb +146 -0
  57. data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
  58. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
  59. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
  60. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
  61. data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
  62. data/test/csvtool/interface/cli/workflows/steps/csv_stats/build_session_step_test.rb +36 -0
  63. data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_destination_step_test.rb +49 -0
  64. data/test/csvtool/interface/cli/workflows/steps/csv_stats/collect_inputs_step_test.rb +61 -0
  65. data/test/csvtool/interface/cli/workflows/steps/csv_stats/execute_step_test.rb +65 -0
  66. data/test/fixtures/split_people_25.csv +26 -0
  67. metadata +58 -1
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/application/use_cases/run_csv_stats"
5
+ require "csvtool/domain/csv_stats_session/stats_source"
6
+ require "csvtool/domain/csv_stats_session/stats_options"
7
+ require "csvtool/domain/csv_stats_session/stats_session"
8
+ require "csvtool/domain/shared/output_destination"
9
+ require "tmpdir"
10
+
11
+ class RunCsvStatsTest < Minitest::Test
12
+ def fixture_path(name)
13
+ File.expand_path("../../../fixtures/#{name}", __dir__)
14
+ end
15
+
16
+ def test_returns_core_stats_summary
17
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
18
+ path: fixture_path("sample_people.csv"),
19
+ separator: ",",
20
+ headers_present: true
21
+ )
22
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
23
+ source: source,
24
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
25
+ )
26
+
27
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
28
+
29
+ assert result.ok?
30
+ assert_equal 3, result.data[:row_count]
31
+ assert_equal 2, result.data[:column_count]
32
+ assert_equal ["name", "city"], result.data[:headers]
33
+ assert_equal [
34
+ { name: "name", blank_count: 0, non_blank_count: 3 },
35
+ { name: "city", blank_count: 0, non_blank_count: 3 }
36
+ ], result.data[:column_stats]
37
+ end
38
+
39
+ def test_supports_tsv_separator
40
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
41
+ path: fixture_path("sample_people.tsv"),
42
+ separator: "\t",
43
+ headers_present: true
44
+ )
45
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
46
+ source: source,
47
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
48
+ )
49
+
50
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
51
+
52
+ assert result.ok?
53
+ assert_equal 3, result.data[:row_count]
54
+ assert_equal 2, result.data[:column_count]
55
+ assert_equal ["name", "city"], result.data[:headers]
56
+ end
57
+
58
+ def test_supports_headerless_mode
59
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
60
+ path: fixture_path("sample_people_no_headers.csv"),
61
+ separator: ",",
62
+ headers_present: false
63
+ )
64
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
65
+ source: source,
66
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
67
+ )
68
+
69
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
70
+
71
+ assert result.ok?
72
+ assert_equal 3, result.data[:row_count]
73
+ assert_equal 2, result.data[:column_count]
74
+ assert_nil result.data[:headers]
75
+ assert_equal [
76
+ { name: "column_1", blank_count: 0, non_blank_count: 3 },
77
+ { name: "column_2", blank_count: 0, non_blank_count: 3 }
78
+ ], result.data[:column_stats]
79
+ end
80
+
81
+ def test_supports_custom_separator
82
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
83
+ path: fixture_path("sample_people_colon.txt"),
84
+ separator: ":",
85
+ headers_present: true
86
+ )
87
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
88
+ source: source,
89
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
90
+ )
91
+
92
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
93
+
94
+ assert result.ok?
95
+ assert_equal 3, result.data[:row_count]
96
+ assert_equal 2, result.data[:column_count]
97
+ assert_equal ["name", "city"], result.data[:headers]
98
+ end
99
+
100
+ def test_computes_blank_and_non_blank_counts
101
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
102
+ path: fixture_path("sample_people_blanks.csv"),
103
+ separator: ",",
104
+ headers_present: true
105
+ )
106
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
107
+ source: source,
108
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
109
+ )
110
+
111
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
112
+
113
+ assert result.ok?
114
+ assert_equal [
115
+ { name: "name", blank_count: 2, non_blank_count: 3 },
116
+ { name: "city", blank_count: 1, non_blank_count: 4 }
117
+ ], result.data[:column_stats]
118
+ end
119
+
120
+ def test_writes_stats_to_file_when_file_output_selected
121
+ Dir.mktmpdir do |dir|
122
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
123
+ path: fixture_path("sample_people.csv"),
124
+ separator: ",",
125
+ headers_present: true
126
+ )
127
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
128
+ source: source,
129
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
130
+ ).with_output_destination(Csvtool::Domain::Shared::OutputDestination.file(path: File.join(dir, "stats.csv")))
131
+
132
+ result = Csvtool::Application::UseCases::RunCsvStats.new.call(session: session)
133
+
134
+ assert result.ok?
135
+ assert_equal session.output_destination.path, result.data[:output_path]
136
+ csv_text = File.read(session.output_destination.path)
137
+ assert_includes csv_text, "metric,value"
138
+ assert_includes csv_text, "row_count,3"
139
+ assert_includes csv_text, "column_count,2"
140
+ end
141
+ end
142
+
143
+ def test_returns_cannot_write_output_file_when_writer_fails
144
+ source = Csvtool::Domain::CsvStatsSession::StatsSource.new(
145
+ path: fixture_path("sample_people.csv"),
146
+ separator: ",",
147
+ headers_present: true
148
+ )
149
+ session = Csvtool::Domain::CsvStatsSession::StatsSession.start(
150
+ source: source,
151
+ options: Csvtool::Domain::CsvStatsSession::StatsOptions.new
152
+ ).with_output_destination(Csvtool::Domain::Shared::OutputDestination.file(path: "/tmp/out.csv"))
153
+ writer = Object.new
154
+ def writer.call(path:, data:)
155
+ raise Errno::EACCES, path
156
+ end
157
+
158
+ result = Csvtool::Application::UseCases::RunCsvStats.new(csv_stats_file_writer: writer).call(session: session)
159
+
160
+ refute result.ok?
161
+ assert_equal :cannot_write_output_file, result.error
162
+ assert_equal "/tmp/out.csv", result.data[:path]
163
+ assert_equal Errno::EACCES, result.data[:error_class]
164
+ end
165
+ end
@@ -3,6 +3,7 @@
3
3
  require_relative "../test_helper"
4
4
  require "csvtool/cli"
5
5
  require "tmpdir"
6
+ require "fileutils"
6
7
 
7
8
  class TestCli < Minitest::Test
8
9
  def fixture_path(name)
@@ -11,11 +12,120 @@ class TestCli < Minitest::Test
11
12
 
12
13
  def test_menu_can_exit_cleanly
13
14
  output = StringIO.new
14
- status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("6\n"), stdout: output, stderr: StringIO.new)
15
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new("8\n"), stdout: output, stderr: StringIO.new)
15
16
  assert_equal 0, status
16
17
  assert_includes output.string, "CSV Tool Menu"
17
18
  end
18
19
 
20
+ def test_stats_workflow_shell_can_run_and_return_to_menu
21
+ output = StringIO.new
22
+ input = [
23
+ "7",
24
+ fixture_path("sample_people.csv"),
25
+ "",
26
+ "",
27
+ "",
28
+ "8"
29
+ ].join("\n") + "\n"
30
+
31
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
32
+
33
+ assert_equal 0, status
34
+ assert_includes output.string, "CSV Stats Summary"
35
+ assert_includes output.string, "Rows: 3"
36
+ assert_includes output.string, "Columns: 2"
37
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
38
+ end
39
+
40
+ def test_stats_workflow_missing_file_returns_to_menu
41
+ output = StringIO.new
42
+ input = [
43
+ "7",
44
+ "/tmp/does-not-exist.csv",
45
+ "",
46
+ "",
47
+ "",
48
+ "8"
49
+ ].join("\n") + "\n"
50
+
51
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
52
+
53
+ assert_equal 0, status
54
+ assert_includes output.string, "File not found: /tmp/does-not-exist.csv"
55
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
56
+ end
57
+
58
+ def test_stats_workflow_can_write_output_to_file
59
+ output = StringIO.new
60
+
61
+ Dir.mktmpdir do |dir|
62
+ output_path = File.join(dir, "stats.csv")
63
+ input = [
64
+ "7",
65
+ fixture_path("sample_people.csv"),
66
+ "",
67
+ "",
68
+ "2",
69
+ output_path,
70
+ "8"
71
+ ].join("\n") + "\n"
72
+
73
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
74
+
75
+ assert_equal 0, status
76
+ assert_includes output.string, "Wrote output to #{output_path}"
77
+ csv_text = File.read(output_path)
78
+ assert_includes csv_text, "metric,value"
79
+ assert_includes csv_text, "row_count,3"
80
+ end
81
+ end
82
+
83
+ def test_split_workflow_splits_csv_in_menu_flow
84
+ output = StringIO.new
85
+ Dir.mktmpdir do |dir|
86
+ source_path = File.join(dir, "people.csv")
87
+ FileUtils.cp(fixture_path("split_people_25.csv"), source_path)
88
+ input = [
89
+ "6",
90
+ source_path,
91
+ "",
92
+ "",
93
+ "10",
94
+ "",
95
+ "",
96
+ "",
97
+ "",
98
+ "8"
99
+ ].join("\n") + "\n"
100
+
101
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
102
+
103
+ assert_equal 0, status
104
+ assert_includes output.string, "Chunks written: 3"
105
+ assert File.file?(File.join(dir, "people_part_001.csv"))
106
+ assert File.file?(File.join(dir, "people_part_002.csv"))
107
+ assert File.file?(File.join(dir, "people_part_003.csv"))
108
+ end
109
+ end
110
+
111
+ def test_split_workflow_invalid_chunk_size_returns_to_menu
112
+ output = StringIO.new
113
+ input = [
114
+ "6",
115
+ fixture_path("sample_people.csv"),
116
+ "",
117
+ "",
118
+ "0",
119
+ "8"
120
+ ].join("\n") + "\n"
121
+
122
+ status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
123
+
124
+ assert_equal 0, status
125
+ assert_includes output.string, "Chunk size must be a positive integer."
126
+ assert_operator output.string.scan("CSV Tool Menu").length, :>=, 2
127
+ end
128
+
19
129
  def test_end_to_end_console_happy_path_prints_expected_values
20
130
  input = [
21
131
  "1",
@@ -26,7 +136,7 @@ class TestCli < Minitest::Test
26
136
  "",
27
137
  "y",
28
138
  "",
29
- "6"
139
+ "8"
30
140
  ].join("\n") + "\n"
31
141
 
32
142
  output = StringIO.new
@@ -58,7 +168,7 @@ class TestCli < Minitest::Test
58
168
  "2",
59
169
  "3",
60
170
  "",
61
- "6"
171
+ "8"
62
172
  ].join("\n") + "\n"
63
173
 
64
174
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -79,7 +189,7 @@ class TestCli < Minitest::Test
79
189
  "0",
80
190
  "3",
81
191
  "",
82
- "6"
192
+ "8"
83
193
  ].join("\n") + "\n"
84
194
 
85
195
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -98,7 +208,7 @@ class TestCli < Minitest::Test
98
208
  "2",
99
209
  "3",
100
210
  "",
101
- "6"
211
+ "8"
102
212
  ].join("\n") + "\n"
103
213
 
104
214
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -119,7 +229,7 @@ class TestCli < Minitest::Test
119
229
  "2",
120
230
  "3",
121
231
  "",
122
- "6"
232
+ "8"
123
233
  ].join("\n") + "\n"
124
234
 
125
235
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -144,7 +254,7 @@ class TestCli < Minitest::Test
144
254
  "3",
145
255
  "2",
146
256
  output_path,
147
- "6"
257
+ "8"
148
258
  ].join("\n") + "\n"
149
259
 
150
260
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -164,7 +274,7 @@ class TestCli < Minitest::Test
164
274
  "1",
165
275
  "2",
166
276
  "",
167
- "6"
277
+ "8"
168
278
  ].join("\n") + "\n"
169
279
 
170
280
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -184,7 +294,7 @@ class TestCli < Minitest::Test
184
294
  "",
185
295
  "",
186
296
  "",
187
- "6"
297
+ "8"
188
298
  ].join("\n") + "\n"
189
299
 
190
300
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -209,7 +319,7 @@ class TestCli < Minitest::Test
209
319
  "",
210
320
  "2",
211
321
  output_path,
212
- "6"
322
+ "8"
213
323
  ].join("\n") + "\n"
214
324
 
215
325
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -231,7 +341,7 @@ class TestCli < Minitest::Test
231
341
  "",
232
342
  "",
233
343
  "",
234
- "6"
344
+ "8"
235
345
  ].join("\n") + "\n"
236
346
 
237
347
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -250,7 +360,7 @@ class TestCli < Minitest::Test
250
360
  "n",
251
361
  "",
252
362
  "",
253
- "6"
363
+ "8"
254
364
  ].join("\n") + "\n"
255
365
 
256
366
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -270,7 +380,7 @@ class TestCli < Minitest::Test
270
380
  "",
271
381
  "",
272
382
  "abc",
273
- "6"
383
+ "8"
274
384
  ].join("\n") + "\n"
275
385
 
276
386
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -295,7 +405,7 @@ class TestCli < Minitest::Test
295
405
  "",
296
406
  "",
297
407
  "",
298
- "6"
408
+ "8"
299
409
  ].join("\n") + "\n"
300
410
 
301
411
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -329,7 +439,7 @@ class TestCli < Minitest::Test
329
439
  "",
330
440
  "2",
331
441
  output_path,
332
- "6"
442
+ "8"
333
443
  ].join("\n") + "\n"
334
444
 
335
445
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -356,7 +466,7 @@ class TestCli < Minitest::Test
356
466
  "",
357
467
  "",
358
468
  "",
359
- "6"
469
+ "8"
360
470
  ].join("\n") + "\n"
361
471
 
362
472
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -382,7 +492,7 @@ class TestCli < Minitest::Test
382
492
  "",
383
493
  "",
384
494
  "",
385
- "6"
495
+ "8"
386
496
  ].join("\n") + "\n"
387
497
 
388
498
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -402,7 +512,7 @@ class TestCli < Minitest::Test
402
512
  fixture_path("sample_people.csv"),
403
513
  "",
404
514
  "",
405
- "6"
515
+ "8"
406
516
  ].join("\n") + "\n"
407
517
 
408
518
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -423,7 +533,7 @@ class TestCli < Minitest::Test
423
533
  fixture_path("parity_people_reordered.tsv"),
424
534
  "2",
425
535
  "",
426
- "6"
536
+ "8"
427
537
  ].join("\n") + "\n"
428
538
 
429
539
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -441,7 +551,7 @@ class TestCli < Minitest::Test
441
551
  fixture_path("sample_people_no_headers.csv"),
442
552
  "",
443
553
  "n",
444
- "6"
554
+ "8"
445
555
  ].join("\n") + "\n"
446
556
 
447
557
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -459,7 +569,7 @@ class TestCli < Minitest::Test
459
569
  fixture_path("parity_people_header_mismatch.csv"),
460
570
  "",
461
571
  "",
462
- "6"
572
+ "8"
463
573
  ].join("\n") + "\n"
464
574
 
465
575
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -477,7 +587,7 @@ class TestCli < Minitest::Test
477
587
  fixture_path("parity_people_mismatch.csv"),
478
588
  "",
479
589
  "",
480
- "6"
590
+ "8"
481
591
  ].join("\n") + "\n"
482
592
 
483
593
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -499,7 +609,7 @@ class TestCli < Minitest::Test
499
609
  fixture_path("sample_people.csv"),
500
610
  "",
501
611
  "",
502
- "6"
612
+ "8"
503
613
  ].join("\n") + "\n"
504
614
 
505
615
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -518,7 +628,7 @@ class TestCli < Minitest::Test
518
628
  "/tmp/not-there-right.csv",
519
629
  "",
520
630
  "",
521
- "6"
631
+ "8"
522
632
  ].join("\n") + "\n"
523
633
 
524
634
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -537,7 +647,7 @@ class TestCli < Minitest::Test
537
647
  fixture_path("sample_people_bad_tail.csv"),
538
648
  "",
539
649
  "",
540
- "6"
650
+ "8"
541
651
  ].join("\n") + "\n"
542
652
 
543
653
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -564,7 +674,7 @@ class TestCli < Minitest::Test
564
674
  "y",
565
675
  "2",
566
676
  output_path,
567
- "6"
677
+ "8"
568
678
  ].join("\n") + "\n"
569
679
 
570
680
  status = Csvtool::CLI.start(["menu"], stdin: StringIO.new(input), stdout: output, stderr: StringIO.new)
@@ -584,7 +694,7 @@ class TestCli < Minitest::Test
584
694
  "1",
585
695
  "",
586
696
  "n",
587
- "6"
697
+ "8"
588
698
  ].join("\n") + "\n"
589
699
 
590
700
  output = StringIO.new
@@ -599,7 +709,7 @@ class TestCli < Minitest::Test
599
709
  output = StringIO.new
600
710
  status = Csvtool::CLI.start(
601
711
  ["menu"],
602
- stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n6\n"),
712
+ stdin: StringIO.new("1\n/tmp/does-not-exist.csv\n4\n7\n"),
603
713
  stdout: output,
604
714
  stderr: StringIO.new
605
715
  )
@@ -620,7 +730,7 @@ class TestCli < Minitest::Test
620
730
  "y",
621
731
  "2",
622
732
  "/tmp/not-a-dir/out.csv",
623
- "6"
733
+ "8"
624
734
  ].join("\n") + "\n"
625
735
 
626
736
  output = StringIO.new
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/csv/csv_splitter"
5
+ require "tmpdir"
6
+
7
+ class CsvSplitterTest < Minitest::Test
8
+ def test_splits_large_file_in_order
9
+ splitter = Csvtool::Infrastructure::CSV::CsvSplitter.new
10
+
11
+ Dir.mktmpdir do |dir|
12
+ source_path = File.join(dir, "large.csv")
13
+ File.open(source_path, "w") do |f|
14
+ f.puts "id,value"
15
+ 5_000.times { |i| f.puts "#{i + 1},v#{i + 1}" }
16
+ end
17
+
18
+ stats = splitter.call(
19
+ file_path: source_path,
20
+ col_sep: ",",
21
+ headers_present: true,
22
+ chunk_size: 1_000,
23
+ output_directory: dir,
24
+ file_prefix: "large",
25
+ overwrite_existing: false
26
+ )
27
+
28
+ assert_equal 5, stats[:chunk_count]
29
+ assert_equal 5_000, stats[:data_rows]
30
+ assert_equal [1_000, 1_000, 1_000, 1_000, 1_000], stats[:chunk_row_counts]
31
+
32
+ first_chunk = File.read(File.join(dir, "large_part_001.csv")).lines.map(&:strip)
33
+ last_chunk = File.read(File.join(dir, "large_part_005.csv")).lines.map(&:strip)
34
+ assert_equal "id,value", first_chunk.first
35
+ assert_equal "1,v1", first_chunk[1]
36
+ assert_equal "1000,v1000", first_chunk[1000]
37
+ assert_equal "4001,v4001", last_chunk[1]
38
+ assert_equal "5000,v5000", last_chunk[1000]
39
+ end
40
+ end
41
+
42
+ def test_streaming_split_handles_headerless_file
43
+ splitter = Csvtool::Infrastructure::CSV::CsvSplitter.new
44
+
45
+ Dir.mktmpdir do |dir|
46
+ source_path = File.join(dir, "large_no_headers.csv")
47
+ File.open(source_path, "w") do |f|
48
+ 2_500.times { |i| f.puts "#{i + 1},v#{i + 1}" }
49
+ end
50
+
51
+ stats = splitter.call(
52
+ file_path: source_path,
53
+ col_sep: ",",
54
+ headers_present: false,
55
+ chunk_size: 1_000,
56
+ output_directory: dir,
57
+ file_prefix: "large_no_headers",
58
+ overwrite_existing: false
59
+ )
60
+
61
+ assert_equal 3, stats[:chunk_count]
62
+ assert_equal 2_500, stats[:data_rows]
63
+ assert_equal [1_000, 1_000, 500], stats[:chunk_row_counts]
64
+ first_line = File.read(File.join(dir, "large_no_headers_part_001.csv")).lines.first.strip
65
+ assert_equal "1,v1", first_line
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csv"
5
+ require "csvtool/infrastructure/csv/csv_stats_scanner"
6
+ require "tmpdir"
7
+
8
+ class CsvStatsScannerTest < Minitest::Test
9
+ def fixture_path(name)
10
+ File.expand_path("../../../fixtures/#{name}", __dir__)
11
+ end
12
+
13
+ def test_scans_headers_mode_with_streaming_foreach
14
+ source = fixture_path("sample_people_blanks.csv")
15
+ csv = Object.new
16
+ received = nil
17
+
18
+ define_singleton_foreach(csv) do |path, headers:, col_sep:, &block|
19
+ received = { path: path, headers: headers, col_sep: col_sep }
20
+ ::CSV.foreach(path, headers: headers, col_sep: col_sep, &block)
21
+ end
22
+
23
+ result = Csvtool::Infrastructure::CSV::CsvStatsScanner.new(csv: csv).call(
24
+ file_path: source,
25
+ col_sep: ",",
26
+ headers_present: true
27
+ )
28
+
29
+ assert_equal({ path: source, headers: true, col_sep: "," }, received)
30
+ assert_equal 5, result[:row_count]
31
+ assert_equal 2, result[:column_count]
32
+ assert_equal ["name", "city"], result[:headers]
33
+ assert_equal [
34
+ { name: "name", blank_count: 2, non_blank_count: 3 },
35
+ { name: "city", blank_count: 1, non_blank_count: 4 }
36
+ ], result[:column_stats]
37
+ end
38
+
39
+ def test_scans_large_file_in_single_pass_shape
40
+ Dir.mktmpdir do |dir|
41
+ path = File.join(dir, "large.csv")
42
+ File.open(path, "w") do |f|
43
+ f.puts("id,value")
44
+ 20_000.times { |i| f.puts("#{i},v#{i}") }
45
+ end
46
+
47
+ result = Csvtool::Infrastructure::CSV::CsvStatsScanner.new.call(
48
+ file_path: path,
49
+ col_sep: ",",
50
+ headers_present: true
51
+ )
52
+
53
+ assert_equal 20_000, result[:row_count]
54
+ assert_equal 2, result[:column_count]
55
+ assert_equal ["id", "value"], result[:headers]
56
+ assert_equal [
57
+ { name: "id", blank_count: 0, non_blank_count: 20_000 },
58
+ { name: "value", blank_count: 0, non_blank_count: 20_000 }
59
+ ], result[:column_stats]
60
+ end
61
+ end
62
+
63
+ private
64
+
65
+ def define_singleton_foreach(obj, &implementation)
66
+ obj.define_singleton_method(:foreach, &implementation)
67
+ end
68
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/infrastructure/output/csv_split_manifest_writer"
5
+ require "tmpdir"
6
+
7
+ class CsvSplitManifestWriterTest < Minitest::Test
8
+ def test_writes_manifest_csv
9
+ writer = Csvtool::Infrastructure::Output::CsvSplitManifestWriter.new
10
+
11
+ Dir.mktmpdir do |dir|
12
+ path = File.join(dir, "manifest.csv")
13
+ writer.call(
14
+ path: path,
15
+ chunk_paths: ["/tmp/a.csv", "/tmp/b.csv"],
16
+ chunk_row_counts: [10, 5]
17
+ )
18
+
19
+ lines = File.read(path).lines.map(&:strip)
20
+ assert_equal "chunk_index,chunk_path,row_count", lines[0]
21
+ assert_equal "1,/tmp/a.csv,10", lines[1]
22
+ assert_equal "2,/tmp/b.csv,5", lines[2]
23
+ end
24
+ end
25
+ end