csvops 0.5.0.alpha → 0.7.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +88 -7
  3. data/docs/architecture.md +119 -5
  4. data/docs/release-v0.6.0-alpha.md +84 -0
  5. data/docs/release-v0.7.0-alpha.md +87 -0
  6. data/lib/csvtool/application/use_cases/run_csv_parity.rb +70 -0
  7. data/lib/csvtool/application/use_cases/run_csv_split.rb +97 -0
  8. data/lib/csvtool/cli.rb +9 -1
  9. data/lib/csvtool/domain/csv_parity_session/parity_options.rb +22 -0
  10. data/lib/csvtool/domain/csv_parity_session/parity_session.rb +20 -0
  11. data/lib/csvtool/domain/csv_parity_session/source_pair.rb +19 -0
  12. data/lib/csvtool/domain/csv_split_session/split_options.rb +27 -0
  13. data/lib/csvtool/domain/csv_split_session/split_session.rb +20 -0
  14. data/lib/csvtool/domain/csv_split_session/split_source.rb +17 -0
  15. data/lib/csvtool/infrastructure/csv/csv_parity_comparator.rb +71 -0
  16. data/lib/csvtool/infrastructure/csv/csv_splitter.rb +64 -0
  17. data/lib/csvtool/infrastructure/output/csv_split_manifest_writer.rb +20 -0
  18. data/lib/csvtool/interface/cli/errors/presenter.rb +12 -0
  19. data/lib/csvtool/interface/cli/menu_loop.rb +8 -2
  20. data/lib/csvtool/interface/cli/prompts/chunk_size_prompt.rb +21 -0
  21. data/lib/csvtool/interface/cli/prompts/split_manifest_prompt.rb +30 -0
  22. data/lib/csvtool/interface/cli/prompts/split_output_prompt.rb +38 -0
  23. data/lib/csvtool/interface/cli/workflows/builders/csv_parity_session_builder.rb +33 -0
  24. data/lib/csvtool/interface/cli/workflows/builders/csv_split_session_builder.rb +44 -0
  25. data/lib/csvtool/interface/cli/workflows/presenters/csv_parity_presenter.rb +38 -0
  26. data/lib/csvtool/interface/cli/workflows/presenters/csv_split_presenter.rb +26 -0
  27. data/lib/csvtool/interface/cli/workflows/run_csv_parity_workflow.rb +66 -0
  28. data/lib/csvtool/interface/cli/workflows/run_csv_split_workflow.rb +89 -0
  29. data/lib/csvtool/interface/cli/workflows/steps/csv_split/build_session_step.rb +30 -0
  30. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step.rb +43 -0
  31. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step.rb +30 -0
  32. data/lib/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step.rb +31 -0
  33. data/lib/csvtool/interface/cli/workflows/steps/csv_split/execute_step.rb +36 -0
  34. data/lib/csvtool/interface/cli/workflows/steps/parity/build_session_step.rb +25 -0
  35. data/lib/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step.rb +32 -0
  36. data/lib/csvtool/interface/cli/workflows/steps/parity/execute_step.rb +26 -0
  37. data/lib/csvtool/version.rb +1 -1
  38. data/test/csvtool/application/use_cases/run_csv_parity_test.rb +160 -0
  39. data/test/csvtool/application/use_cases/run_csv_split_test.rb +124 -0
  40. data/test/csvtool/cli_test.rb +222 -21
  41. data/test/csvtool/cli_unit_test.rb +4 -4
  42. data/test/csvtool/domain/csv_parity_session/parity_options_test.rb +17 -0
  43. data/test/csvtool/domain/csv_parity_session/parity_session_test.rb +18 -0
  44. data/test/csvtool/domain/csv_parity_session/source_pair_test.rb +11 -0
  45. data/test/csvtool/infrastructure/csv/csv_parity_comparator_test.rb +78 -0
  46. data/test/csvtool/infrastructure/csv/csv_splitter_test.rb +68 -0
  47. data/test/csvtool/infrastructure/output/csv_split_manifest_writer_test.rb +25 -0
  48. data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
  49. data/test/csvtool/interface/cli/menu_loop_test.rb +87 -93
  50. data/test/csvtool/interface/cli/prompts/chunk_size_prompt_test.rb +17 -0
  51. data/test/csvtool/interface/cli/prompts/split_manifest_prompt_test.rb +42 -0
  52. data/test/csvtool/interface/cli/prompts/split_output_prompt_test.rb +22 -0
  53. data/test/csvtool/interface/cli/workflows/builders/csv_parity_session_builder_test.rb +20 -0
  54. data/test/csvtool/interface/cli/workflows/builders/csv_split_session_builder_test.rb +30 -0
  55. data/test/csvtool/interface/cli/workflows/presenters/csv_parity_presenter_test.rb +43 -0
  56. data/test/csvtool/interface/cli/workflows/presenters/csv_split_presenter_test.rb +26 -0
  57. data/test/csvtool/interface/cli/workflows/run_csv_parity_workflow_test.rb +94 -0
  58. data/test/csvtool/interface/cli/workflows/run_csv_split_workflow_test.rb +200 -0
  59. data/test/csvtool/interface/cli/workflows/steps/csv_split/build_session_step_test.rb +40 -0
  60. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step_test.rb +64 -0
  61. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step_test.rb +30 -0
  62. data/test/csvtool/interface/cli/workflows/steps/csv_split/collect_output_step_test.rb +32 -0
  63. data/test/csvtool/interface/cli/workflows/steps/csv_split/execute_step_test.rb +83 -0
  64. data/test/csvtool/interface/cli/workflows/steps/parity/build_session_step_test.rb +41 -0
  65. data/test/csvtool/interface/cli/workflows/steps/parity/collect_inputs_step_test.rb +30 -0
  66. data/test/csvtool/interface/cli/workflows/steps/parity/execute_step_test.rb +40 -0
  67. data/test/fixtures/parity_duplicates_left.csv +4 -0
  68. data/test/fixtures/parity_duplicates_right.csv +3 -0
  69. data/test/fixtures/parity_people_header_mismatch.csv +4 -0
  70. data/test/fixtures/parity_people_many_reordered.csv +13 -0
  71. data/test/fixtures/parity_people_mismatch.csv +4 -0
  72. data/test/fixtures/parity_people_reordered.csv +4 -0
  73. data/test/fixtures/parity_people_reordered.tsv +4 -0
  74. data/test/fixtures/split_people_25.csv +26 -0
  75. metadata +64 -1
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csvtool/application/use_cases/run_csv_split"
4
+ require "csvtool/interface/cli/errors/presenter"
5
+ require "csvtool/interface/cli/prompts/file_path_prompt"
6
+ require "csvtool/interface/cli/prompts/separator_prompt"
7
+ require "csvtool/interface/cli/prompts/headers_present_prompt"
8
+ require "csvtool/interface/cli/prompts/chunk_size_prompt"
9
+ require "csvtool/interface/cli/prompts/yes_no_prompt"
10
+ require "csvtool/interface/cli/prompts/split_output_prompt"
11
+ require "csvtool/interface/cli/prompts/split_manifest_prompt"
12
+ require "csvtool/interface/cli/workflows/builders/csv_split_session_builder"
13
+ require "csvtool/interface/cli/workflows/presenters/csv_split_presenter"
14
+ require "csvtool/interface/cli/workflows/support/result_error_handler"
15
+ require "csvtool/interface/cli/workflows/steps/workflow_step_pipeline"
16
+ require "csvtool/interface/cli/workflows/steps/csv_split/collect_inputs_step"
17
+ require "csvtool/interface/cli/workflows/steps/csv_split/collect_output_step"
18
+ require "csvtool/interface/cli/workflows/steps/csv_split/collect_manifest_step"
19
+ require "csvtool/interface/cli/workflows/steps/csv_split/build_session_step"
20
+ require "csvtool/interface/cli/workflows/steps/csv_split/execute_step"
21
+
22
module Csvtool
  module Interface
    module CLI
      module Workflows
        # Interactive workflow for splitting a CSV file into chunks.
        #
        # Wires the prompts, session builder, presenter and error handling
        # together and runs them as an ordered list of steps through
        # Steps::WorkflowStepPipeline.
        class RunCsvSplitWorkflow
          # @param stdin input stream handed to every prompt
          # @param stdout output stream handed to the prompts, the summary
          #   presenter and the error presenter
          # @param use_case object placed in the step context under :use_case
          def initialize(stdin:, stdout:, use_case: Application::UseCases::RunCsvSplit.new)
            @stdin = stdin
            @stdout = stdout
            @use_case = use_case
            @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
            @session_builder = Builders::CsvSplitSessionBuilder.new
            @presenter = Presenters::CsvSplitPresenter.new(stdout: stdout)
            @result_error_handler = Support::ResultErrorHandler.new(errors: @errors)
          end

          # Builds the shared context and the step list, then runs the pipeline.
          #
          # @return [nil] always; the pipeline's own return value is discarded
          def call
            context = initial_context
            Steps::WorkflowStepPipeline.new(steps: build_steps).call(context)
            nil
          end

          private

          # Collaborators every step looks up from the shared context hash.
          def initial_context
            {
              use_case: @use_case,
              session_builder: @session_builder,
              presenter: @presenter,
              handle_error: method(:handle_error)
            }
          end

          # Steps in execution order: collect inputs, output settings and
          # manifest settings, then build the session and execute it.
          def build_steps
            [
              collect_inputs_step,
              collect_output_step,
              collect_manifest_step,
              Steps::CsvSplit::BuildSessionStep.new,
              Steps::CsvSplit::ExecuteStep.new
            ]
          end

          def collect_inputs_step
            prompts = Interface::CLI::Prompts
            Steps::CsvSplit::CollectInputsStep.new(
              file_path_prompt: build_prompt(prompts::FilePathPrompt),
              separator_prompt: build_prompt(prompts::SeparatorPrompt, errors: @errors),
              headers_present_prompt: build_prompt(prompts::HeadersPresentPrompt),
              chunk_size_prompt: build_prompt(prompts::ChunkSizePrompt),
              errors: @errors
            )
          end

          def collect_output_step
            Steps::CsvSplit::CollectOutputStep.new(
              split_output_prompt: build_prompt(
                Interface::CLI::Prompts::SplitOutputPrompt,
                yes_no_prompt: build_prompt(Interface::CLI::Prompts::YesNoPrompt)
              )
            )
          end

          def collect_manifest_step
            Steps::CsvSplit::CollectManifestStep.new(
              split_manifest_prompt: build_prompt(
                Interface::CLI::Prompts::SplitManifestPrompt,
                yes_no_prompt: build_prompt(Interface::CLI::Prompts::YesNoPrompt)
              )
            )
          end

          # Instantiates a prompt class bound to this workflow's streams,
          # forwarding any extra keyword collaborators.
          def build_prompt(prompt_class, **extras)
            prompt_class.new(stdin: @stdin, stdout: @stdout, **extras)
          end

          # Hands a failed result to the result error handler together with
          # the table that maps error codes to error-presenter calls.
          def handle_error(result)
            @result_error_handler.call(result, error_handlers)
          end

          # Error code => callable(result, errors) that prints the message.
          def error_handlers
            {
              file_not_found: ->(failure, presenter) { presenter.file_not_found(failure.data[:path]) },
              no_headers: ->(_failure, presenter) { presenter.no_headers },
              could_not_parse_csv: ->(_failure, presenter) { presenter.could_not_parse_csv },
              cannot_read_file: ->(failure, presenter) { presenter.cannot_read_file(failure.data[:path]) },
              cannot_write_output_file: lambda do |failure, presenter|
                presenter.cannot_write_output_file(failure.data[:path], failure.data[:error_class])
              end,
              output_file_exists: ->(failure, presenter) { presenter.output_file_exists(failure.data[:path]) }
            }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvSplit
            # Pipeline step that turns the values collected so far into a
            # split session and stores it under context[:session].
            class BuildSessionStep
              # @param context [Hash] must contain :session_builder, :file_path,
              #   :col_sep, :headers_present and :chunk_size; the output and
              #   manifest keys are optional
              # @return [nil]
              # @raise [KeyError] when a required key is missing
              def call(context)
                builder = context.fetch(:session_builder)

                required_args = {
                  file_path: context.fetch(:file_path),
                  col_sep: context.fetch(:col_sep),
                  headers_present: context.fetch(:headers_present),
                  chunk_size: context.fetch(:chunk_size)
                }
                # Optional values: absent keys become nil, absent flags become false.
                optional_args = {
                  output_directory: context[:output_directory],
                  file_prefix: context[:file_prefix],
                  overwrite_existing: context.fetch(:overwrite_existing, false),
                  write_manifest: context.fetch(:write_manifest, false),
                  manifest_path: context[:manifest_path]
                }

                context[:session] = builder.call(**required_args, **optional_args)
                nil
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvSplit
            # Pipeline step that asks for the source file, separator, header
            # flag and chunk size, storing each answer in the context.
            class CollectInputsStep
              # @param file_path_prompt responds to call(label:)
              # @param separator_prompt responds to call; nil is treated as "stop"
              # @param headers_present_prompt responds to call
              # @param chunk_size_prompt responds to call
              # @param errors responds to invalid_chunk_size
              def initialize(file_path_prompt:, separator_prompt:, headers_present_prompt:, chunk_size_prompt:, errors:)
                @file_path_prompt = file_path_prompt
                @separator_prompt = separator_prompt
                @headers_present_prompt = headers_present_prompt
                @chunk_size_prompt = chunk_size_prompt
                @errors = errors
              end

              # Fills :file_path, :col_sep, :headers_present and :chunk_size.
              #
              # @param context [Hash] shared workflow context, mutated in place
              # @return [nil, Symbol] nil on success; :halt when the separator
              #   prompt returns nil or the chunk size is not a positive integer
              def call(context)
                context[:file_path] = @file_path_prompt.call(label: "Source CSV file path: ")
                col_sep = @separator_prompt.call
                return :halt if col_sep.nil?

                context[:col_sep] = col_sep
                context[:headers_present] = @headers_present_prompt.call
                chunk_size = parse_chunk_size(@chunk_size_prompt.call)
                unless chunk_size&.positive?
                  @errors.invalid_chunk_size
                  return :halt
                end

                context[:chunk_size] = chunk_size
                nil
              end

              private

              # Converts the prompt answer to an Integer, or nil when it is not
              # a valid number.
              #
              # Strings are parsed strictly as base 10 so that "010" means ten
              # (not octal eight) and "0x10" is rejected rather than read as 16.
              # Only the conversion itself is guarded; errors raised by the
              # other prompts are no longer reported as an invalid chunk size.
              def parse_chunk_size(raw)
                return Integer(raw, 10, exception: false) if raw.is_a?(String)

                # A base may only be given for strings; other values convert as-is.
                Integer(raw, exception: false)
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvSplit
            # Pipeline step that asks for the manifest settings and stores
            # the answers in the context.
            class CollectManifestStep
              # @param split_manifest_prompt responds to call(default_path:) and
              #   returns something indexable by :write_manifest and :manifest_path
              def initialize(split_manifest_prompt:)
                @split_manifest_prompt = split_manifest_prompt
              end

              # Suggests "<output_directory>/<file_prefix>_manifest.csv" as the
              # default path and records the prompt's answers.
              #
              # @param context [Hash] must contain :output_directory and :file_prefix
              # @return [nil]
              # @raise [KeyError] when either required key is missing
              def call(context)
                directory = context.fetch(:output_directory)
                prefix = context.fetch(:file_prefix)
                suggested_path = File.join(directory, "#{prefix}_manifest.csv")

                answers = @split_manifest_prompt.call(default_path: suggested_path)
                context[:write_manifest] = answers[:write_manifest]
                context[:manifest_path] = answers[:manifest_path]
                nil
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvSplit
            # Pipeline step that asks where the chunk files should go and
            # stores the answers in the context.
            class CollectOutputStep
              # Keys copied from the prompt's answer into the context.
              ANSWER_KEYS = %i[output_directory file_prefix overwrite_existing].freeze

              # @param split_output_prompt responds to
              #   call(default_directory:, default_prefix:) and returns
              #   something indexable by the ANSWER_KEYS
              def initialize(split_output_prompt:)
                @split_output_prompt = split_output_prompt
              end

              # Offers the source file's directory and its base name (last
              # extension removed) as defaults.
              #
              # @param context [Hash] must contain :file_path
              # @return [nil]
              # @raise [KeyError] when :file_path is missing
              def call(context)
                source = context.fetch(:file_path)
                answers = @split_output_prompt.call(
                  default_directory: File.dirname(source),
                  default_prefix: File.basename(source, ".*")
                )
                ANSWER_KEYS.each { |key| context[key] = answers[key] }
                nil
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module CsvSplit
            # Pipeline step that checks the source headers can be read, runs
            # the split use case and prints the summary.
            class ExecuteStep
              # @param context [Hash] must contain :use_case, :file_path,
              #   :col_sep, :headers_present, :session, :handle_error,
              #   :presenter and :chunk_size
              # @return [nil, Symbol] nil after printing the summary; :halt
              #   when either use-case call reports failure
              def call(context)
                use_case = context.fetch(:use_case)

                # Only the success flag of the header read is used here.
                headers = use_case.read_headers(
                  file_path: context.fetch(:file_path),
                  col_sep: context.fetch(:col_sep),
                  headers_present: context.fetch(:headers_present)
                )
                return report_failure(context, headers) unless headers.ok?

                outcome = use_case.call(session: context.fetch(:session))
                return report_failure(context, outcome) unless outcome.ok?

                presenter = context.fetch(:presenter)
                summary = outcome.data.merge(chunk_size: context.fetch(:chunk_size))
                presenter.print_summary(summary)
                nil
              end

              private

              # Passes the failed result to the context's error callback.
              def report_failure(context, failed_result)
                context.fetch(:handle_error).call(failed_result)
                :halt
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module Parity
            # Pipeline step that builds the parity session from the collected
            # inputs and stores it under context[:session].
            class BuildSessionStep
              # Context keys forwarded to the session builder, in call order.
              SESSION_KEYS = %i[left_path right_path col_sep headers_present].freeze

              # @param context [Hash] must contain :session_builder and every
              #   key listed in SESSION_KEYS
              # @return [nil]
              # @raise [KeyError] when a required key is missing
              def call(context)
                builder = context.fetch(:session_builder)
                arguments = SESSION_KEYS.each_with_object({}) do |key, collected|
                  collected[key] = context.fetch(key)
                end
                context[:session] = builder.call(**arguments)
                nil
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module Parity
            # Pipeline step that asks for the two files to compare, the
            # separator and the header flag.
            class CollectInputsStep
              # @param file_path_prompt responds to call(label:); used for both files
              # @param separator_prompt responds to call; nil is treated as "stop"
              # @param headers_present_prompt responds to call
              def initialize(file_path_prompt:, separator_prompt:, headers_present_prompt:)
                @file_path_prompt = file_path_prompt
                @separator_prompt = separator_prompt
                @headers_present_prompt = headers_present_prompt
              end

              # Fills :left_path, :right_path, :col_sep and :headers_present.
              #
              # @param context [Hash] shared workflow context, mutated in place
              # @return [nil, Symbol] nil on success; :halt when the separator
              #   prompt returns nil (the header prompt is then not asked)
              def call(context)
                path_labels = {
                  left_path: "Left CSV file path: ",
                  right_path: "Right CSV file path: "
                }
                path_labels.each do |key, label|
                  context[key] = @file_path_prompt.call(label: label)
                end

                separator = @separator_prompt.call
                if separator.nil?
                  :halt
                else
                  context[:col_sep] = separator
                  context[:headers_present] = @headers_present_prompt.call
                  nil
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module Csvtool
  module Interface
    module CLI
      module Workflows
        module Steps
          module Parity
            # Pipeline step that runs the parity use case and either prints
            # the summary or reports the failure.
            class ExecuteStep
              # @param context [Hash] must contain :use_case and :session, plus
              #   :presenter (on success) or :handle_error (on failure)
              # @return [nil, Symbol] nil after printing the summary; :halt
              #   after a failed result has been handed to :handle_error
              def call(context)
                outcome = context.fetch(:use_case).call(session: context.fetch(:session))

                if outcome.ok?
                  context.fetch(:presenter).print_summary(outcome.data)
                  nil
                else
                  context.fetch(:handle_error).call(outcome)
                  :halt
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Csvtool
4
- VERSION = "0.5.0.alpha"
4
+ VERSION = "0.7.0.alpha"
5
5
  end
@@ -0,0 +1,160 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/application/use_cases/run_csv_parity"
5
+ require "csvtool/domain/csv_parity_session/source_pair"
6
+ require "csvtool/domain/csv_parity_session/parity_options"
7
+ require "csvtool/domain/csv_parity_session/parity_session"
8
+
9
# Exercises the RunCsvParity use case against the CSV fixtures and a
# comparator double that raises a permission error.
class RunCsvParityTest < Minitest::Test
  # Comparator double that always raises Errno::EACCES carrying a #path.
  class EaccesComparator
    PROTECTED_PATH = "/tmp/protected.csv"

    def call(left_path:, right_path:, col_sep:, headers_present:, sample_limit: 5)
      denied = Errno::EACCES.new(PROTECTED_PATH)
      # Errno::EACCES has no #path of its own; the test asserts on this value.
      denied.define_singleton_method(:path) { PROTECTED_PATH }
      raise denied
    end
  end

  # Absolute path of a file under the test fixtures directory.
  def fixture_path(name)
    File.expand_path("../../../fixtures/#{name}", __dir__)
  end

  # Builds a parity session for the given pair of files.
  def build_session(left_path:, right_path:, separator: ",", headers_present: true)
    parity = Csvtool::Domain::CsvParitySession
    parity::ParitySession.start(
      source_pair: parity::SourcePair.new(left_path: left_path, right_path: right_path),
      options: parity::ParityOptions.new(separator: separator, headers_present: headers_present)
    )
  end

  def test_returns_match_for_equivalent_files
    result = run_parity(
      left_path: fixture_path("sample_people.csv"),
      right_path: fixture_path("parity_people_reordered.csv")
    )

    assert_equal true, result.ok?
    assert_equal true, result.data[:match]
    assert_equal 0, result.data[:left_only_count]
    assert_equal 0, result.data[:right_only_count]
  end

  def test_returns_mismatch_counts_for_non_equivalent_files
    result = run_parity(
      left_path: fixture_path("sample_people.csv"),
      right_path: fixture_path("parity_people_mismatch.csv")
    )

    assert_equal true, result.ok?
    assert_equal false, result.data[:match]
    assert_equal 1, result.data[:left_only_count]
    assert_equal 1, result.data[:right_only_count]
  end

  def test_duplicate_count_differences_are_detected
    result = run_parity(
      left_path: fixture_path("parity_duplicates_left.csv"),
      right_path: fixture_path("parity_duplicates_right.csv")
    )

    assert_equal true, result.ok?
    assert_equal false, result.data[:match]
    assert_equal 1, result.data[:left_only_count]
    assert_equal 0, result.data[:right_only_count]
    assert_equal "1,Alice", result.data[:left_only_examples][0][:row]
    assert_equal 1, result.data[:left_only_examples][0][:count_delta]
  end

  def test_headered_mode_fails_when_headers_do_not_match
    result = run_parity(
      left_path: fixture_path("sample_people.csv"),
      right_path: fixture_path("parity_people_header_mismatch.csv")
    )

    assert_equal false, result.ok?
    assert_equal :header_mismatch, result.error
  end

  def test_headerless_mode_compares_all_rows_as_data
    headerless = fixture_path("sample_people_no_headers.csv")
    same_again = fixture_path("sample_people_no_headers.csv")
    result = run_parity(left_path: headerless, right_path: same_again, headers_present: false)

    assert_equal true, result.ok?
    assert_equal true, result.data[:match]
  end

  def test_returns_file_not_found_for_left_side
    result = run_parity(
      left_path: "/tmp/nope-left.csv",
      right_path: fixture_path("sample_people.csv")
    )

    assert_equal false, result.ok?
    assert_equal :file_not_found, result.error
    assert_equal "/tmp/nope-left.csv", result.data[:path]
  end

  def test_returns_file_not_found_for_right_side
    result = run_parity(
      left_path: fixture_path("sample_people.csv"),
      right_path: "/tmp/nope-right.csv"
    )

    assert_equal false, result.ok?
    assert_equal :file_not_found, result.error
    assert_equal "/tmp/nope-right.csv", result.data[:path]
  end

  def test_returns_parse_error_for_malformed_csv
    result = run_parity(
      left_path: fixture_path("sample_people.csv"),
      right_path: fixture_path("sample_people_bad_tail.csv")
    )

    assert_equal false, result.ok?
    assert_equal :could_not_parse_csv, result.error
  end

  def test_returns_cannot_read_file_when_eacces_is_raised
    failing_use_case = Csvtool::Application::UseCases::RunCsvParity.new(
      comparator: EaccesComparator.new
    )
    result = run_parity(
      use_case: failing_use_case,
      left_path: fixture_path("sample_people.csv"),
      right_path: fixture_path("sample_people.csv")
    )

    assert_equal false, result.ok?
    assert_equal :cannot_read_file, result.error
    assert_equal "/tmp/protected.csv", result.data[:path]
  end

  private

  # Runs the use case (a fresh default instance unless one is supplied) on a
  # session built from the given paths and session options.
  def run_parity(left_path:, right_path:, use_case: Csvtool::Application::UseCases::RunCsvParity.new, **session_options)
    use_case.call(
      session: build_session(left_path: left_path, right_path: right_path, **session_options)
    )
  end
end
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../test_helper"
4
+ require "csvtool/application/use_cases/run_csv_split"
5
+ require "csvtool/domain/csv_split_session/split_source"
6
+ require "csvtool/domain/csv_split_session/split_options"
7
+ require "csvtool/domain/csv_split_session/split_session"
8
+ require "tmpdir"
9
+ require "fileutils"
10
+
11
# Exercises the RunCsvSplit use case end to end against a temporary copy of
# the 25-row fixture.
class RunCsvSplitTest < Minitest::Test
  # Absolute path of a file under the test fixtures directory.
  def fixture_path(name)
    File.expand_path("../../../fixtures/#{name}", __dir__)
  end

  def test_splits_25_rows_into_10_10_5_with_headers
    with_source_copy do |_dir, source_path|
      result = run_split(source_path, chunk_size: 10)

      assert result.ok?
      assert_equal 3, result.data[:chunk_count]
      assert_equal 25, result.data[:data_rows]
      assert_equal 3, result.data[:chunk_paths].length

      chunk_1, chunk_2, chunk_3 = (0..2).map do |index|
        File.read(result.data[:chunk_paths][index]).lines.map(&:strip)
      end

      # Each chunk holds a header line plus its data rows.
      [[chunk_1, 11], [chunk_2, 11], [chunk_3, 6]].each do |chunk, expected_length|
        assert_equal expected_length, chunk.length
      end
      [chunk_1, chunk_2, chunk_3].each do |chunk|
        assert_equal "name,city", chunk.first
      end
      [
        [chunk_1, 1, "Name01,City01"],
        [chunk_1, 10, "Name10,City10"],
        [chunk_2, 1, "Name11,City11"],
        [chunk_2, 10, "Name20,City20"],
        [chunk_3, 1, "Name21,City21"],
        [chunk_3, 5, "Name25,City25"]
      ].each do |chunk, index, expected_row|
        assert_equal expected_row, chunk[index]
      end
    end
  end

  def test_returns_output_file_exists_when_overwrite_is_disabled
    with_source_copy do |dir, source_path|
      # Pre-existing first chunk that the split must refuse to overwrite.
      File.write(File.join(dir, "people_part_001.csv"), "sentinel\n")

      result = run_split(source_path, chunk_size: 10, overwrite_existing: false)

      refute result.ok?
      assert_equal :output_file_exists, result.error
      assert_equal File.join(dir, "people_part_001.csv"), result.data[:path]
    end
  end

  def test_creates_output_directory_when_it_does_not_exist
    with_source_copy do |dir, source_path|
      output_dir = File.join(dir, "new_chunks")

      result = run_split(source_path, chunk_size: 10, output_directory: output_dir, file_prefix: "batch")

      assert result.ok?
      assert Dir.exist?(output_dir)
      assert File.file?(File.join(output_dir, "batch_part_001.csv"))
    end
  end

  def test_writes_manifest_when_enabled
    with_source_copy do |dir, source_path|
      manifest_path = File.join(dir, "manifest.csv")

      result = run_split(source_path, chunk_size: 10, write_manifest: true, manifest_path: manifest_path)

      assert result.ok?
      assert_equal manifest_path, result.data[:manifest_path]
      lines = File.read(manifest_path).lines.map(&:strip)
      assert_equal "chunk_index,chunk_path,row_count", lines.first
      [",10", ",10", ",5"].each_with_index do |row_count_suffix, offset|
        assert_includes lines[offset + 1], row_count_suffix
      end
    end
  end

  private

  # Copies the 25-row fixture to "people.csv" inside a fresh temporary
  # directory and yields the directory and the copy's path.
  def with_source_copy
    Dir.mktmpdir do |dir|
      source_path = File.join(dir, "people.csv")
      FileUtils.cp(fixture_path("split_people_25.csv"), source_path)
      yield dir, source_path
    end
  end

  # Runs a fresh RunCsvSplit on a comma-separated source with headers, using
  # the given SplitOptions keyword arguments.
  def run_split(source_path, **option_arguments)
    split = Csvtool::Domain::CsvSplitSession
    source = split::SplitSource.new(path: source_path, separator: ",", headers_present: true)
    options = split::SplitOptions.new(**option_arguments)
    session = split::SplitSession.start(source: source, options: options)
    Csvtool::Application::UseCases::RunCsvSplit.new.call(session: session)
  end
end