csvops 0.3.0.alpha → 0.5.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/README.md +69 -149
  3. data/docs/architecture.md +396 -0
  4. data/docs/release-v0.4.0-alpha.md +87 -0
  5. data/docs/release-v0.5.0-alpha.md +89 -0
  6. data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
  7. data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
  8. data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
  9. data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
  10. data/lib/csvtool/cli.rb +11 -7
  11. data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
  12. data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
  13. data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
  14. data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
  15. data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
  16. data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
  17. data/lib/csvtool/domain/row_session/row_source.rb +3 -0
  18. data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
  19. data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
  20. data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
  21. data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
  22. data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
  23. data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
  24. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
  25. data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
  26. data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
  27. data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
  28. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
  29. data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
  30. data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
  31. data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
  32. data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
  33. data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
  34. data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
  35. data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
  36. data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
  37. data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
  38. data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
  39. data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
  40. data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
  41. data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
  42. data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
  43. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
  44. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
  45. data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
  46. data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
  47. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
  48. data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
  49. data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
  50. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
  51. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
  52. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
  53. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
  54. data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
  55. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
  56. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
  57. data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
  58. data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
  59. data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
  60. data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
  61. data/lib/csvtool/version.rb +1 -1
  62. data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
  63. data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
  64. data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
  65. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
  66. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
  67. data/test/csvtool/cli_test.rb +130 -16
  68. data/test/csvtool/cli_unit_test.rb +16 -3
  69. data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
  70. data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
  71. data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
  72. data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
  73. data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
  74. data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
  75. data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
  76. data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
  77. data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
  78. data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
  79. data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
  80. data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
  81. data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
  82. data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
  83. data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
  84. data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
  85. data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
  86. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
  87. data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
  88. data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
  89. data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
  90. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
  91. data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
  92. data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
  93. data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
  94. data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
  95. data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
  96. data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
  97. data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
  98. data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
  99. data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
  100. data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
  101. data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
  102. data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
  103. data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
  104. data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
  105. data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
  106. data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
  107. data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
  108. data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
  109. data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
  110. data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
  111. data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
  112. data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
  113. data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
  114. data/test/fixtures/dedupe_reference.csv +3 -0
  115. data/test/fixtures/dedupe_reference.tsv +3 -0
  116. data/test/fixtures/dedupe_reference_all.csv +5 -0
  117. data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
  118. data/test/fixtures/dedupe_reference_none.csv +2 -0
  119. data/test/fixtures/dedupe_reference_normalization.csv +3 -0
  120. data/test/fixtures/dedupe_source.csv +6 -0
  121. data/test/fixtures/dedupe_source.tsv +6 -0
  122. data/test/fixtures/dedupe_source_no_headers.csv +5 -0
  123. data/test/fixtures/dedupe_source_normalization.csv +4 -0
  124. metadata +93 -8
  125. data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
  126. data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
  127. data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
  128. data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
  129. data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
@@ -0,0 +1,87 @@
1
+ # Release Checklist: v0.4.0-alpha
2
+
3
+ ## 1. Verify environment
4
+
5
+ ```bash
6
+ ruby -v
7
+ bundle -v
8
+ ```
9
+
10
+ Expected:
11
+ - Ruby `3.3.0`
12
+
13
+ ## 2. Install dependencies
14
+
15
+ ```bash
16
+ bundle install
17
+ ```
18
+
19
+ ## 3. Run quality checks
20
+
21
+ ```bash
22
+ bundle exec rake test
23
+ ```
24
+
25
+ ## 4. Smoke test CLI commands
26
+
27
+ ```bash
28
+ bundle exec csvtool menu
29
+ bundle exec csvtool column test/fixtures/sample_people.csv name
30
+ ```
31
+
32
+ ## 5. Smoke test workflows
33
+
34
+ ### Row randomization workflow
35
+
36
+ Use menu option `3` (`Randomize rows`) and verify:
37
+ - headered CSV output keeps header in first row
38
+ - seeded mode is reproducible
39
+ - file output path writes valid CSV
40
+ - headerless mode randomizes all rows
41
+
42
+ ### Cross-CSV dedupe workflow
43
+
44
+ Use menu option `4` (`Dedupe using another CSV`) and verify:
45
+ - headered + comma happy path produces expected retained rows
46
+ - headerless + index selectors work
47
+ - TSV separators work
48
+ - normalization toggles (`trim`, `case-insensitive`) behave as expected
49
+ - diagnostics render for `no matches` and `all removed`
50
+ - file output mode writes expected CSV
51
+
52
+ ## 6. Build and validate gem package
53
+
54
+ ```bash
55
+ gem build csvops.gemspec
56
+ gem install ./csvops-0.4.0.alpha.gem
57
+ csvtool menu
58
+ ```
59
+
60
+ ## 7. Commit release prep
61
+
62
+ ```bash
63
+ git add -A
64
+ git commit -m "chore(release): prepare v0.4.0-alpha"
65
+ ```
66
+
67
+ ## 8. Tag release
68
+
69
+ ```bash
70
+ git tag -a v0.4.0-alpha -m "v0.4.0-alpha"
71
+ git push origin main --tags
72
+ ```
73
+
74
+ ## 9. Publish gem (optional for alpha)
75
+
76
+ ```bash
77
+ gem push csvops-0.4.0.alpha.gem
78
+ ```
79
+
80
+ ## 10. Create GitHub release
81
+
82
+ Create release `v0.4.0-alpha` with:
83
+ - Cross-CSV dedupe workflow with normalization options and large-file streaming behavior
84
+ - Dedupe domain model (`CrossCsvDedupeSession`) with stronger invariants
85
+ - Shared-kernel `OutputDestination` value object across workflows
86
+ - Architecture/docs split (`README` + `docs/architecture.md`) with UML diagrams
87
+ - Dedupe boundary cleanup: CLI workflow (`RunCrossCsvDedupeWorkflow`) and application use-case separation
@@ -0,0 +1,89 @@
1
+ # Release Checklist: v0.5.0-alpha
2
+
3
+ ## 1. Verify environment
4
+
5
+ ```bash
6
+ ruby -v
7
+ bundle -v
8
+ ```
9
+
10
+ Expected:
11
+ - Ruby `3.3.x`
12
+
13
+ ## 2. Install dependencies
14
+
15
+ ```bash
16
+ bundle install
17
+ ```
18
+
19
+ ## 3. Run quality checks
20
+
21
+ ```bash
22
+ bundle exec rake test
23
+ ```
24
+
25
+ ## 4. Smoke test CLI commands
26
+
27
+ ```bash
28
+ bundle exec csvtool menu
29
+ bundle exec csvtool column test/fixtures/sample_people.csv name
30
+ ```
31
+
32
+ ## 5. Smoke test workflows
33
+
34
+ ### Row extraction workflow
35
+
36
+ Use menu option `2` (`Extract rows (range)`) and verify:
37
+ - headered CSV rows print correctly in console mode
38
+ - out-of-bounds row range shows friendly message
39
+ - file output mode writes expected CSV rows
40
+
41
+ ### Row randomization workflow
42
+
43
+ Use menu option `3` (`Randomize rows`) and verify:
44
+ - seeded mode is reproducible
45
+ - headered and headerless modes both work
46
+ - file output path writes valid randomized CSV
47
+
48
+ ### Cross-CSV dedupe workflow
49
+
50
+ Use menu option `4` (`Dedupe using another CSV`) and verify:
51
+ - expected retained rows for headered source/reference files
52
+ - separator/header-mode combinations still work
53
+ - file output mode writes expected deduped CSV
54
+
55
+ ## 6. Build and validate gem package
56
+
57
+ ```bash
58
+ gem build csvops.gemspec
59
+ gem install ./csvops-0.5.0.alpha.gem
60
+ csvtool menu
61
+ ```
62
+
63
+ ## 7. Commit release prep
64
+
65
+ ```bash
66
+ git add -A
67
+ git commit -m "chore(release): prepare v0.5.0-alpha"
68
+ ```
69
+
70
+ ## 8. Tag release
71
+
72
+ ```bash
73
+ git tag -a v0.5.0-alpha -m "v0.5.0-alpha"
74
+ git push origin main --tags
75
+ ```
76
+
77
+ ## 9. Publish gem (optional for alpha)
78
+
79
+ ```bash
80
+ gem push csvops-0.5.0.alpha.gem
81
+ ```
82
+
83
+ ## 10. Create GitHub release
84
+
85
+ Create release `v0.5.0-alpha` with:
86
+ - Use-case file-write boundary cleanup across all workflows
87
+ - New infrastructure file-writer adapters for row randomization and cross-CSV dedupe
88
+ - Final architecture boundary audit with guard test for direct write APIs in use cases
89
+ - Updated architecture diagrams to reflect current writer adapter dependencies
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+ require "csvtool/infrastructure/csv/header_reader"
5
+ require "csvtool/infrastructure/csv/cross_csv_deduper"
6
+ require "csvtool/infrastructure/csv/selector_validator"
7
+ require "csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer"
8
+
9
+ module Csvtool
10
+ module Application
11
+ module UseCases
12
+ class RunCrossCsvDedupe
13
+ Result = Struct.new(:ok, :error, :data, keyword_init: true) do
14
+ def ok?
15
+ ok
16
+ end
17
+ end
18
+
19
+ def initialize(
20
+ header_reader: Infrastructure::CSV::HeaderReader.new,
21
+ deduper: Infrastructure::CSV::CrossCsvDeduper.new,
22
+ selector_validator: Infrastructure::CSV::SelectorValidator.new(header_reader: header_reader),
23
+ csv_cross_csv_dedupe_file_writer: nil
24
+ )
25
+ @header_reader = header_reader
26
+ @deduper = deduper
27
+ @selector_validator = selector_validator
28
+ @csv_cross_csv_dedupe_file_writer = csv_cross_csv_dedupe_file_writer || Infrastructure::Output::CsvCrossCsvDedupeFileWriter.new(
29
+ deduper: @deduper
30
+ )
31
+ end
32
+
33
+ def call(session:, on_header: nil, on_row: nil)
34
+ current_read_path = session.source.path
35
+ return failure(:column_not_found) unless @selector_validator.valid?(profile: session.source, selector: session.key_mapping.source_selector)
36
+
37
+ current_read_path = session.reference.path
38
+ return failure(:column_not_found) unless @selector_validator.valid?(profile: session.reference, selector: session.key_mapping.reference_selector)
39
+
40
+ source_headers = session.source.headers_present? ? @header_reader.call(file_path: session.source.path, col_sep: session.source.separator) : nil
41
+ current_read_path = session.source.path
42
+
43
+ if session.output_destination.file?
44
+ write_file(session: session, source_headers: source_headers)
45
+ else
46
+ on_header.call(source_headers) if on_header && source_headers
47
+ stats = @deduper.each_retained(**dedupe_options(session)) do |fields|
48
+ on_row.call(fields) if on_row
49
+ end
50
+ success(stats: stats)
51
+ end
52
+ rescue CSV::MalformedCSVError
53
+ failure(:could_not_parse_csv)
54
+ rescue Errno::EACCES, Errno::ENOENT => e
55
+ if session.output_destination.file?
56
+ failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
57
+ else
58
+ failure(:cannot_read_file, path: current_read_path || session.source.path)
59
+ end
60
+ end
61
+
62
+ private
63
+
64
+ def write_file(session:, source_headers:)
65
+ stats = @csv_cross_csv_dedupe_file_writer.call(
66
+ path: session.output_destination.path,
67
+ headers: source_headers,
68
+ col_sep: session.source.separator,
69
+ dedupe_options: dedupe_options(session)
70
+ )
71
+ success(stats: stats, output_path: session.output_destination.path)
72
+ end
73
+
74
+ def dedupe_options(session)
75
+ {
76
+ source_path: session.source.path,
77
+ reference_path: session.reference.path,
78
+ source_selector: session.key_mapping.source_selector,
79
+ reference_selector: session.key_mapping.reference_selector,
80
+ source_col_sep: session.source.separator,
81
+ reference_col_sep: session.reference.separator,
82
+ match_options: session.match_options
83
+ }
84
+ end
85
+
86
+ def success(data)
87
+ Result.new(ok: true, error: nil, data: data)
88
+ end
89
+
90
+ def failure(code, data = {})
91
+ Result.new(ok: false, error: code, data: data)
92
+ end
93
+ end
94
+ end
95
+ end
96
+ end
@@ -1,64 +1,47 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "csv"
4
- require "csvtool/interface/cli/errors/presenter"
5
- require "csvtool/interface/cli/prompts/file_path_prompt"
6
- require "csvtool/interface/cli/prompts/separator_prompt"
7
- require "csvtool/interface/cli/prompts/column_selector_prompt"
8
- require "csvtool/interface/cli/prompts/skip_blanks_prompt"
9
- require "csvtool/interface/cli/prompts/confirm_prompt"
10
- require "csvtool/interface/cli/prompts/output_destination_prompt"
11
4
  require "csvtool/infrastructure/csv/header_reader"
12
5
  require "csvtool/infrastructure/csv/value_streamer"
13
- require "csvtool/services/preview_builder"
14
- require "csvtool/infrastructure/output/console_writer"
15
6
  require "csvtool/infrastructure/output/csv_file_writer"
16
- require "csvtool/domain/column_session/separator"
17
- require "csvtool/domain/column_session/csv_source"
18
- require "csvtool/domain/column_session/column_selection"
19
- require "csvtool/domain/column_session/extraction_options"
20
- require "csvtool/domain/column_session/extraction_value"
21
- require "csvtool/domain/column_session/preview"
22
- require "csvtool/domain/column_session/output_destination"
23
- require "csvtool/domain/column_session/column_session"
7
+ require "csvtool/services/preview_builder"
24
8
 
25
9
  module Csvtool
26
10
  module Application
27
11
  module UseCases
28
12
  class RunExtraction
29
- def initialize(stdin:, stdout:)
30
- @stdin = stdin
31
- @stdout = stdout
32
- @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
33
- @header_reader = Infrastructure::CSV::HeaderReader.new
34
- @value_streamer = Infrastructure::CSV::ValueStreamer.new
35
- @preview_builder = Services::PreviewBuilder.new(value_streamer: @value_streamer)
13
+ Result = Struct.new(:ok, :error, :data, keyword_init: true) do
14
+ def ok?
15
+ ok
16
+ end
36
17
  end
37
18
 
38
- def call
39
- file_path = Interface::CLI::Prompts::FilePathPrompt.new(stdin: @stdin, stdout: @stdout).call
40
- return @errors.file_not_found(file_path) unless File.file?(file_path)
41
-
42
- col_sep = Interface::CLI::Prompts::SeparatorPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call
43
- return if col_sep.nil?
44
- separator = Domain::ColumnSession::Separator.new(col_sep)
19
+ def initialize(
20
+ header_reader: Infrastructure::CSV::HeaderReader.new,
21
+ value_streamer: Infrastructure::CSV::ValueStreamer.new,
22
+ preview_builder: nil,
23
+ csv_file_writer: nil
24
+ )
25
+ @header_reader = header_reader
26
+ @value_streamer = value_streamer
27
+ @preview_builder = preview_builder || Services::PreviewBuilder.new(value_streamer: value_streamer)
28
+ @csv_file_writer = csv_file_writer || Infrastructure::Output::CsvFileWriter.new(value_streamer: @value_streamer)
29
+ end
45
30
 
46
- source = Domain::ColumnSession::CsvSource.new(path: file_path, separator: separator)
47
- headers = @header_reader.call(file_path: source.path, col_sep: source.separator.value)
48
- return @errors.no_headers if headers.empty?
31
+ def read_headers(file_path:, col_sep:)
32
+ return failure(:file_not_found, path: file_path) unless File.file?(file_path)
49
33
 
50
- column_name = Interface::CLI::Prompts::ColumnSelectorPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call(headers)
51
- return if column_name.nil?
52
- column_selection = Domain::ColumnSession::ColumnSelection.new(name: column_name)
34
+ headers = @header_reader.call(file_path: file_path, col_sep: col_sep)
35
+ return failure(:no_headers) if headers.empty?
53
36
 
54
- skip_blanks = Interface::CLI::Prompts::SkipBlanksPrompt.new(stdin: @stdin, stdout: @stdout).call
55
- options = Domain::ColumnSession::ExtractionOptions.new(skip_blanks: skip_blanks, preview_limit: 10)
56
- session = Domain::ColumnSession::ColumnSession.start(
57
- source: source,
58
- column_selection: column_selection,
59
- options: options
60
- )
37
+ success(headers: headers)
38
+ rescue CSV::MalformedCSVError
39
+ failure(:could_not_parse_csv)
40
+ rescue Errno::EACCES
41
+ failure(:cannot_read_file, path: file_path)
42
+ end
61
43
 
44
+ def preview(session:)
62
45
  preview_values = @preview_builder.call(
63
46
  file_path: session.source.path,
64
47
  column_name: session.column_selection.name,
@@ -66,58 +49,50 @@ module Csvtool
66
49
  skip_blanks: session.options.skip_blanks?,
67
50
  limit: session.options.preview_limit
68
51
  )
69
- preview = Domain::ColumnSession::Preview.new(
70
- values: preview_values.map { |value| Domain::ColumnSession::ExtractionValue.new(value) }
71
- )
72
- session = session.with_preview(preview)
73
-
74
- confirmed = Interface::CLI::Prompts::ConfirmPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call(session.preview.to_strings)
75
- return unless confirmed
76
- session = session.confirm!
77
-
78
- output_destination = Interface::CLI::Prompts::OutputDestinationPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call
79
- return if output_destination.nil?
80
- domain_destination =
81
- if output_destination[:mode] == :file
82
- Domain::ColumnSession::OutputDestination.file(path: output_destination[:path])
83
- else
84
- Domain::ColumnSession::OutputDestination.console
85
- end
86
- session = session.with_output_destination(domain_destination)
87
-
88
- write_output(
89
- session.output_destination,
90
- file_path: session.source.path,
91
- column_name: session.column_selection.name,
92
- col_sep: session.source.separator.value,
93
- skip_blanks: session.options.skip_blanks?
94
- )
52
+ success(preview_values: preview_values)
95
53
  rescue CSV::MalformedCSVError
96
- @errors.could_not_parse_csv
54
+ failure(:could_not_parse_csv)
97
55
  rescue Errno::EACCES
98
- @errors.cannot_read_file(file_path)
56
+ failure(:cannot_read_file, path: session.source.path)
99
57
  end
100
58
 
101
- private
102
-
103
- def writer_for(output_destination)
104
- if output_destination.file?
105
- Infrastructure::Output::CsvFileWriter.new(stdout: @stdout, errors: @errors, value_streamer: @value_streamer)
59
+ def extract(session:, on_value: nil)
60
+ if session.output_destination.file?
61
+ @csv_file_writer.call(
62
+ output_path: session.output_destination.path,
63
+ file_path: session.source.path,
64
+ column_name: session.column_selection.name,
65
+ col_sep: session.source.separator.value,
66
+ skip_blanks: session.options.skip_blanks?
67
+ )
68
+ success(output_path: session.output_destination.path)
106
69
  else
107
- Infrastructure::Output::ConsoleWriter.new(stdout: @stdout, value_streamer: @value_streamer)
70
+ @value_streamer.each(
71
+ file_path: session.source.path,
72
+ column_name: session.column_selection.name,
73
+ col_sep: session.source.separator.value,
74
+ skip_blanks: session.options.skip_blanks?
75
+ ) { |value| on_value.call(value) if on_value }
76
+ success({})
108
77
  end
78
+ rescue CSV::MalformedCSVError
79
+ failure(:could_not_parse_csv)
80
+ rescue Errno::EACCES, Errno::ENOENT => e
81
+ if session.output_destination.file?
82
+ failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
83
+ else
84
+ failure(:cannot_read_file, path: session.source.path)
85
+ end
86
+ end
87
+
88
+ private
89
+
90
+ def success(data)
91
+ Result.new(ok: true, error: nil, data: data)
109
92
  end
110
93
 
111
- def write_output(output_destination, file_path:, column_name:, col_sep:, skip_blanks:)
112
- writer = writer_for(output_destination)
113
- args = {
114
- file_path: file_path,
115
- column_name: column_name,
116
- col_sep: col_sep,
117
- skip_blanks: skip_blanks
118
- }
119
- args[:output_path] = output_destination.path if output_destination.file?
120
- writer.call(**args)
94
+ def failure(code, data = {})
95
+ Result.new(ok: false, error: code, data: data)
121
96
  end
122
97
  end
123
98
  end
@@ -1,74 +1,46 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "csv"
4
- require "csvtool/interface/cli/errors/presenter"
5
- require "csvtool/interface/cli/prompts/file_path_prompt"
6
- require "csvtool/interface/cli/prompts/separator_prompt"
7
- require "csvtool/interface/cli/prompts/output_destination_prompt"
8
4
  require "csvtool/infrastructure/csv/header_reader"
9
5
  require "csvtool/infrastructure/csv/row_streamer"
10
- require "csvtool/infrastructure/output/csv_row_console_writer"
11
6
  require "csvtool/infrastructure/output/csv_row_file_writer"
12
- require "csvtool/domain/row_session/row_range"
13
- require "csvtool/domain/row_session/row_source"
14
- require "csvtool/domain/row_session/row_output_destination"
15
- require "csvtool/domain/row_session/row_session"
16
7
 
17
8
  module Csvtool
18
9
  module Application
19
10
  module UseCases
20
11
  class RunRowExtraction
21
- def initialize(stdin:, stdout:)
22
- @stdin = stdin
23
- @stdout = stdout
24
- @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
25
- @header_reader = Infrastructure::CSV::HeaderReader.new
26
- @row_streamer = Infrastructure::CSV::RowStreamer.new
12
+ Result = Struct.new(:ok, :error, :data, keyword_init: true) do
13
+ def ok?
14
+ ok
15
+ end
27
16
  end
28
17
 
29
- def call
30
- file_path = Interface::CLI::Prompts::FilePathPrompt.new(stdin: @stdin, stdout: @stdout).call
31
- return @errors.file_not_found(file_path) unless File.file?(file_path)
32
-
33
- col_sep = Interface::CLI::Prompts::SeparatorPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call
34
- return if col_sep.nil?
35
- source = Domain::RowSession::RowSource.new(path: file_path, separator: col_sep)
36
-
37
- @stdout.print "Start row (1-based, inclusive): "
38
- start_row_input = @stdin.gets&.strip.to_s
39
- @stdout.print "End row (1-based, inclusive): "
40
- end_row_input = @stdin.gets&.strip.to_s
18
+ def initialize(
19
+ header_reader: Infrastructure::CSV::HeaderReader.new,
20
+ row_streamer: Infrastructure::CSV::RowStreamer.new,
21
+ csv_row_file_writer: nil
22
+ )
23
+ @header_reader = header_reader
24
+ @row_streamer = row_streamer
25
+ @csv_row_file_writer = csv_row_file_writer || Infrastructure::Output::CsvRowFileWriter.new(row_streamer: @row_streamer)
26
+ end
41
27
 
42
- headers = @header_reader.call(file_path: source.path, col_sep: source.separator)
43
- return @errors.no_headers if headers.empty?
28
+ def read_headers(file_path:, col_sep:)
29
+ return failure(:file_not_found, path: file_path) unless File.file?(file_path)
44
30
 
45
- row_range = Domain::RowSession::RowRange.from_inputs(
46
- start_row_input: start_row_input,
47
- end_row_input: end_row_input
48
- )
49
- session = Domain::RowSession::RowSession.start(source: source, row_range: row_range)
31
+ headers = @header_reader.call(file_path: file_path, col_sep: col_sep)
32
+ return failure(:no_headers) if headers.empty?
50
33
 
51
- output_destination = Interface::CLI::Prompts::OutputDestinationPrompt.new(
52
- stdin: @stdin,
53
- stdout: @stdout,
54
- errors: @errors
55
- ).call
56
- return if output_destination.nil?
57
- destination =
58
- if output_destination[:mode] == :file
59
- Domain::RowSession::RowOutputDestination.file(path: output_destination[:path])
60
- else
61
- Domain::RowSession::RowOutputDestination.console
62
- end
63
- session = session.with_output_destination(destination)
34
+ success(headers: headers)
35
+ rescue CSV::MalformedCSVError
36
+ failure(:could_not_parse_csv)
37
+ rescue Errno::EACCES
38
+ failure(:cannot_read_file, path: file_path)
39
+ end
64
40
 
65
- stats =
41
+ def extract(session:, headers:, on_row: nil)
66
42
  if session.output_destination.file?
67
- Infrastructure::Output::CsvRowFileWriter.new(
68
- stdout: @stdout,
69
- errors: @errors,
70
- row_streamer: @row_streamer
71
- ).call(
43
+ stats = @csv_row_file_writer.call(
72
44
  output_path: session.output_destination.path,
73
45
  file_path: session.source.path,
74
46
  col_sep: session.source.separator,
@@ -76,35 +48,35 @@ module Csvtool
76
48
  start_row: session.row_range.start_row,
77
49
  end_row: session.row_range.end_row
78
50
  )
51
+ success(stats.merge(output_path: session.output_destination.path))
79
52
  else
80
- Infrastructure::Output::CsvRowConsoleWriter.new(stdout: @stdout, row_streamer: @row_streamer).call(
53
+ stats = @row_streamer.each_in_range(
81
54
  file_path: session.source.path,
82
55
  col_sep: session.source.separator,
83
- headers: headers,
84
56
  start_row: session.row_range.start_row,
85
57
  end_row: session.row_range.end_row
86
- )
58
+ ) { |fields| on_row.call(fields) if on_row }
59
+ success(stats)
87
60
  end
88
- return if stats.nil?
89
-
90
- @errors.row_range_out_of_bounds(stats[:row_count]) unless stats[:matched]
91
- rescue Domain::RowSession::InvalidStartRowError
92
- @errors.invalid_start_row
93
- rescue Domain::RowSession::InvalidEndRowError
94
- @errors.invalid_end_row
95
- rescue Domain::RowSession::InvalidRowRangeOrderError
96
- @errors.invalid_row_range_order
97
- rescue ArgumentError => e
98
- return @errors.empty_output_path if e.message == "file output path cannot be empty"
99
-
100
- raise e
101
61
  rescue CSV::MalformedCSVError
102
- @errors.could_not_parse_csv
103
- rescue Errno::EACCES
104
- @errors.cannot_read_file(file_path)
62
+ failure(:could_not_parse_csv)
63
+ rescue Errno::EACCES, Errno::ENOENT => e
64
+ if session.output_destination.file?
65
+ failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
66
+ else
67
+ failure(:cannot_read_file, path: session.source.path)
68
+ end
105
69
  end
106
-
70
+
107
71
  private
72
+
73
+ def success(data)
74
+ Result.new(ok: true, error: nil, data: data)
75
+ end
76
+
77
+ def failure(code, data = {})
78
+ Result.new(ok: false, error: code, data: data)
79
+ end
108
80
  end
109
81
  end
110
82
  end