csvops 0.3.0.alpha → 0.5.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -149
- data/docs/architecture.md +396 -0
- data/docs/release-v0.4.0-alpha.md +87 -0
- data/docs/release-v0.5.0-alpha.md +89 -0
- data/lib/csvtool/application/use_cases/run_cross_csv_dedupe.rb +96 -0
- data/lib/csvtool/application/use_cases/run_extraction.rb +63 -88
- data/lib/csvtool/application/use_cases/run_row_extraction.rb +45 -73
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +56 -73
- data/lib/csvtool/cli.rb +11 -7
- data/lib/csvtool/domain/cross_csv_dedupe_session/column_selector.rb +44 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session.rb +46 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/csv_profile.rb +24 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/key_mapping.rb +22 -0
- data/lib/csvtool/domain/cross_csv_dedupe_session/match_options.rb +29 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +1 -0
- data/lib/csvtool/domain/row_session/row_source.rb +3 -0
- data/lib/csvtool/domain/{column_session → shared}/output_destination.rb +1 -1
- data/lib/csvtool/infrastructure/csv/cross_csv_deduper.rb +85 -0
- data/lib/csvtool/infrastructure/csv/selector_validator.rb +30 -0
- data/lib/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_file_writer.rb +1 -7
- data/lib/csvtool/infrastructure/output/csv_randomized_row_file_writer.rb +23 -0
- data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +2 -9
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/dedupe_key_selector_prompt.rb +30 -0
- data/lib/csvtool/interface/cli/prompts/file_path_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/separator_prompt.rb +4 -2
- data/lib/csvtool/interface/cli/prompts/yes_no_prompt.rb +26 -0
- data/lib/csvtool/interface/cli/workflows/builders/column_session_builder.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_extraction_session_builder.rb +22 -0
- data/lib/csvtool/interface/cli/workflows/builders/row_randomization_session_builder.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/presenters/column_extraction_presenter.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter.rb +39 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_extraction_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/presenters/row_randomization_presenter.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_extraction_workflow.rb +88 -0
- data/lib/csvtool/interface/cli/workflows/run_row_extraction_workflow.rb +86 -0
- data/lib/csvtool/interface/cli/workflows/run_row_randomization_workflow.rb +80 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step.rb +55 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_profiles_step.rb +52 -0
- data/lib/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/execute_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/build_preview_step.rb +40 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_destination_step.rb +28 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step.rb +47 -0
- data/lib/csvtool/interface/cli/workflows/steps/extraction/execute_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_destination_step.rb +33 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_range_step.rb +35 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step.rb +32 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/execute_step.rb +43 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step.rb +29 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_destination_step.rb +34 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step.rb +49 -0
- data/lib/csvtool/interface/cli/workflows/steps/row_randomization/execute_step.rb +37 -0
- data/lib/csvtool/interface/cli/workflows/steps/workflow_step_pipeline.rb +25 -0
- data/lib/csvtool/interface/cli/workflows/support/output_destination_mapper.rb +23 -0
- data/lib/csvtool/interface/cli/workflows/support/result_error_handler.rb +22 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/io_boundary_test.rb +26 -0
- data/test/csvtool/application/use_cases/run_cross_csv_dedupe_test.rb +141 -0
- data/test/csvtool/application/use_cases/run_extraction_test.rb +72 -16
- data/test/csvtool/application/use_cases/run_row_extraction_test.rb +82 -102
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +96 -86
- data/test/csvtool/cli_test.rb +130 -16
- data/test/csvtool/cli_unit_test.rb +16 -3
- data/test/csvtool/domain/column_session/column_session_test.rb +2 -2
- data/test/csvtool/domain/column_session/csv_source_test.rb +10 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/column_selector_test.rb +42 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/cross_csv_dedupe_session_test.rb +75 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/csv_profile_test.rb +26 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/key_mapping_test.rb +31 -0
- data/test/csvtool/domain/cross_csv_dedupe_session/match_options_test.rb +52 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +2 -2
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +15 -1
- data/test/csvtool/domain/row_session/row_session_test.rb +2 -2
- data/test/csvtool/domain/row_session/row_source_test.rb +16 -0
- data/test/csvtool/domain/shared/output_destination_test.rb +24 -0
- data/test/csvtool/infrastructure/csv/cross_csv_deduper_test.rb +155 -0
- data/test/csvtool/infrastructure/csv/selector_validator_test.rb +72 -0
- data/test/csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_file_writer_test.rb +0 -4
- data/test/csvtool/infrastructure/output/csv_randomized_row_file_writer_test.rb +32 -0
- data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +1 -4
- data/test/csvtool/interface/cli/menu_loop_test.rb +50 -13
- data/test/csvtool/interface/cli/prompts/dedupe_key_selector_prompt_test.rb +30 -0
- data/test/csvtool/interface/cli/prompts/file_path_prompt_test.rb +9 -0
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/separator_prompt_test.rb +10 -0
- data/test/csvtool/interface/cli/prompts/yes_no_prompt_test.rb +22 -0
- data/test/csvtool/interface/cli/workflows/builders/column_session_builder_test.rb +17 -0
- data/test/csvtool/interface/cli/workflows/builders/cross_csv_dedupe_session_builder_test.rb +36 -0
- data/test/csvtool/interface/cli/workflows/builders/row_extraction_session_builder_test.rb +21 -0
- data/test/csvtool/interface/cli/workflows/builders/row_randomization_session_builder_test.rb +26 -0
- data/test/csvtool/interface/cli/workflows/presenters/column_extraction_presenter_test.rb +24 -0
- data/test/csvtool/interface/cli/workflows/presenters/cross_csv_dedupe_presenter_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_extraction_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/presenters/row_randomization_presenter_test.rb +33 -0
- data/test/csvtool/interface/cli/workflows/run_cross_csv_dedupe_workflow_test.rb +246 -0
- data/test/csvtool/interface/cli/workflows/run_extraction_workflow_test.rb +56 -0
- data/test/csvtool/interface/cli/workflows/run_row_extraction_workflow_test.rb +83 -0
- data/test/csvtool/interface/cli/workflows/run_row_randomization_workflow_test.rb +69 -0
- data/test/csvtool/interface/cli/workflows/steps/cross_csv_dedupe/collect_options_step_test.rb +41 -0
- data/test/csvtool/interface/cli/workflows/steps/extraction/collect_inputs_step_test.rb +66 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/collect_source_step_test.rb +39 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/execute_step_test.rb +91 -0
- data/test/csvtool/interface/cli/workflows/steps/row_extraction/read_headers_step_test.rb +57 -0
- data/test/csvtool/interface/cli/workflows/steps/row_randomization/collect_inputs_step_test.rb +37 -0
- data/test/csvtool/interface/cli/workflows/steps/workflow_step_pipeline_test.rb +30 -0
- data/test/csvtool/interface/cli/workflows/support/output_destination_mapper_test.rb +23 -0
- data/test/csvtool/interface/cli/workflows/support/result_error_handler_test.rb +34 -0
- data/test/fixtures/dedupe_reference.csv +3 -0
- data/test/fixtures/dedupe_reference.tsv +3 -0
- data/test/fixtures/dedupe_reference_all.csv +5 -0
- data/test/fixtures/dedupe_reference_no_headers.csv +2 -0
- data/test/fixtures/dedupe_reference_none.csv +2 -0
- data/test/fixtures/dedupe_reference_normalization.csv +3 -0
- data/test/fixtures/dedupe_source.csv +6 -0
- data/test/fixtures/dedupe_source.tsv +6 -0
- data/test/fixtures/dedupe_source_no_headers.csv +5 -0
- data/test/fixtures/dedupe_source_normalization.csv +4 -0
- metadata +93 -8
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +0 -31
- data/lib/csvtool/domain/row_session/row_output_destination.rb +0 -31
- data/test/csvtool/domain/column_session/output_destination_test.rb +0 -18
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +0 -21
- data/test/csvtool/domain/row_session/row_output_destination_test.rb +0 -23
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Release Checklist: v0.4.0-alpha
|
|
2
|
+
|
|
3
|
+
## 1. Verify environment
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
ruby -v
|
|
7
|
+
bundle -v
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
Expected:
|
|
11
|
+
- Ruby `3.3.0`
|
|
12
|
+
|
|
13
|
+
## 2. Install dependencies
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bundle install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 3. Run quality checks
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
bundle exec rake test
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 4. Smoke test CLI commands
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
bundle exec csvtool menu
|
|
29
|
+
bundle exec csvtool column test/fixtures/sample_people.csv name
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 5. Smoke test workflows
|
|
33
|
+
|
|
34
|
+
### Row randomization workflow
|
|
35
|
+
|
|
36
|
+
Use menu option `3` (`Randomize rows`) and verify:
|
|
37
|
+
- headered CSV output keeps header in first row
|
|
38
|
+
- seeded mode is reproducible
|
|
39
|
+
- file output path writes valid CSV
|
|
40
|
+
- headerless mode randomizes all rows
|
|
41
|
+
|
|
42
|
+
### Cross-CSV dedupe workflow
|
|
43
|
+
|
|
44
|
+
Use menu option `4` (`Dedupe using another CSV`) and verify:
|
|
45
|
+
- headered + comma happy path produces expected retained rows
|
|
46
|
+
- headerless + index selectors work
|
|
47
|
+
- TSV separators work
|
|
48
|
+
- normalization toggles (`trim`, `case-insensitive`) behave as expected
|
|
49
|
+
- diagnostics render for `no matches` and `all removed`
|
|
50
|
+
- file output mode writes expected CSV
|
|
51
|
+
|
|
52
|
+
## 6. Build and validate gem package
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
gem build csvops.gemspec
|
|
56
|
+
gem install ./csvops-0.4.0.alpha.gem
|
|
57
|
+
csvtool menu
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## 7. Commit release prep
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
git add -A
|
|
64
|
+
git commit -m "chore(release): prepare v0.4.0-alpha"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## 8. Tag release
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
git tag -a v0.4.0-alpha -m "v0.4.0-alpha"
|
|
71
|
+
git push origin main --tags
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## 9. Publish gem (optional for alpha)
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
gem push csvops-0.4.0.alpha.gem
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## 10. Create GitHub release
|
|
81
|
+
|
|
82
|
+
Create release `v0.4.0-alpha` with:
|
|
83
|
+
- Cross-CSV dedupe workflow with normalization options and large-file streaming behavior
|
|
84
|
+
- Dedupe domain model (`CrossCsvDedupeSession`) with stronger invariants
|
|
85
|
+
- Shared-kernel `OutputDestination` value object across workflows
|
|
86
|
+
- Architecture/docs split (`README` + `docs/architecture.md`) with UML diagrams
|
|
87
|
+
- Dedupe boundary cleanup: CLI workflow (`RunCrossCsvDedupeWorkflow`) and application use-case separation
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Release Checklist: v0.5.0-alpha
|
|
2
|
+
|
|
3
|
+
## 1. Verify environment
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
ruby -v
|
|
7
|
+
bundle -v
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
Expected:
|
|
11
|
+
- Ruby `3.3.x`
|
|
12
|
+
|
|
13
|
+
## 2. Install dependencies
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bundle install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 3. Run quality checks
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
bundle exec rake test
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 4. Smoke test CLI commands
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
bundle exec csvtool menu
|
|
29
|
+
bundle exec csvtool column test/fixtures/sample_people.csv name
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 5. Smoke test workflows
|
|
33
|
+
|
|
34
|
+
### Row extraction workflow
|
|
35
|
+
|
|
36
|
+
Use menu option `2` (`Extract rows (range)`) and verify:
|
|
37
|
+
- headered CSV rows print correctly in console mode
|
|
38
|
+
- out-of-bounds row range shows friendly message
|
|
39
|
+
- file output mode writes expected CSV rows
|
|
40
|
+
|
|
41
|
+
### Row randomization workflow
|
|
42
|
+
|
|
43
|
+
Use menu option `3` (`Randomize rows`) and verify:
|
|
44
|
+
- seeded mode is reproducible
|
|
45
|
+
- headered and headerless modes both work
|
|
46
|
+
- file output path writes valid randomized CSV
|
|
47
|
+
|
|
48
|
+
### Cross-CSV dedupe workflow
|
|
49
|
+
|
|
50
|
+
Use menu option `4` (`Dedupe using another CSV`) and verify:
|
|
51
|
+
- expected retained rows for headered source/reference files
|
|
52
|
+
- separator/header-mode combinations still work
|
|
53
|
+
- file output mode writes expected deduped CSV
|
|
54
|
+
|
|
55
|
+
## 6. Build and validate gem package
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
gem build csvops.gemspec
|
|
59
|
+
gem install ./csvops-0.5.0.alpha.gem
|
|
60
|
+
csvtool menu
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## 7. Commit release prep
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
git add -A
|
|
67
|
+
git commit -m "chore(release): prepare v0.5.0-alpha"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## 8. Tag release
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
git tag -a v0.5.0-alpha -m "v0.5.0-alpha"
|
|
74
|
+
git push origin main --tags
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## 9. Publish gem (optional for alpha)
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
gem push csvops-0.5.0.alpha.gem
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## 10. Create GitHub release
|
|
84
|
+
|
|
85
|
+
Create release `v0.5.0-alpha` with:
|
|
86
|
+
- Use-case file-write boundary cleanup across all workflows
|
|
87
|
+
- New infrastructure file-writer adapters for row randomization and cross-CSV dedupe
|
|
88
|
+
- Final architecture boundary audit with guard test for direct write APIs in use cases
|
|
89
|
+
- Updated architecture diagrams to reflect current writer adapter dependencies
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
require "csvtool/infrastructure/csv/header_reader"
|
|
5
|
+
require "csvtool/infrastructure/csv/cross_csv_deduper"
|
|
6
|
+
require "csvtool/infrastructure/csv/selector_validator"
|
|
7
|
+
require "csvtool/infrastructure/output/csv_cross_csv_dedupe_file_writer"
|
|
8
|
+
|
|
9
|
+
module Csvtool
|
|
10
|
+
module Application
|
|
11
|
+
module UseCases
|
|
12
|
+
class RunCrossCsvDedupe
|
|
13
|
+
Result = Struct.new(:ok, :error, :data, keyword_init: true) do
|
|
14
|
+
def ok?
|
|
15
|
+
ok
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def initialize(
|
|
20
|
+
header_reader: Infrastructure::CSV::HeaderReader.new,
|
|
21
|
+
deduper: Infrastructure::CSV::CrossCsvDeduper.new,
|
|
22
|
+
selector_validator: Infrastructure::CSV::SelectorValidator.new(header_reader: header_reader),
|
|
23
|
+
csv_cross_csv_dedupe_file_writer: nil
|
|
24
|
+
)
|
|
25
|
+
@header_reader = header_reader
|
|
26
|
+
@deduper = deduper
|
|
27
|
+
@selector_validator = selector_validator
|
|
28
|
+
@csv_cross_csv_dedupe_file_writer = csv_cross_csv_dedupe_file_writer || Infrastructure::Output::CsvCrossCsvDedupeFileWriter.new(
|
|
29
|
+
deduper: @deduper
|
|
30
|
+
)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def call(session:, on_header: nil, on_row: nil)
|
|
34
|
+
current_read_path = session.source.path
|
|
35
|
+
return failure(:column_not_found) unless @selector_validator.valid?(profile: session.source, selector: session.key_mapping.source_selector)
|
|
36
|
+
|
|
37
|
+
current_read_path = session.reference.path
|
|
38
|
+
return failure(:column_not_found) unless @selector_validator.valid?(profile: session.reference, selector: session.key_mapping.reference_selector)
|
|
39
|
+
|
|
40
|
+
source_headers = session.source.headers_present? ? @header_reader.call(file_path: session.source.path, col_sep: session.source.separator) : nil
|
|
41
|
+
current_read_path = session.source.path
|
|
42
|
+
|
|
43
|
+
if session.output_destination.file?
|
|
44
|
+
write_file(session: session, source_headers: source_headers)
|
|
45
|
+
else
|
|
46
|
+
on_header.call(source_headers) if on_header && source_headers
|
|
47
|
+
stats = @deduper.each_retained(**dedupe_options(session)) do |fields|
|
|
48
|
+
on_row.call(fields) if on_row
|
|
49
|
+
end
|
|
50
|
+
success(stats: stats)
|
|
51
|
+
end
|
|
52
|
+
rescue CSV::MalformedCSVError
|
|
53
|
+
failure(:could_not_parse_csv)
|
|
54
|
+
rescue Errno::EACCES, Errno::ENOENT => e
|
|
55
|
+
if session.output_destination.file?
|
|
56
|
+
failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
|
|
57
|
+
else
|
|
58
|
+
failure(:cannot_read_file, path: current_read_path || session.source.path)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
def write_file(session:, source_headers:)
|
|
65
|
+
stats = @csv_cross_csv_dedupe_file_writer.call(
|
|
66
|
+
path: session.output_destination.path,
|
|
67
|
+
headers: source_headers,
|
|
68
|
+
col_sep: session.source.separator,
|
|
69
|
+
dedupe_options: dedupe_options(session)
|
|
70
|
+
)
|
|
71
|
+
success(stats: stats, output_path: session.output_destination.path)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def dedupe_options(session)
|
|
75
|
+
{
|
|
76
|
+
source_path: session.source.path,
|
|
77
|
+
reference_path: session.reference.path,
|
|
78
|
+
source_selector: session.key_mapping.source_selector,
|
|
79
|
+
reference_selector: session.key_mapping.reference_selector,
|
|
80
|
+
source_col_sep: session.source.separator,
|
|
81
|
+
reference_col_sep: session.reference.separator,
|
|
82
|
+
match_options: session.match_options
|
|
83
|
+
}
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def success(data)
|
|
87
|
+
Result.new(ok: true, error: nil, data: data)
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def failure(code, data = {})
|
|
91
|
+
Result.new(ok: false, error: code, data: data)
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -1,64 +1,47 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "csv"
|
|
4
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
-
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
-
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
-
require "csvtool/interface/cli/prompts/column_selector_prompt"
|
|
8
|
-
require "csvtool/interface/cli/prompts/skip_blanks_prompt"
|
|
9
|
-
require "csvtool/interface/cli/prompts/confirm_prompt"
|
|
10
|
-
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
11
4
|
require "csvtool/infrastructure/csv/header_reader"
|
|
12
5
|
require "csvtool/infrastructure/csv/value_streamer"
|
|
13
|
-
require "csvtool/services/preview_builder"
|
|
14
|
-
require "csvtool/infrastructure/output/console_writer"
|
|
15
6
|
require "csvtool/infrastructure/output/csv_file_writer"
|
|
16
|
-
require "csvtool/
|
|
17
|
-
require "csvtool/domain/column_session/csv_source"
|
|
18
|
-
require "csvtool/domain/column_session/column_selection"
|
|
19
|
-
require "csvtool/domain/column_session/extraction_options"
|
|
20
|
-
require "csvtool/domain/column_session/extraction_value"
|
|
21
|
-
require "csvtool/domain/column_session/preview"
|
|
22
|
-
require "csvtool/domain/column_session/output_destination"
|
|
23
|
-
require "csvtool/domain/column_session/column_session"
|
|
7
|
+
require "csvtool/services/preview_builder"
|
|
24
8
|
|
|
25
9
|
module Csvtool
|
|
26
10
|
module Application
|
|
27
11
|
module UseCases
|
|
28
12
|
class RunExtraction
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@header_reader = Infrastructure::CSV::HeaderReader.new
|
|
34
|
-
@value_streamer = Infrastructure::CSV::ValueStreamer.new
|
|
35
|
-
@preview_builder = Services::PreviewBuilder.new(value_streamer: @value_streamer)
|
|
13
|
+
Result = Struct.new(:ok, :error, :data, keyword_init: true) do
|
|
14
|
+
def ok?
|
|
15
|
+
ok
|
|
16
|
+
end
|
|
36
17
|
end
|
|
37
18
|
|
|
38
|
-
def
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
19
|
+
def initialize(
|
|
20
|
+
header_reader: Infrastructure::CSV::HeaderReader.new,
|
|
21
|
+
value_streamer: Infrastructure::CSV::ValueStreamer.new,
|
|
22
|
+
preview_builder: nil,
|
|
23
|
+
csv_file_writer: nil
|
|
24
|
+
)
|
|
25
|
+
@header_reader = header_reader
|
|
26
|
+
@value_streamer = value_streamer
|
|
27
|
+
@preview_builder = preview_builder || Services::PreviewBuilder.new(value_streamer: value_streamer)
|
|
28
|
+
@csv_file_writer = csv_file_writer || Infrastructure::Output::CsvFileWriter.new(value_streamer: @value_streamer)
|
|
29
|
+
end
|
|
45
30
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
return @errors.no_headers if headers.empty?
|
|
31
|
+
def read_headers(file_path:, col_sep:)
|
|
32
|
+
return failure(:file_not_found, path: file_path) unless File.file?(file_path)
|
|
49
33
|
|
|
50
|
-
|
|
51
|
-
return if
|
|
52
|
-
column_selection = Domain::ColumnSession::ColumnSelection.new(name: column_name)
|
|
34
|
+
headers = @header_reader.call(file_path: file_path, col_sep: col_sep)
|
|
35
|
+
return failure(:no_headers) if headers.empty?
|
|
53
36
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
)
|
|
37
|
+
success(headers: headers)
|
|
38
|
+
rescue CSV::MalformedCSVError
|
|
39
|
+
failure(:could_not_parse_csv)
|
|
40
|
+
rescue Errno::EACCES
|
|
41
|
+
failure(:cannot_read_file, path: file_path)
|
|
42
|
+
end
|
|
61
43
|
|
|
44
|
+
def preview(session:)
|
|
62
45
|
preview_values = @preview_builder.call(
|
|
63
46
|
file_path: session.source.path,
|
|
64
47
|
column_name: session.column_selection.name,
|
|
@@ -66,58 +49,50 @@ module Csvtool
|
|
|
66
49
|
skip_blanks: session.options.skip_blanks?,
|
|
67
50
|
limit: session.options.preview_limit
|
|
68
51
|
)
|
|
69
|
-
|
|
70
|
-
values: preview_values.map { |value| Domain::ColumnSession::ExtractionValue.new(value) }
|
|
71
|
-
)
|
|
72
|
-
session = session.with_preview(preview)
|
|
73
|
-
|
|
74
|
-
confirmed = Interface::CLI::Prompts::ConfirmPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call(session.preview.to_strings)
|
|
75
|
-
return unless confirmed
|
|
76
|
-
session = session.confirm!
|
|
77
|
-
|
|
78
|
-
output_destination = Interface::CLI::Prompts::OutputDestinationPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call
|
|
79
|
-
return if output_destination.nil?
|
|
80
|
-
domain_destination =
|
|
81
|
-
if output_destination[:mode] == :file
|
|
82
|
-
Domain::ColumnSession::OutputDestination.file(path: output_destination[:path])
|
|
83
|
-
else
|
|
84
|
-
Domain::ColumnSession::OutputDestination.console
|
|
85
|
-
end
|
|
86
|
-
session = session.with_output_destination(domain_destination)
|
|
87
|
-
|
|
88
|
-
write_output(
|
|
89
|
-
session.output_destination,
|
|
90
|
-
file_path: session.source.path,
|
|
91
|
-
column_name: session.column_selection.name,
|
|
92
|
-
col_sep: session.source.separator.value,
|
|
93
|
-
skip_blanks: session.options.skip_blanks?
|
|
94
|
-
)
|
|
52
|
+
success(preview_values: preview_values)
|
|
95
53
|
rescue CSV::MalformedCSVError
|
|
96
|
-
|
|
54
|
+
failure(:could_not_parse_csv)
|
|
97
55
|
rescue Errno::EACCES
|
|
98
|
-
|
|
56
|
+
failure(:cannot_read_file, path: session.source.path)
|
|
99
57
|
end
|
|
100
58
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
59
|
+
def extract(session:, on_value: nil)
|
|
60
|
+
if session.output_destination.file?
|
|
61
|
+
@csv_file_writer.call(
|
|
62
|
+
output_path: session.output_destination.path,
|
|
63
|
+
file_path: session.source.path,
|
|
64
|
+
column_name: session.column_selection.name,
|
|
65
|
+
col_sep: session.source.separator.value,
|
|
66
|
+
skip_blanks: session.options.skip_blanks?
|
|
67
|
+
)
|
|
68
|
+
success(output_path: session.output_destination.path)
|
|
106
69
|
else
|
|
107
|
-
|
|
70
|
+
@value_streamer.each(
|
|
71
|
+
file_path: session.source.path,
|
|
72
|
+
column_name: session.column_selection.name,
|
|
73
|
+
col_sep: session.source.separator.value,
|
|
74
|
+
skip_blanks: session.options.skip_blanks?
|
|
75
|
+
) { |value| on_value.call(value) if on_value }
|
|
76
|
+
success({})
|
|
108
77
|
end
|
|
78
|
+
rescue CSV::MalformedCSVError
|
|
79
|
+
failure(:could_not_parse_csv)
|
|
80
|
+
rescue Errno::EACCES, Errno::ENOENT => e
|
|
81
|
+
if session.output_destination.file?
|
|
82
|
+
failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
|
|
83
|
+
else
|
|
84
|
+
failure(:cannot_read_file, path: session.source.path)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
private
|
|
89
|
+
|
|
90
|
+
def success(data)
|
|
91
|
+
Result.new(ok: true, error: nil, data: data)
|
|
109
92
|
end
|
|
110
93
|
|
|
111
|
-
def
|
|
112
|
-
|
|
113
|
-
args = {
|
|
114
|
-
file_path: file_path,
|
|
115
|
-
column_name: column_name,
|
|
116
|
-
col_sep: col_sep,
|
|
117
|
-
skip_blanks: skip_blanks
|
|
118
|
-
}
|
|
119
|
-
args[:output_path] = output_destination.path if output_destination.file?
|
|
120
|
-
writer.call(**args)
|
|
94
|
+
def failure(code, data = {})
|
|
95
|
+
Result.new(ok: false, error: code, data: data)
|
|
121
96
|
end
|
|
122
97
|
end
|
|
123
98
|
end
|
|
@@ -1,74 +1,46 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "csv"
|
|
4
|
-
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
-
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
-
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
-
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
8
4
|
require "csvtool/infrastructure/csv/header_reader"
|
|
9
5
|
require "csvtool/infrastructure/csv/row_streamer"
|
|
10
|
-
require "csvtool/infrastructure/output/csv_row_console_writer"
|
|
11
6
|
require "csvtool/infrastructure/output/csv_row_file_writer"
|
|
12
|
-
require "csvtool/domain/row_session/row_range"
|
|
13
|
-
require "csvtool/domain/row_session/row_source"
|
|
14
|
-
require "csvtool/domain/row_session/row_output_destination"
|
|
15
|
-
require "csvtool/domain/row_session/row_session"
|
|
16
7
|
|
|
17
8
|
module Csvtool
|
|
18
9
|
module Application
|
|
19
10
|
module UseCases
|
|
20
11
|
class RunRowExtraction
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@header_reader = Infrastructure::CSV::HeaderReader.new
|
|
26
|
-
@row_streamer = Infrastructure::CSV::RowStreamer.new
|
|
12
|
+
Result = Struct.new(:ok, :error, :data, keyword_init: true) do
|
|
13
|
+
def ok?
|
|
14
|
+
ok
|
|
15
|
+
end
|
|
27
16
|
end
|
|
28
17
|
|
|
29
|
-
def
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
start_row_input = @stdin.gets&.strip.to_s
|
|
39
|
-
@stdout.print "End row (1-based, inclusive): "
|
|
40
|
-
end_row_input = @stdin.gets&.strip.to_s
|
|
18
|
+
def initialize(
|
|
19
|
+
header_reader: Infrastructure::CSV::HeaderReader.new,
|
|
20
|
+
row_streamer: Infrastructure::CSV::RowStreamer.new,
|
|
21
|
+
csv_row_file_writer: nil
|
|
22
|
+
)
|
|
23
|
+
@header_reader = header_reader
|
|
24
|
+
@row_streamer = row_streamer
|
|
25
|
+
@csv_row_file_writer = csv_row_file_writer || Infrastructure::Output::CsvRowFileWriter.new(row_streamer: @row_streamer)
|
|
26
|
+
end
|
|
41
27
|
|
|
42
|
-
|
|
43
|
-
return
|
|
28
|
+
def read_headers(file_path:, col_sep:)
|
|
29
|
+
return failure(:file_not_found, path: file_path) unless File.file?(file_path)
|
|
44
30
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
end_row_input: end_row_input
|
|
48
|
-
)
|
|
49
|
-
session = Domain::RowSession::RowSession.start(source: source, row_range: row_range)
|
|
31
|
+
headers = @header_reader.call(file_path: file_path, col_sep: col_sep)
|
|
32
|
+
return failure(:no_headers) if headers.empty?
|
|
50
33
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
destination =
|
|
58
|
-
if output_destination[:mode] == :file
|
|
59
|
-
Domain::RowSession::RowOutputDestination.file(path: output_destination[:path])
|
|
60
|
-
else
|
|
61
|
-
Domain::RowSession::RowOutputDestination.console
|
|
62
|
-
end
|
|
63
|
-
session = session.with_output_destination(destination)
|
|
34
|
+
success(headers: headers)
|
|
35
|
+
rescue CSV::MalformedCSVError
|
|
36
|
+
failure(:could_not_parse_csv)
|
|
37
|
+
rescue Errno::EACCES
|
|
38
|
+
failure(:cannot_read_file, path: file_path)
|
|
39
|
+
end
|
|
64
40
|
|
|
65
|
-
|
|
41
|
+
def extract(session:, headers:, on_row: nil)
|
|
66
42
|
if session.output_destination.file?
|
|
67
|
-
|
|
68
|
-
stdout: @stdout,
|
|
69
|
-
errors: @errors,
|
|
70
|
-
row_streamer: @row_streamer
|
|
71
|
-
).call(
|
|
43
|
+
stats = @csv_row_file_writer.call(
|
|
72
44
|
output_path: session.output_destination.path,
|
|
73
45
|
file_path: session.source.path,
|
|
74
46
|
col_sep: session.source.separator,
|
|
@@ -76,35 +48,35 @@ module Csvtool
|
|
|
76
48
|
start_row: session.row_range.start_row,
|
|
77
49
|
end_row: session.row_range.end_row
|
|
78
50
|
)
|
|
51
|
+
success(stats.merge(output_path: session.output_destination.path))
|
|
79
52
|
else
|
|
80
|
-
|
|
53
|
+
stats = @row_streamer.each_in_range(
|
|
81
54
|
file_path: session.source.path,
|
|
82
55
|
col_sep: session.source.separator,
|
|
83
|
-
headers: headers,
|
|
84
56
|
start_row: session.row_range.start_row,
|
|
85
57
|
end_row: session.row_range.end_row
|
|
86
|
-
)
|
|
58
|
+
) { |fields| on_row.call(fields) if on_row }
|
|
59
|
+
success(stats)
|
|
87
60
|
end
|
|
88
|
-
return if stats.nil?
|
|
89
|
-
|
|
90
|
-
@errors.row_range_out_of_bounds(stats[:row_count]) unless stats[:matched]
|
|
91
|
-
rescue Domain::RowSession::InvalidStartRowError
|
|
92
|
-
@errors.invalid_start_row
|
|
93
|
-
rescue Domain::RowSession::InvalidEndRowError
|
|
94
|
-
@errors.invalid_end_row
|
|
95
|
-
rescue Domain::RowSession::InvalidRowRangeOrderError
|
|
96
|
-
@errors.invalid_row_range_order
|
|
97
|
-
rescue ArgumentError => e
|
|
98
|
-
return @errors.empty_output_path if e.message == "file output path cannot be empty"
|
|
99
|
-
|
|
100
|
-
raise e
|
|
101
61
|
rescue CSV::MalformedCSVError
|
|
102
|
-
|
|
103
|
-
rescue Errno::EACCES
|
|
104
|
-
|
|
62
|
+
failure(:could_not_parse_csv)
|
|
63
|
+
rescue Errno::EACCES, Errno::ENOENT => e
|
|
64
|
+
if session.output_destination.file?
|
|
65
|
+
failure(:cannot_write_output_file, path: session.output_destination.path, error_class: e.class)
|
|
66
|
+
else
|
|
67
|
+
failure(:cannot_read_file, path: session.source.path)
|
|
68
|
+
end
|
|
105
69
|
end
|
|
106
|
-
|
|
70
|
+
|
|
107
71
|
private
|
|
72
|
+
|
|
73
|
+
def success(data)
|
|
74
|
+
Result.new(ok: true, error: nil, data: data)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def failure(code, data = {})
|
|
78
|
+
Result.new(ok: false, error: code, data: data)
|
|
79
|
+
end
|
|
108
80
|
end
|
|
109
81
|
end
|
|
110
82
|
end
|