csvops 0.2.0.alpha → 0.3.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +39 -5
- data/docs/release-v0.3.0-alpha.md +74 -0
- data/lib/csvtool/application/use_cases/run_row_randomization.rb +105 -0
- data/lib/csvtool/cli.rb +5 -1
- data/lib/csvtool/domain/row_randomization_session/randomization_options.rb +17 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +31 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_session.rb +25 -0
- data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +23 -0
- data/lib/csvtool/infrastructure/csv/row_randomizer.rb +83 -0
- data/lib/csvtool/interface/cli/errors/presenter.rb +4 -0
- data/lib/csvtool/interface/cli/menu_loop.rb +5 -2
- data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +22 -0
- data/lib/csvtool/interface/cli/prompts/seed_prompt.rb +29 -0
- data/lib/csvtool/version.rb +1 -1
- data/test/csvtool/application/use_cases/run_row_randomization_test.rb +124 -0
- data/test/csvtool/cli_test.rb +117 -12
- data/test/csvtool/cli_unit_test.rb +14 -2
- data/test/csvtool/domain/row_randomization_session/randomization_options_test.rb +20 -0
- data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +21 -0
- data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +26 -0
- data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +28 -0
- data/test/csvtool/infrastructure/csv/row_randomizer_test.rb +37 -0
- data/test/csvtool/interface/cli/errors/presenter_test.rb +2 -0
- data/test/csvtool/interface/cli/menu_loop_test.rb +41 -10
- data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +14 -0
- data/test/csvtool/interface/cli/prompts/seed_prompt_test.rb +39 -0
- data/test/fixtures/sample_people_no_headers.csv +3 -0
- metadata +19 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9663c50901b31a8073c4a5a0524e9e30c81c20bbe1b736af71649e60a7150a0e
|
|
4
|
+
data.tar.gz: a622dad35eb52afeded279726d2575db0cd210c8ed4aa07650506bfd2e2b6de5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 28726bb66d05881caead074ce529d79db5424b85a7552f8b56cca44e891b3bb0c34cd850ff22351d6d93a5ef725e3891ccc8b7ac7e1e62d24d4e4c4d7d9b344d
|
|
7
|
+
data.tar.gz: 0fb96011d8737fb757b30e6226b9086453aa3ef6e5def1e95b77bb8bdbd414e25b72adfe6a8e65ee0498bbb88f2a809777a0d2405057689d00b3858c73014b93
|
data/README.md
CHANGED
|
@@ -127,7 +127,7 @@ bundle exec rake test
|
|
|
127
127
|
|
|
128
128
|
## Alpha release
|
|
129
129
|
|
|
130
|
-
Current prerelease version: `0.2.0.alpha`
|
|
130
|
+
Current prerelease version: `0.3.0.alpha`
|
|
131
131
|
|
|
132
132
|
Install prerelease from RubyGems:
|
|
133
133
|
|
|
@@ -137,21 +137,21 @@ gem install csvops --pre
|
|
|
137
137
|
|
|
138
138
|
Release runbook:
|
|
139
139
|
|
|
140
|
-
- `docs/release-v0.2.0-alpha.md`
|
|
140
|
+
- `docs/release-v0.3.0-alpha.md`
|
|
141
141
|
|
|
142
142
|
## Architecture
|
|
143
143
|
|
|
144
144
|
The codebase follows a DDD-lite layered structure:
|
|
145
145
|
|
|
146
|
-
- `domain/`: core domain models and invariants (`ColumnSession` and `RowSession` aggregates + supporting entities/value objects).
|
|
147
|
-
- `application/`: use-case orchestration (`RunExtraction`, `RunRowExtraction`).
|
|
146
|
+
- `domain/`: core domain models and invariants (`ColumnSession`, `RowSession`, and `RandomizationSession` aggregates + supporting entities/value objects).
|
|
147
|
+
- `application/`: use-case orchestration (`RunExtraction`, `RunRowExtraction`, `RunRowRandomization`).
|
|
148
148
|
- `infrastructure/`: CSV reading/streaming and output adapters (console/file).
|
|
149
149
|
- `interface/cli/`: menu, prompts, and user-facing error presentation.
|
|
150
150
|
- `Csvtool::CLI`: entrypoint wiring from command args to interface/application flow.
|
|
151
151
|
|
|
152
152
|
## Domain model
|
|
153
153
|
|
|
154
|
-
Bounded contexts: `Column Extraction` and `Row Extraction`.
|
|
154
|
+
Bounded contexts: `Column Extraction`, `Row Extraction`, and `Row Randomization`.
|
|
155
155
|
|
|
156
156
|
### Column Extraction
|
|
157
157
|
|
|
@@ -228,6 +228,38 @@ flowchart LR
|
|
|
228
228
|
APP2 --> INFOUT2["Infrastructure Output\nCsvRowConsoleWriter + CsvRowFileWriter"]
|
|
229
229
|
```
|
|
230
230
|
|
|
231
|
+
### Row Randomization
|
|
232
|
+
|
|
233
|
+
Core DDD structure:
|
|
234
|
+
|
|
235
|
+
- Aggregate root: `RandomizationSession`
|
|
236
|
+
- Captures one randomization request from source + options + output destination.
|
|
237
|
+
- Entity:
|
|
238
|
+
- `RandomizationSource` (file path + separator + header mode)
|
|
239
|
+
- Value objects:
|
|
240
|
+
- `RandomizationOptions` (optional deterministic `seed`)
|
|
241
|
+
- `RandomizationOutputDestination` (`console` or `file(path)`)
|
|
242
|
+
- Application service:
|
|
243
|
+
- `Application::UseCases::RunRowRandomization` orchestrates row randomization.
|
|
244
|
+
- Infrastructure adapters:
|
|
245
|
+
- `Infrastructure::CSV::HeaderReader`
|
|
246
|
+
- `Infrastructure::CSV::RowRandomizer` (external chunked `RAND + sort` + merge)
|
|
247
|
+
- Interface adapters:
|
|
248
|
+
- `Interface::CLI::MenuLoop`
|
|
249
|
+
- `Interface::CLI::Prompts::*`
|
|
250
|
+
- `Interface::CLI::Errors::Presenter`
|
|
251
|
+
|
|
252
|
+
```mermaid
|
|
253
|
+
flowchart LR
|
|
254
|
+
UI3["Interface CLI\n(Menu + Prompts + Errors)"] --> APP3["Application Use Case\nRunRowRandomization"]
|
|
255
|
+
APP3 --> AGG3["Domain Aggregate\nRandomizationSession"]
|
|
256
|
+
|
|
257
|
+
AGG3 --> E4["Entity\nRandomizationSource"]
|
|
258
|
+
AGG3 --> V3["Value Objects\nRandomizationOptions / RandomizationOutputDestination"]
|
|
259
|
+
|
|
260
|
+
APP3 --> INFCSV3["Infrastructure CSV\nHeaderReader + RowRandomizer"]
|
|
261
|
+
```
|
|
262
|
+
|
|
231
263
|
## Project layout
|
|
232
264
|
|
|
233
265
|
```text
|
|
@@ -235,8 +267,10 @@ bin/tool # CLI entrypoint
|
|
|
235
267
|
lib/csvtool/cli.rb
|
|
236
268
|
lib/csvtool/domain/column_session/*
|
|
237
269
|
lib/csvtool/domain/row_session/*
|
|
270
|
+
lib/csvtool/domain/row_randomization_session/*
|
|
238
271
|
lib/csvtool/application/use_cases/run_extraction.rb
|
|
239
272
|
lib/csvtool/application/use_cases/run_row_extraction.rb
|
|
273
|
+
lib/csvtool/application/use_cases/run_row_randomization.rb
|
|
240
274
|
lib/csvtool/infrastructure/csv/*
|
|
241
275
|
lib/csvtool/infrastructure/output/*
|
|
242
276
|
lib/csvtool/interface/cli/menu_loop.rb
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Release Checklist: v0.3.0-alpha
|
|
2
|
+
|
|
3
|
+
## 1. Verify environment
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
ruby -v
|
|
7
|
+
bundle -v
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
Expected:
|
|
11
|
+
- Ruby `3.3.0`
|
|
12
|
+
|
|
13
|
+
## 2. Install dependencies
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
bundle install
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 3. Run quality checks
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
bundle exec rake test
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 4. Smoke test CLI commands
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
bundle exec csvtool menu
|
|
29
|
+
bundle exec csvtool column test/fixtures/sample_people.csv name
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 5. Smoke test row randomization workflow
|
|
33
|
+
|
|
34
|
+
Use menu option `3` (`Randomize rows`) and verify:
|
|
35
|
+
- headered CSV output keeps header in first row
|
|
36
|
+
- seeded mode is reproducible
|
|
37
|
+
- file output path writes valid CSV
|
|
38
|
+
- headerless mode randomizes all rows
|
|
39
|
+
|
|
40
|
+
## 6. Build and validate gem package
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
gem build csvops.gemspec
|
|
44
|
+
gem install ./csvops-0.3.0.alpha.gem
|
|
45
|
+
csvtool menu
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## 7. Commit release prep
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
git add -A
|
|
52
|
+
git commit -m "chore(release): prepare v0.3.0-alpha"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## 8. Tag release
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
git tag -a v0.3.0-alpha -m "v0.3.0-alpha"
|
|
59
|
+
git push origin main --tags
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## 9. Publish gem (optional for alpha)
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
gem push csvops-0.3.0.alpha.gem
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## 10. Create GitHub release
|
|
69
|
+
|
|
70
|
+
Create release `v0.3.0-alpha` with:
|
|
71
|
+
- Randomize rows workflow support
|
|
72
|
+
- Seeded deterministic randomization
|
|
73
|
+
- External chunked randomization strategy for large files
|
|
74
|
+
- Updated domain model (`RowRandomizationSession`)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
require "csvtool/interface/cli/errors/presenter"
|
|
5
|
+
require "csvtool/interface/cli/prompts/file_path_prompt"
|
|
6
|
+
require "csvtool/interface/cli/prompts/separator_prompt"
|
|
7
|
+
require "csvtool/interface/cli/prompts/headers_present_prompt"
|
|
8
|
+
require "csvtool/interface/cli/prompts/seed_prompt"
|
|
9
|
+
require "csvtool/interface/cli/prompts/output_destination_prompt"
|
|
10
|
+
require "csvtool/infrastructure/csv/header_reader"
|
|
11
|
+
require "csvtool/infrastructure/csv/row_randomizer"
|
|
12
|
+
require "csvtool/domain/row_randomization_session/randomization_source"
|
|
13
|
+
require "csvtool/domain/row_randomization_session/randomization_options"
|
|
14
|
+
require "csvtool/domain/row_randomization_session/randomization_output_destination"
|
|
15
|
+
require "csvtool/domain/row_randomization_session/randomization_session"
|
|
16
|
+
|
|
17
|
+
module Csvtool
  module Application
    module UseCases
      # Interactive "Randomize rows" use case.
      #
      # Prompts for a source file, separator, header mode, optional seed and an
      # output destination, then streams the rows of the source CSV in random
      # order either to the console or to a file. User-facing failures are
      # reported through the CLI error presenter.
      class RunRowRandomization
        # @param stdin [#gets] input stream handed to every prompt
        # @param stdout [#puts, #print] output stream for prompts, rows and errors
        def initialize(stdin:, stdout:)
          @stdin = stdin
          @stdout = stdout
          @errors = Interface::CLI::Errors::Presenter.new(stdout: stdout)
          @header_reader = Infrastructure::CSV::HeaderReader.new
          @row_randomizer = Infrastructure::CSV::RowRandomizer.new
        end

        # Runs the whole prompt -> randomize -> output flow once.
        #
        # Returns early (after reporting through the presenter where one is
        # called) when the file is missing, a prompt yields no usable answer,
        # or a headered file has no header row.
        #
        # @raise [ArgumentError] any ArgumentError other than the empty
        #   output-path one is re-raised unchanged
        def call
          file_path = Interface::CLI::Prompts::FilePathPrompt.new(stdin: @stdin, stdout: @stdout).call
          return @errors.file_not_found(file_path) unless File.file?(file_path)

          col_sep = Interface::CLI::Prompts::SeparatorPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call
          return if col_sep.nil?

          headers_present = Interface::CLI::Prompts::HeadersPresentPrompt.new(stdin: @stdin, stdout: @stdout).call
          source = Domain::RowRandomizationSession::RandomizationSource.new(
            path: file_path,
            separator: col_sep,
            headers_present: headers_present
          )
          # Headers are only read (and later re-emitted) in headered mode.
          headers = source.headers_present? ? @header_reader.call(file_path: source.path, col_sep: source.separator) : nil
          return @errors.no_headers if source.headers_present? && headers.empty?

          seed = Interface::CLI::Prompts::SeedPrompt.new(stdin: @stdin, stdout: @stdout, errors: @errors).call
          return if seed == Interface::CLI::Prompts::SeedPrompt::INVALID

          options = Domain::RowRandomizationSession::RandomizationOptions.new(seed: seed)
          session = Domain::RowRandomizationSession::RandomizationSession.start(source: source, options: options)

          output_destination = Interface::CLI::Prompts::OutputDestinationPrompt.new(
            stdin: @stdin,
            stdout: @stdout,
            errors: @errors
          ).call
          return if output_destination.nil?

          destination =
            if output_destination[:mode] == :file
              Domain::RowRandomizationSession::RandomizationOutputDestination.file(path: output_destination[:path])
            else
              Domain::RowRandomizationSession::RandomizationOutputDestination.console
            end
          session = session.with_output_destination(destination)

          # Called without a block, so this is a lazy enumerator: the source
          # file is not read until the rows are iterated by the writer below.
          randomized_rows = @row_randomizer.each(
            file_path: session.source.path,
            col_sep: session.source.separator,
            headers: session.source.headers_present?,
            seed: session.options.seed
          )

          if session.output_destination.file?
            write_output_file(session.output_destination.path, headers, randomized_rows, col_sep: session.source.separator)
          else
            print_to_console(headers, randomized_rows, col_sep: session.source.separator)
          end
        rescue CSV::MalformedCSVError
          @errors.could_not_parse_csv
        rescue ArgumentError => e
          # Only the domain's "empty output path" validation is a user error;
          # anything else is unexpected and must propagate.
          return @errors.empty_output_path if e.message == "file output path cannot be empty"

          raise
        rescue Errno::EACCES
          @errors.cannot_read_file(file_path)
        end

        private

        # Prints a blank line, the header row (when given) and then every row,
        # each formatted as one CSV line using +col_sep+.
        def print_to_console(headers, rows, col_sep:)
          @stdout.puts
          @stdout.puts ::CSV.generate_line(headers, row_sep: "", col_sep: col_sep).chomp if headers
          rows.each { |fields| @stdout.puts ::CSV.generate_line(fields, row_sep: "", col_sep: col_sep).chomp }
        end

        # Writes the header row (when given) and all rows to +path+.
        #
        # The destination is opened lazily, on the first yielded row (or after
        # iteration when there are no rows). Opening it with "w" up front would
        # truncate the file before the lazy +rows+ enumerator had read its
        # source, destroying the data whenever the user picks the input file as
        # the output path. This relies on the row source consuming its input
        # before yielding the first row, as the randomizer's external sort does.
        #
        # Errno::EACCES / Errno::ENOENT are reported through the presenter.
        def write_output_file(path, headers, rows, col_sep:)
          writer = nil
          open_writer = lambda do
            writer ||= ::CSV.open(path, "w", write_headers: !headers.nil?, headers: headers, col_sep: col_sep)
          end

          begin
            rows.each { |fields| open_writer.call << fields }
            open_writer.call # still create the file (and header row) for zero rows
          ensure
            writer&.close
          end
          @stdout.puts "Wrote output to #{path}"
        rescue Errno::EACCES, Errno::ENOENT => e
          @errors.cannot_write_output_file(path, e.class)
        end
      end
    end
  end
end
|
data/lib/csvtool/cli.rb
CHANGED
|
@@ -4,6 +4,7 @@ require "csv"
|
|
|
4
4
|
require "csvtool/interface/cli/menu_loop"
|
|
5
5
|
require "csvtool/application/use_cases/run_extraction"
|
|
6
6
|
require "csvtool/application/use_cases/run_row_extraction"
|
|
7
|
+
require "csvtool/application/use_cases/run_row_randomization"
|
|
7
8
|
require "csvtool/interface/cli/errors/presenter"
|
|
8
9
|
require "csvtool/infrastructure/csv/header_reader"
|
|
9
10
|
require "csvtool/infrastructure/csv/value_streamer"
|
|
@@ -14,6 +15,7 @@ module Csvtool
|
|
|
14
15
|
MENU_OPTIONS = [
|
|
15
16
|
"Extract column",
|
|
16
17
|
"Extract rows (range)",
|
|
18
|
+
"Randomize rows",
|
|
17
19
|
"Exit"
|
|
18
20
|
].freeze
|
|
19
21
|
|
|
@@ -45,12 +47,14 @@ module Csvtool
|
|
|
45
47
|
def run_menu_loop
|
|
46
48
|
extract_column_action = -> { Application::UseCases::RunExtraction.new(stdin: @stdin, stdout: @stdout).call }
|
|
47
49
|
extract_rows_action = -> { Application::UseCases::RunRowExtraction.new(stdin: @stdin, stdout: @stdout).call }
|
|
50
|
+
randomize_rows_action = -> { Application::UseCases::RunRowRandomization.new(stdin: @stdin, stdout: @stdout).call }
|
|
48
51
|
Interface::CLI::MenuLoop.new(
|
|
49
52
|
stdin: @stdin,
|
|
50
53
|
stdout: @stdout,
|
|
51
54
|
menu_options: MENU_OPTIONS,
|
|
52
55
|
extract_column_action: extract_column_action,
|
|
53
|
-
extract_rows_action: extract_rows_action
|
|
56
|
+
extract_rows_action: extract_rows_action,
|
|
57
|
+
randomize_rows_action: randomize_rows_action
|
|
54
58
|
).run
|
|
55
59
|
end
|
|
56
60
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module RowRandomizationSession
      # Value object holding the tunable options of one randomization request.
      # Currently that is only the optional seed for the random generator.
      class RandomizationOptions
        attr_reader :seed

        # @param seed [Integer, nil] deterministic seed, or nil for none
        # @raise [ArgumentError] when seed is neither an Integer nor nil
        def initialize(seed:)
          case seed
          when nil, Integer
            @seed = seed
          else
            raise ArgumentError, "seed must be an integer or nil"
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module RowRandomizationSession
      # Value object describing where randomized rows go: the console, or a
      # file at a given path.
      class RandomizationOutputDestination
        attr_reader :mode, :path

        class << self
          # Destination that targets the console (no path).
          def console
            new(mode: :console)
          end

          # Destination that targets the file at +path+.
          def file(path:)
            new(mode: :file, path: path)
          end
        end

        # @param mode [Symbol] :console or :file
        # @param path [String, nil] required (non-empty) when mode is :file
        # @raise [ArgumentError] on an unknown mode, or a :file mode whose
        #   path is nil/empty
        def initialize(mode:, path: nil)
          known_mode = mode == :console || mode == :file
          raise ArgumentError, "invalid output mode" unless known_mode

          missing_path = path.to_s.empty?
          raise ArgumentError, "file output path cannot be empty" if mode == :file && missing_path

          @mode = mode
          @path = path
        end

        # True when rows should be written to a file.
        def file?
          mode == :file
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module RowRandomizationSession
      # Aggregate for one randomization request: the source, the options and,
      # once chosen, the output destination.
      class RandomizationSession
        attr_reader :source, :options, :output_destination

        class << self
          # Begins a session that has no output destination yet.
          def start(source:, options:)
            new(source: source, options: options)
          end
        end

        # @param source the randomization source
        # @param options the randomization options
        # @param output_destination the chosen destination, nil until selected
        def initialize(source:, options:, output_destination: nil)
          @output_destination = output_destination
          @options = options
          @source = source
        end

        # Returns a new session with the same source and options and the given
        # destination; the receiver is left untouched.
        def with_output_destination(destination)
          self.class.new(
            source: @source,
            options: @options,
            output_destination: destination
          )
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Domain
    module RowRandomizationSession
      # Entity describing the CSV input of a randomization: file path, column
      # separator and whether the first row is a header.
      class RandomizationSource
        attr_reader :path, :separator

        # @param path the source file path (stored as given)
        # @param separator the column separator; must not be nil or empty
        # @param headers_present whether the file starts with a header row
        # @raise [ArgumentError] when the separator is nil or empty
        def initialize(path:, separator:, headers_present:)
          blank_separator = separator.to_s.empty?
          raise ArgumentError, "separator cannot be empty" if blank_separator

          @headers_present = headers_present
          @separator = separator
          @path = path
        end

        # Returns the header flag exactly as it was supplied.
        def headers_present?
          @headers_present
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "csv"
|
|
4
|
+
require "tempfile"
|
|
5
|
+
|
|
6
|
+
module Csvtool
  module Infrastructure
    module CSV
      # Shuffles the rows of a CSV file without holding the whole file in
      # memory.
      #
      # Every row is tagged with a random key and its input sequence number.
      # Rows are buffered up to +chunk_size+, sorted by (key, sequence) and
      # spilled to a temporary chunk file; the sorted chunks are then merged.
      # With a seed the keys, and therefore the output order, are reproducible.
      class RowRandomizer
        DEFAULT_CHUNK_SIZE = 10_000

        # Returns all rows of the file in random order as an Array of field
        # arrays.
        #
        # @param file_path [String] CSV file to read
        # @param col_sep [String] column separator
        # @param headers [Boolean] when true the first row is treated as a
        #   header and is not part of the result
        # @param seed [Integer, nil] seed for the random generator
        def call(file_path:, col_sep:, headers:, seed: nil)
          each(file_path: file_path, col_sep: col_sep, headers: headers, seed: seed).to_a
        end

        # Yields each row (an Array of fields) in random order. Without a
        # block, returns an Enumerator that performs the work when iterated.
        #
        # The whole source is read and spilled to chunk files before the
        # first row is yielded. Chunk files are removed when iteration ends,
        # whether it completes or raises.
        #
        # @param chunk_size [Integer] maximum rows buffered before a spill
        def each(file_path:, col_sep:, headers:, seed: nil, chunk_size: DEFAULT_CHUNK_SIZE)
          chunk_paths = [] # assigned first so the ensure clause always sees it
          return enum_for(:each, file_path: file_path, col_sep: col_sep, headers: headers, seed: seed, chunk_size: chunk_size) unless block_given?

          rng = seed.nil? ? Random.new : Random.new(seed)
          sequence = 0
          chunk_entries = []

          ::CSV.foreach(file_path, headers: headers, col_sep: col_sep) do |row|
            fields = headers ? row.fields : row
            # The sequence number breaks ties between equal random keys, so
            # the order is total and identical for a given seed.
            chunk_entries << [rng.rand, sequence, fields]
            sequence += 1
            flush_chunk(chunk_entries, chunk_paths) if chunk_entries.length >= chunk_size
          end

          flush_chunk(chunk_entries, chunk_paths) unless chunk_entries.empty?
          merge_chunks(chunk_paths) { |fields| yield fields }
        ensure
          cleanup_chunks(chunk_paths)
        end

        private

        # Sorts the buffered entries, writes them to a new chunk file, records
        # its path in +chunk_paths+ and empties the buffer.
        def flush_chunk(entries, chunk_paths)
          entries.sort_by! { |rand_key, seq, _fields| [rand_key, seq] }

          # Tempfile.create returns a plain File with no finalizer. A
          # Tempfile.new object would be unreferenced once this method
          # returns, and its finalizer could unlink the chunk on GC before
          # merge_chunks reopens it by path. Deletion is done explicitly by
          # cleanup_chunks instead.
          file = Tempfile.create("csvtool-row-randomizer-chunk")
          # Register the path right away so it is cleaned up even if writing fails.
          chunk_paths << file.path
          begin
            file.binmode
            entries.each { |entry| Marshal.dump(entry, file) }
          ensure
            file.close
          end
          entries.clear
        end

        # Merges the sorted chunk files, yielding the fields of the globally
        # smallest (key, sequence) entry each time.
        def merge_chunks(chunk_paths)
          readers = []
          # Opened one at a time so that, if an open fails, the handles opened
          # so far are still closed by the ensure clause.
          chunk_paths.each { |path| readers << File.open(path, "rb") }
          heads = readers.map { |reader| next_entry(reader) }

          loop do
            pending = heads.each_with_index.reject { |entry, _i| entry.nil? }
            break if pending.empty?

            min_entry, min_index = pending.min_by { |entry, _i| [entry[0], entry[1]] }
            yield min_entry[2]
            heads[min_index] = next_entry(readers[min_index])
          end
        ensure
          readers.each(&:close)
        end

        # Reads the next entry from a chunk file, or nil at end of file.
        # Marshal is only ever applied to chunk files written by flush_chunk.
        def next_entry(reader)
          Marshal.load(reader)
        rescue EOFError
          nil
        end

        # Best-effort removal of the chunk files; a file that is already gone
        # or cannot be deleted is ignored.
        def cleanup_chunks(chunk_paths)
          return if chunk_paths.nil?

          chunk_paths.each do |path|
            File.delete(path) if File.exist?(path)
          rescue Errno::EACCES, Errno::ENOENT
            nil
          end
        end
      end
    end
  end
end
|
|
@@ -4,12 +4,13 @@ module Csvtool
|
|
|
4
4
|
module Interface
|
|
5
5
|
module CLI
|
|
6
6
|
class MenuLoop
|
|
7
|
-
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:)
|
|
7
|
+
def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:)
|
|
8
8
|
@stdin = stdin
|
|
9
9
|
@stdout = stdout
|
|
10
10
|
@menu_options = menu_options
|
|
11
11
|
@extract_column_action = extract_column_action
|
|
12
12
|
@extract_rows_action = extract_rows_action
|
|
13
|
+
@randomize_rows_action = randomize_rows_action
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
def run
|
|
@@ -25,9 +26,11 @@ module Csvtool
|
|
|
25
26
|
when "2"
|
|
26
27
|
@extract_rows_action.call
|
|
27
28
|
when "3"
|
|
29
|
+
@randomize_rows_action.call
|
|
30
|
+
when "4"
|
|
28
31
|
return 0
|
|
29
32
|
else
|
|
30
|
-
@stdout.puts "Please choose 1, 2, or
|
|
33
|
+
@stdout.puts "Please choose 1, 2, 3, or 4."
|
|
31
34
|
end
|
|
32
35
|
end
|
|
33
36
|
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks whether the CSV has a header row. Only "n" or "no" (any case,
        # surrounding whitespace ignored) means no; anything else, including
        # an empty answer or end of input, means yes.
        class HeadersPresentPrompt
          def initialize(stdin:, stdout:)
            @stdin = stdin
            @stdout = stdout
          end

          # @return [Boolean] true when headers are present
          def call
            @stdout.print "Headers present? [Y/n]: "
            line = @stdin.gets
            reply = line.nil? ? "" : line.strip.downcase
            reply != "n" && reply != "no"
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks for an optional integer seed.
        class SeedPrompt
          # Sentinel returned when the answer is not a valid integer.
          INVALID = :invalid

          def initialize(stdin:, stdout:, errors:)
            @stdin = stdin
            @stdout = stdout
            @errors = errors
          end

          # @return [Integer, nil, Symbol] the parsed seed; nil for a blank
          #   answer or end of input; INVALID (after reporting through the
          #   error presenter) for anything else
          def call
            @stdout.print "Random seed (optional integer): "
            line = @stdin.gets
            text = line.nil? ? "" : line.strip

            if text.empty?
              nil
            elsif text.match?(/\A-?\d+\z/)
              text.to_i
            else
              @errors.invalid_seed
              INVALID
            end
          end
        end
      end
    end
  end
end
|
data/lib/csvtool/version.rb
CHANGED