csvops 0.1.0.alpha → 0.3.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/README.md +83 -10
  3. data/docs/release-v0.2.0-alpha.md +80 -0
  4. data/docs/release-v0.3.0-alpha.md +74 -0
  5. data/lib/csvtool/application/use_cases/run_extraction.rb +17 -17
  6. data/lib/csvtool/application/use_cases/run_row_extraction.rb +111 -0
  7. data/lib/csvtool/application/use_cases/run_row_randomization.rb +105 -0
  8. data/lib/csvtool/cli.rb +10 -2
  9. data/lib/csvtool/domain/{extraction_session → column_session}/column_selection.rb +1 -1
  10. data/lib/csvtool/domain/{extraction_session/extraction_session.rb → column_session/column_session.rb} +2 -2
  11. data/lib/csvtool/domain/{extraction_session → column_session}/csv_source.rb +1 -1
  12. data/lib/csvtool/domain/{extraction_session → column_session}/extraction_options.rb +1 -1
  13. data/lib/csvtool/domain/{extraction_session → column_session}/extraction_value.rb +1 -1
  14. data/lib/csvtool/domain/{extraction_session → column_session}/output_destination.rb +1 -1
  15. data/lib/csvtool/domain/{extraction_session → column_session}/preview.rb +1 -1
  16. data/lib/csvtool/domain/{extraction_session → column_session}/separator.rb +1 -1
  17. data/lib/csvtool/domain/row_randomization_session/randomization_options.rb +17 -0
  18. data/lib/csvtool/domain/row_randomization_session/randomization_output_destination.rb +31 -0
  19. data/lib/csvtool/domain/row_randomization_session/randomization_session.rb +25 -0
  20. data/lib/csvtool/domain/row_randomization_session/randomization_source.rb +23 -0
  21. data/lib/csvtool/domain/row_session/row_output_destination.rb +31 -0
  22. data/lib/csvtool/domain/row_session/row_range.rb +39 -0
  23. data/lib/csvtool/domain/row_session/row_session.rb +25 -0
  24. data/lib/csvtool/domain/row_session/row_source.rb +16 -0
  25. data/lib/csvtool/infrastructure/csv/row_randomizer.rb +83 -0
  26. data/lib/csvtool/infrastructure/csv/row_streamer.rb +27 -0
  27. data/lib/csvtool/infrastructure/output/csv_row_console_writer.rb +34 -0
  28. data/lib/csvtool/infrastructure/output/csv_row_file_writer.rb +45 -0
  29. data/lib/csvtool/interface/cli/errors/presenter.rb +20 -0
  30. data/lib/csvtool/interface/cli/menu_loop.rb +13 -5
  31. data/lib/csvtool/interface/cli/prompts/headers_present_prompt.rb +22 -0
  32. data/lib/csvtool/interface/cli/prompts/seed_prompt.rb +29 -0
  33. data/lib/csvtool/version.rb +1 -1
  34. data/test/csvtool/application/use_cases/run_row_extraction_test.rb +140 -0
  35. data/test/csvtool/application/use_cases/run_row_randomization_test.rb +124 -0
  36. data/test/csvtool/cli_test.rb +237 -6
  37. data/test/csvtool/cli_unit_test.rb +24 -1
  38. data/test/csvtool/domain/{extraction_session → column_session}/column_selection_test.rb +2 -2
  39. data/test/csvtool/domain/column_session/column_session_test.rb +35 -0
  40. data/test/csvtool/domain/column_session/csv_source_test.rb +14 -0
  41. data/test/csvtool/domain/{extraction_session → column_session}/extraction_options_test.rb +3 -3
  42. data/test/csvtool/domain/{extraction_session → column_session}/extraction_value_test.rb +2 -2
  43. data/test/csvtool/domain/{extraction_session → column_session}/output_destination_test.rb +3 -3
  44. data/test/csvtool/domain/column_session/preview_test.rb +18 -0
  45. data/test/csvtool/domain/{extraction_session → column_session}/separator_test.rb +3 -3
  46. data/test/csvtool/domain/row_randomization_session/randomization_options_test.rb +20 -0
  47. data/test/csvtool/domain/row_randomization_session/randomization_output_destination_test.rb +21 -0
  48. data/test/csvtool/domain/row_randomization_session/randomization_session_test.rb +26 -0
  49. data/test/csvtool/domain/row_randomization_session/randomization_source_test.rb +28 -0
  50. data/test/csvtool/domain/row_session/row_output_destination_test.rb +23 -0
  51. data/test/csvtool/domain/row_session/row_range_test.rb +30 -0
  52. data/test/csvtool/domain/row_session/row_session_test.rb +22 -0
  53. data/test/csvtool/domain/row_session/row_source_test.rb +12 -0
  54. data/test/csvtool/infrastructure/csv/row_randomizer_test.rb +37 -0
  55. data/test/csvtool/infrastructure/csv/row_streamer_test.rb +41 -0
  56. data/test/csvtool/infrastructure/output/csv_row_console_writer_test.rb +24 -0
  57. data/test/csvtool/infrastructure/output/csv_row_file_writer_test.rb +40 -0
  58. data/test/csvtool/interface/cli/errors/presenter_test.rb +10 -0
  59. data/test/csvtool/interface/cli/menu_loop_test.rb +68 -12
  60. data/test/csvtool/interface/cli/prompts/headers_present_prompt_test.rb +14 -0
  61. data/test/csvtool/interface/cli/prompts/seed_prompt_test.rb +39 -0
  62. data/test/fixtures/sample_people_bad_tail.csv +5 -0
  63. data/test/fixtures/sample_people_no_headers.csv +3 -0
  64. metadata +53 -17
  65. data/test/csvtool/domain/extraction_session/csv_source_test.rb +0 -14
  66. data/test/csvtool/domain/extraction_session/extraction_session_test.rb +0 -35
  67. data/test/csvtool/domain/extraction_session/preview_test.rb +0 -18
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Csvtool
4
4
  module Domain
5
- module ExtractionSession
5
+ module ColumnSession
6
6
  class ExtractionValue
7
7
  attr_reader :value
8
8
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Csvtool
4
4
  module Domain
5
- module ExtractionSession
5
+ module ColumnSession
6
6
  class OutputDestination
7
7
  attr_reader :mode, :path
8
8
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Csvtool
4
4
  module Domain
5
- module ExtractionSession
5
+ module ColumnSession
6
6
  class Preview
7
7
  attr_reader :values
8
8
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Csvtool
4
4
  module Domain
5
- module ExtractionSession
5
+ module ColumnSession
6
6
  class Separator
7
7
  attr_reader :value
8
8
 
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module Csvtool
  module Domain
    module RowRandomizationSession
      # Holds the optional seed chosen for a row randomization run.
      class RandomizationOptions
        attr_reader :seed

        # @param seed [Integer, nil] seed value, or nil when none was supplied
        # @raise [ArgumentError] if seed is neither nil nor an Integer
        def initialize(seed:)
          case seed
          when nil, Integer
            @seed = seed
          else
            raise ArgumentError, "seed must be an integer or nil"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module Csvtool
  module Domain
    module RowRandomizationSession
      # Describes where randomized rows are sent: the console or a file path.
      class RandomizationOutputDestination
        attr_reader :mode, :path

        class << self
          # @return [RandomizationOutputDestination] destination in :console mode
          def console
            new(mode: :console)
          end

          # @param path [String] output file path
          # @return [RandomizationOutputDestination] destination in :file mode
          def file(path:)
            new(mode: :file, path: path)
          end
        end

        # @param mode [Symbol] :console or :file
        # @param path [String, nil] required (non-empty) when mode is :file
        # @raise [ArgumentError] on an unknown mode, or an empty path in :file mode
        def initialize(mode:, path: nil)
          case mode
          when :console
            # console output needs no path
          when :file
            raise ArgumentError, "file output path cannot be empty" if path.to_s.empty?
          else
            raise ArgumentError, "invalid output mode"
          end

          @path = path
          @mode = mode
        end

        # @return [Boolean] true when the destination is a file
        def file?
          mode == :file
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
# frozen_string_literal: true

module Csvtool
  module Domain
    module RowRandomizationSession
      # Bundles the source, options and (optional) output destination of one
      # randomization run. Choosing a destination produces a new session
      # object rather than modifying the receiver.
      class RandomizationSession
        attr_reader :source, :options, :output_destination

        class << self
          # Builds a session that has no output destination yet.
          def start(source:, options:)
            new(source: source, options: options)
          end
        end

        def initialize(source:, options:, output_destination: nil)
          @output_destination = output_destination
          @options = options
          @source = source
        end

        # @param destination [Object] the output destination to attach
        # @return [RandomizationSession] a new session with the same source
        #   and options and the given destination
        def with_output_destination(destination)
          attributes = { source: @source, options: @options, output_destination: destination }
          self.class.new(**attributes)
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
# frozen_string_literal: true

module Csvtool
  module Domain
    module RowRandomizationSession
      # Describes the CSV input of a randomization run: its path, its
      # separator, and whether it was declared to have a header row.
      class RandomizationSource
        attr_reader :path, :separator

        # @param path [Object] stored as given
        # @param separator [#to_s] must not render as an empty string
        # @param headers_present [Object] stored as given
        # @raise [ArgumentError] if the separator is empty
        def initialize(path:, separator:, headers_present:)
          separator_text = separator.to_s
          raise ArgumentError, "separator cannot be empty" if separator_text.empty?

          @headers_present = headers_present
          @separator = separator
          @path = path
        end

        # @return [Object] the headers_present value passed to the constructor
        def headers_present?
          @headers_present
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module Csvtool
  module Domain
    module RowSession
      # Describes where extracted rows are sent: the console or a file path.
      class RowOutputDestination
        attr_reader :mode, :path

        class << self
          # @return [RowOutputDestination] destination in :console mode
          def console
            new(mode: :console)
          end

          # @param path [String] output file path
          # @return [RowOutputDestination] destination in :file mode
          def file(path:)
            new(mode: :file, path: path)
          end
        end

        # @param mode [Symbol] :console or :file
        # @param path [String, nil] required (non-empty) when mode is :file
        # @raise [ArgumentError] on an unknown mode, or an empty path in :file mode
        def initialize(mode:, path: nil)
          case mode
          when :console
            # console output needs no path
          when :file
            raise ArgumentError, "file output path cannot be empty" if path.to_s.empty?
          else
            raise ArgumentError, "invalid output mode"
          end

          @path = path
          @mode = mode
        end

        # @return [Boolean] true when the destination is a file
        def file?
          mode == :file
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
# frozen_string_literal: true

module Csvtool
  module Domain
    module RowSession
      class InvalidStartRowError < StandardError; end
      class InvalidEndRowError < StandardError; end
      class InvalidRowRangeOrderError < StandardError; end

      # A pair of positive row numbers where end_row is never smaller than
      # start_row. Both readers always return Integers.
      class RowRange
        # Decimal positive integer with no sign, padding or leading zero.
        POSITIVE_INTEGER_PATTERN = /\A[1-9]\d*\z/.freeze

        attr_reader :start_row, :end_row

        # Builds a range from raw (typically user-typed) values.
        #
        # @param start_row_input [#to_s]
        # @param end_row_input [#to_s]
        # @return [RowRange]
        # @raise [InvalidStartRowError] if the start is not a positive integer
        # @raise [InvalidEndRowError] if the end is not a positive integer
        # @raise [InvalidRowRangeOrderError] if the end precedes the start
        def self.from_inputs(start_row_input:, end_row_input:)
          # The constructor applies the same checks in the same order, so
          # the validation lives in one place only.
          new(start_row: start_row_input, end_row: end_row_input)
        end

        # @param start_row [Integer, #to_s] positive integer or its decimal text
        # @param end_row [Integer, #to_s] positive integer or its decimal text
        # @raise [InvalidStartRowError] if start_row is not a positive integer
        # @raise [InvalidEndRowError] if end_row is not a positive integer
        # @raise [InvalidRowRangeOrderError] if end_row is smaller than start_row
        def initialize(start_row:, end_row:)
          first = coerce_row(start_row)
          raise InvalidStartRowError, "invalid start row" if first.nil?

          last = coerce_row(end_row)
          raise InvalidEndRowError, "invalid end row" if last.nil?

          # Compare the coerced Integers: comparing raw values would order
          # strings lexically ("10" < "2") and accept or reject wrongly.
          raise InvalidRowRangeOrderError, "end row before start row" if last < first

          @start_row = first
          @end_row = last
        end

        private

        # @return [Integer, nil] the row number, or nil when value is not the
        #   decimal form of a positive integer
        def coerce_row(value)
          text = value.to_s
          text.to_i if POSITIVE_INTEGER_PATTERN.match?(text)
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
# frozen_string_literal: true

module Csvtool
  module Domain
    module RowSession
      # Bundles the source, row range and (optional) output destination of
      # one row extraction. Choosing a destination produces a new session
      # object rather than modifying the receiver.
      class RowSession
        attr_reader :source, :row_range, :output_destination

        class << self
          # Builds a session that has no output destination yet.
          def start(source:, row_range:)
            new(source: source, row_range: row_range)
          end
        end

        def initialize(source:, row_range:, output_destination: nil)
          @output_destination = output_destination
          @row_range = row_range
          @source = source
        end

        # @param destination [Object] the output destination to attach
        # @return [RowSession] a new session with the same source and row
        #   range and the given destination
        def with_output_destination(destination)
          attributes = { source: @source, row_range: @row_range, output_destination: destination }
          self.class.new(**attributes)
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
# frozen_string_literal: true

module Csvtool
  module Domain
    module RowSession
      # Plain holder for the CSV input of a row extraction: a path and a
      # separator, both stored exactly as given (no validation here).
      class RowSource
        attr_reader :path, :separator

        # @param path [Object] stored as given
        # @param separator [Object] stored as given
        def initialize(path:, separator:)
          @separator = separator
          @path = path
        end
      end
    end
  end
end
@@ -0,0 +1,83 @@
# frozen_string_literal: true

require "csv"
require "tempfile"

module Csvtool
  module Infrastructure
    module CSV
      # Yields the rows of a CSV file in random order.
      #
      # Each row is tagged with a random key. Rows are collected in batches
      # of at most chunk_size, each batch is sorted by key and written to a
      # temporary file, and the sorted files are then merged. Ties on the
      # random key are broken by the row's original position.
      class RowRandomizer
        DEFAULT_CHUNK_SIZE = 10_000

        # Collects every shuffled row into an Array.
        #
        # @return [Array<Array>] the field arrays in shuffled order
        def call(file_path:, col_sep:, headers:, seed: nil)
          each(file_path: file_path, col_sep: col_sep, headers: headers, seed: seed).to_a
        end

        # Yields each row's fields in shuffled order; returns an Enumerator
        # when no block is given.
        #
        # @param file_path [String] CSV file to read
        # @param col_sep [String] column separator passed to CSV.foreach
        # @param headers [Object] passed to CSV.foreach as its headers option;
        #   when truthy, rows arrive as CSV::Row and only their fields are kept
        # @param seed [Integer, nil] seed for the random generator; nil lets
        #   Random pick one
        # @param chunk_size [Integer] rows buffered before a chunk is written
        def each(file_path:, col_sep:, headers:, seed: nil, chunk_size: DEFAULT_CHUNK_SIZE)
          # Assigned before anything can return or raise so the ensure
          # clause always has a collection to clean up.
          chunks = []
          return enum_for(:each, file_path: file_path, col_sep: col_sep, headers: headers, seed: seed, chunk_size: chunk_size) unless block_given?

          rng = seed.nil? ? Random.new : Random.new(seed)
          sequence = 0
          pending = []

          ::CSV.foreach(file_path, headers: headers, col_sep: col_sep) do |row|
            fields = headers ? row.fields : row
            pending << [rng.rand, sequence, fields]
            sequence += 1
            flush_chunk(pending, chunks) if pending.length >= chunk_size
          end

          flush_chunk(pending, chunks) unless pending.empty?
          merge_chunks(chunks) { |fields| yield fields }
        ensure
          cleanup_chunks(chunks)
        end

        private

        # Sorts the buffered entries, writes them to a new temporary file and
        # empties the buffer.
        #
        # The Tempfile object itself is stored, not just its path: a Tempfile's
        # file is removed when the object is garbage-collected, so holding only
        # the path would let a chunk disappear before it is merged.
        def flush_chunk(entries, chunks)
          entries.sort_by! { |entry| entry.first(2) }
          file = Tempfile.new("csvtool-row-randomizer-chunk")
          # Register first so the chunk is cleaned up even if writing fails.
          chunks << file
          file.binmode
          entries.each { |entry| Marshal.dump(entry, file) }
          file.close
          entries.clear
        end

        # Merges the sorted chunk files, yielding fields in ascending
        # (random key, sequence) order.
        def merge_chunks(chunks)
          readers = []
          # Opened one at a time so that a failure part-way through still
          # leaves the earlier handles visible to the ensure clause.
          chunks.each { |chunk| readers << File.open(chunk.path, "rb") }
          heads = readers.map { |reader| next_entry(reader) }

          loop do
            live = heads.each_with_index.reject { |entry, _index| entry.nil? }
            break if live.empty?

            smallest, position = live.min_by { |entry, _index| entry.first(2) }
            yield smallest[2]
            heads[position] = next_entry(readers[position])
          end
        ensure
          readers&.each(&:close)
        end

        # Reads the next entry from a chunk file, or nil at end of file.
        #
        # NOTE(review): Marshal.load is only applied to chunk files written by
        # flush_chunk above; it must never be pointed at externally supplied
        # data.
        def next_entry(reader)
          Marshal.load(reader)
        rescue EOFError
          nil
        end

        # Closes and deletes every chunk file; deletion problems are ignored
        # because this runs as best-effort cleanup.
        def cleanup_chunks(chunks)
          return if chunks.nil?

          chunks.each do |chunk|
            chunk.close!
          rescue Errno::EACCES, Errno::ENOENT
            nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
# frozen_string_literal: true

require "csv"

module Csvtool
  module Infrastructure
    module CSV
      # Reads a CSV file (first line treated as headers) and yields the
      # fields of the data rows whose 1-based position lies between
      # start_row and end_row, both inclusive.
      class RowStreamer
        # @param file_path [String] CSV file to read
        # @param col_sep [String] column separator
        # @param start_row [Integer] first position to yield
        # @param end_row [Integer] last position to yield
        # @yieldparam fields [Array] the fields of one selected row
        # @return [Hash] :matched is true when at least one row was yielded;
        #   :row_count is the number of rows read before iteration stopped
        #   (reading stops at the first row past end_row, which is counted)
        def each_in_range(file_path:, col_sep:, start_row:, end_row:)
          position = 0
          any_yielded = false

          ::CSV.foreach(file_path, headers: true, col_sep: col_sep) do |record|
            position += 1

            if position < start_row
              next
            elsif position > end_row
              break
            end

            any_yielded = true
            yield record.fields
          end

          { matched: any_yielded, row_count: position }
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
# frozen_string_literal: true

require "csv"

module Csvtool
  module Infrastructure
    module Output
      # Prints a range of CSV rows to the given output stream, preceded by
      # the header line. Nothing is printed when the range selects no rows.
      class CsvRowConsoleWriter
        # @param stdout [#puts] stream that receives the CSV lines
        # @param row_streamer [#each_in_range] supplies the selected rows
        def initialize(stdout:, row_streamer:)
          @row_streamer = row_streamer
          @stdout = stdout
        end

        # @param headers [Array] header values printed before the first row
        # @return [Object] whatever the row streamer returns
        def call(file_path:, col_sep:, headers:, start_row:, end_row:)
          header_pending = true

          @row_streamer.each_in_range(file_path: file_path, col_sep: col_sep, start_row: start_row, end_row: end_row) do |fields|
            # The header is deferred until the first row actually arrives.
            if header_pending
              print_csv_line(headers)
              header_pending = false
            end
            print_csv_line(fields)
          end
        end

        private

        # Writes one array of values as a single CSV-formatted line.
        def print_csv_line(values)
          @stdout.puts ::CSV.generate_line(values, row_sep: "").chomp
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
# frozen_string_literal: true

require "csv"

module Csvtool
  module Infrastructure
    module Output
      # Writes a range of CSV rows, preceded by the header line, to a file.
      # The output file is only created once the first selected row arrives.
      class CsvRowFileWriter
        # @param stdout [#puts] receives the confirmation message
        # @param errors [#cannot_write_output_file] error reporter
        # @param row_streamer [#each_in_range] supplies the selected rows
        def initialize(stdout:, errors:, row_streamer:)
          @row_streamer = row_streamer
          @errors = errors
          @stdout = stdout
        end

        # @param headers [Array] header values written before the first row
        # @param output_path [String] file to create
        # @return [Object, nil] whatever the row streamer returns, or nil when
        #   Errno::EACCES or Errno::ENOENT was raised and reported
        def call(file_path:, col_sep:, headers:, start_row:, end_row:, output_path:)
          writer = nil
          header_written = false

          result = @row_streamer.each_in_range(file_path: file_path, col_sep: col_sep, start_row: start_row, end_row: end_row) do |fields|
            if header_written == false
              # Assign before writing so the ensure clause can close the
              # handle even if writing the header fails.
              writer = ::CSV.open(output_path, "w")
              writer << headers
              header_written = true
            end
            writer << fields
          end

          writer.close unless writer.nil?
          @stdout.puts "Wrote output to #{output_path}" if header_written
          result
        rescue Errno::EACCES, Errno::ENOENT => e
          @errors.cannot_write_output_file(output_path, e.class)
          nil
        ensure
          # Covers the paths where an exception skipped the close above.
          writer.close if writer && !writer.closed?
        end
      end
    end
  end
end
@@ -49,9 +49,29 @@ module Csvtool
49
49
  @stdout.puts "Invalid separator choice."
50
50
  end
51
51
 
52
+ def invalid_seed
53
+ @stdout.puts "Seed must be an integer."
54
+ end
55
+
52
56
  def canceled
53
57
  @stdout.puts "Canceled."
54
58
  end
59
+
60
+ def invalid_start_row
61
+ @stdout.puts "Start row must be a positive integer."
62
+ end
63
+
64
+ def invalid_end_row
65
+ @stdout.puts "End row must be a positive integer."
66
+ end
67
+
68
+ def invalid_row_range_order
69
+ @stdout.puts "End row must be greater than or equal to start row."
70
+ end
71
+
72
+ def row_range_out_of_bounds(total_rows)
73
+ @stdout.puts "Row range is out of bounds. File has #{total_rows} data rows."
74
+ end
55
75
  end
56
76
  end
57
77
  end
@@ -4,25 +4,33 @@ module Csvtool
4
4
  module Interface
5
5
  module CLI
6
6
  class MenuLoop
7
- def initialize(stdin:, stdout:, menu_options:, extract_action:)
7
+ def initialize(stdin:, stdout:, menu_options:, extract_column_action:, extract_rows_action:, randomize_rows_action:)
8
8
  @stdin = stdin
9
9
  @stdout = stdout
10
10
  @menu_options = menu_options
11
- @extract_action = extract_action
11
+ @extract_column_action = extract_column_action
12
+ @extract_rows_action = extract_rows_action
13
+ @randomize_rows_action = randomize_rows_action
12
14
  end
13
15
 
14
16
  def run
15
17
  loop do
16
18
  print_menu
17
19
  @stdout.print "> "
20
+ choice = @stdin.gets
21
+ return 0 if choice.nil?
18
22
 
19
- case @stdin.gets&.strip
23
+ case choice.strip
20
24
  when "1"
21
- @extract_action.call
25
+ @extract_column_action.call
22
26
  when "2"
27
+ @extract_rows_action.call
28
+ when "3"
29
+ @randomize_rows_action.call
30
+ when "4"
23
31
  return 0
24
32
  else
25
- @stdout.puts "Please choose 1 or 2."
33
+ @stdout.puts "Please choose 1, 2, 3, or 4."
26
34
  end
27
35
  end
28
36
  end
@@ -0,0 +1,22 @@
# frozen_string_literal: true

module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks whether the CSV file has a header row. Only an answer of
        # "n" or "no" (any letter case, surrounding whitespace ignored)
        # counts as no; anything else, including end of input, counts as yes.
        class HeadersPresentPrompt
          # @param stdin [#gets] stream the answer is read from
          # @param stdout [#print] stream the question is written to
          def initialize(stdin:, stdout:)
            @stdout = stdout
            @stdin = stdin
          end

          # @return [Boolean] false for "n"/"no", true otherwise
          def call
            @stdout.print "Headers present? [Y/n]: "

            reply = @stdin.gets
            normalized = reply.nil? ? "" : reply.strip.downcase
            declined = normalized == "n" || normalized == "no"
            !declined
          end
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
# frozen_string_literal: true

module Csvtool
  module Interface
    module CLI
      module Prompts
        # Asks for an optional integer seed.
        class SeedPrompt
          # Returned by #call when the answer is not blank and not an integer.
          INVALID = :invalid

          # @param stdin [#gets] stream the answer is read from
          # @param stdout [#print] stream the question is written to
          # @param errors [#invalid_seed] notified about a malformed answer
          def initialize(stdin:, stdout:, errors:)
            @errors = errors
            @stdout = stdout
            @stdin = stdin
          end

          # @return [Integer, nil, Symbol] the seed, nil for a blank answer or
          #   end of input, or INVALID after reporting a malformed answer
          def call
            @stdout.print "Random seed (optional integer): "
            answer = @stdin.gets&.strip.to_s

            case answer
            when ""
              nil
            when /\A-?\d+\z/
              answer.to_i
            else
              @errors.invalid_seed
              INVALID
            end
          end
        end
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Csvtool
4
- VERSION = "0.1.0.alpha"
4
+ VERSION = "0.3.0.alpha"
5
5
  end