eco-helpers 2.7.12 → 2.7.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,114 @@
1
module Eco
  class CSV
    # Splits one CSV file into several smaller CSV files, each one holding a copy
    # of the header row followed by at most `max_rows` data rows.
    #
    # The input is read row by row through `Eco::CSV::Stream`. Unless the block
    # given to `#call` provides another name, every output file is created in the
    # directory of the input file and named `<name>_<NNNNN><ext>`, where `<ext>`
    # is everything from the first dot of the input file name onwards.
    class Split
      include Eco::Language::AuxiliarLogger

      attr_reader :filename

      # @param filename [String] path to the CSV file to split.
      # @param max_rows [Integer] maximum number of data rows per generated file.
      # @param kargs [Hash] extra options, forwarded as they are to `Eco::CSV::Stream.new`.
      # @raise [ArgumentError] when `filename` does not exist or `max_rows` is not
      #   a positive Integer.
      def initialize(filename, max_rows:, **kargs)
        raise ArgumentError, "File '#{filename}' does not exist" unless ::File.exist?(filename)

        # Fail early: a zero/nil `max_rows` would otherwise only blow up on the first row.
        unless max_rows.is_a?(Integer) && max_rows.positive?
          raise ArgumentError, "Expecting max_rows to be a positive Integer. Given: #{max_rows.inspect}"
        end

        @filename = filename
        @max_rows = max_rows
        @params   = kargs
        init
      end

      # Streams the input file and writes its rows into the split files.
      # @yield [idx, file] a block to spot the filename
      # @yieldparam idx [Integer] the number of the file
      # @yieldparam file [String] the default name of the file
      # @yieldreturn [String] the filename of the file `idx`.
      #   - If `nil` it will create its own filename convention
      # @return [Array<String>] names of the generated files
      def call(&block)
        stream.for_each do |row, ridx|
          copy_row(row, ridx, &block)
        end
        out_files
      ensure
        puts "Close at row #{row_idx}"
        # The last output file is still open at this point (also when it crashed).
        @csv&.close
      end

      private

      attr_reader :params
      attr_reader :idx, :max_rows
      attr_reader :headers, :row_idx

      attr_accessor :exception

      # Appends `row` to the output file that corresponds to the row index `ridx`.
      # The headers are captured from the first row that is seen.
      def copy_row(row, ridx, &block)
        @headers ||= row.headers
        @row_idx   = ridx
        current_csv(ridx, &block) << row.fields
      end

      # @return [::CSV] the output file where the row `ridx` should be written.
      #   When `ridx` starts a new chunk (or no file is open yet), it closes the
      #   current file and opens the next one with the header row already in it.
      def current_csv(ridx)
        if split?(ridx) || @csv.nil?
          puts "Split at row #{row_idx}"
          @csv&.close
          default_name = generate_name(nidx = next_idx)
          # A block that returns `nil` means "use the default naming convention".
          custom_name  = block_given? ? yield(nidx, default_name) : nil
          out_filename = custom_name || default_name
          @csv = ::CSV.open(out_filename, "w")
          @csv << headers
          out_files << out_filename
        end
        @csv
      end

      # @note `ridx` is 0-based (see `Eco::CSV::Stream#for_each`), so new files
      #   start at rows 0, max_rows, 2 * max_rows... which gives every file
      #   (including the first one) exactly `max_rows` rows, except maybe the last.
      # @return [Boolean] whether the row `ridx` is the first row of a new file.
      def split?(ridx)
        (ridx % max_rows).zero?
      end

      # @return [Integer] the number of the next output file (post-incremented).
      def next_idx
        idx.tap { @idx += 1 }
      end

      def init
        @idx ||= 0 # rubocop:disable Naming/MemoizedInstanceVariableName
      end

      # @return [Eco::CSV::Stream] the row by row reader of the input file.
      def stream
        @stream ||= Eco::CSV::Stream.new(filename, **params)
      end

      # @return [String] default full path of the output file number `fidx`.
      def generate_name(fidx)
        ::File.join(input_dir, "#{input_name}_#{file_number(fidx)}#{input_ext}")
      end

      # @return [String] `num` left-padded with zeros up to 5 digits. Larger
      #   numbers are kept complete, so the generated names never collide.
      def file_number(num)
        num.to_s.rjust(5, '0')
      end

      # @return [Array<String>] the names of the files generated so far.
      def out_files
        @out_files ||= []
      end

      # @return [String] the input file name without directory and without extension.
      def input_name
        @input_name ||= ::File.basename(input_basename, input_ext)
      end

      # @return [String] everything from the first dot of the input file name
      #   (i.e. `.tar.gz`), or an empty string when the name has no extension.
      def input_ext
        @input_ext ||= begin
          parts = input_basename.split('.')[1..]
          parts.empty? ? '' : ".#{parts.join('.')}"
        end
      end

      def input_basename
        @input_basename ||= ::File.basename(input_full_filename)
      end

      def input_dir
        @input_dir ||= ::File.dirname(input_full_filename)
      end

      def input_full_filename
        @input_full_filename ||= ::File.expand_path(filename)
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
require 'pp' # provides `pretty_inspect`, used by the crash feedback of `#for_each`

module Eco
  class CSV
    # Reads a CSV file row by row (instead of loading it all in memory),
    # keeping track of the index of the row that will be read next.
    class Stream
      include Eco::Language::AuxiliarLogger

      attr_reader :filename

      # @param filename [String] path to the CSV file to read.
      # @param kargs [Hash] options for `Eco::CSV.new`. They are merged on top of
      #   the defaults `headers: true` and `skip_blanks: true`.
      # @raise [ArgumentError] when `filename` does not exist.
      def initialize(filename, **kargs)
        raise ArgumentError, "File '#{filename}' does not exist" unless ::File.exist?(filename)

        @filename = filename
        @params   = {
          headers:     true,
          skip_blanks: true
        }.merge(kargs)
        init
      end

      # Yields the rows of the file, one at a time, along with their 0-based index.
      # When it finishes (or crashes) the file is closed and the internal state is
      # reset, so the same instance can iterate the file again from the beginning.
      # @note if it crashes, it prints and logs the last row that was read
      #   before re-raising the error.
      # @param start_at_idx [Integer] index of the first row to be yielded
      #   (rows before it are read and discarded).
      # @yield [row, idx] each row from `start_at_idx` onwards
      # @yieldparam row [Object] the row as returned by `Eco::CSV#shift`
      # @yieldparam idx [Integer] the index of the row
      # @raise [ArgumentError] when no block is given.
      # @return [nil]
      def for_each(start_at_idx: 0)
        raise ArgumentError, 'Expecting block, but not given.' unless block_given?

        begin
          # Do not carry over the outcome of a previous (failed) iteration.
          self.exception = nil
          self.row       = nil

          move_to_idx(start_at_idx)

          yield(row, next_idx) while (self.row = csv.shift)
        rescue StandardError => err
          self.exception = err
          raise
        ensure
          close_file
          report_crash if exception
          # The file will be re-opened from the top on the next read.
          @idx = 0
        end
      end

      # Reads and discards rows until the next row to read is `start_at_idx`
      # (or until the end of the file is reached).
      # @note it opens the file if not yet open, and it does NOT close it.
      # @param start_at_idx [Integer] index of the row to stop at.
      def move_to_idx(start_at_idx)
        next_idx while (idx < start_at_idx) && (self.row = csv.shift)
      end

      private

      attr_reader :params
      attr_reader :idx, :fd

      attr_accessor :row, :exception

      # @return [Integer] the index of the row just read (post-incremented).
      def next_idx
        idx.tap { @idx += 1 }
      end

      # Lazily opens the file and wraps it into a CSV reader.
      # see https://dalibornasevic.com/posts/68-processing-large-csv-files-with-ruby
      def csv
        @csv ||= begin
          @fd = ::File.open(filename, 'r')
          Eco::CSV.new(fd, **params)
        end
      end

      # Closes the file (if open) and drops the reader that is bound to it,
      # so `#csv` does not hand out a reader over a closed file.
      def close_file
        fd&.close
        @fd  = nil
        @csv = nil
      end

      # Gives some feedback on where it crashed.
      def report_crash
        msg = []
        msg << "Last row IDX: #{idx}"
        msg << "Last row content: #{row.to_h.pretty_inspect}"
        puts msg
        log(:debug) { msg.join("\n") }
      end

      def init
        @idx ||= 0 # rubocop:disable Naming/MemoizedInstanceVariableName
      end
    end
  end
end
data/lib/eco/csv.rb CHANGED
@@ -17,8 +17,22 @@ module Eco
17
17
  end
18
18
  parse(get_file_content(file, **params), **kargs)
19
19
  end
20
+
21
# Splits the CSV file `filename` into several files of `max_rows` rows each.
# @yield [idx, file] a block to spot the filename
# @yieldparam idx [Integer] the number of the file
# @yieldparam file [String] the default name of the file
# @yieldreturn [String] the filename of the file `idx`.
#   - If `nil` it will create its own filename convention
# @param filename [String] the original file
# @param max_rows [Integer] number of rows per file
# @param kargs [Hash] any other option is passed through to `Eco::CSV::Split.new`
# @see Eco::CSV::Split#call
def split(filename, max_rows:, **kargs, &block)
  splitter = Eco::CSV::Split.new(filename, max_rows: max_rows, **kargs)
  splitter.call(&block)
end
20
32
  end
21
33
  end
22
34
  end
23
35
 
24
36
  require_relative 'csv/table'
37
+ require_relative 'csv/stream'
38
+ require_relative 'csv/split'
@@ -19,12 +19,12 @@ module Eco
19
19
  attr_writer :timestamp
20
20
  attr_reader :level
21
21
 
22
- forward *LOG_LEVELS, *METHODS
22
+ forward(*LOG_LEVELS, *METHODS)
23
23
 
24
24
  def initialize(level: ::Logger::INFO, timestamp: false)
25
25
  @level = level
26
26
  self.timestamp = timestamp
27
- loggers[:console] = ::Logger.new(STDOUT).tap do |logger|
27
+ loggers[:console] = ::Logger.new($stdout).tap do |logger|
28
28
  logger.formatter = format_proc(console: true)
29
29
  logger.level = level
30
30
  end
@@ -41,7 +41,7 @@ module Eco
41
41
  private
42
42
 
43
43
  def forward(meth, *args, &block)
44
- loggers.each do |_key, logger|
44
+ loggers.each_value do |logger|
45
45
  logger.send(meth, *args, &block)
46
46
  end
47
47
  end
@@ -52,6 +52,7 @@ module Eco
52
52
 
53
53
  def console_timestamp(datetime)
54
54
  return nil unless timestamp?
55
+
55
56
  timestamp(datetime)
56
57
  end
57
58
 
@@ -62,10 +63,10 @@ module Eco
62
63
  end
63
64
 
64
65
  def format_proc(console: true, &block)
65
- proc do |severity, datetime, progname, msg|
66
- str_stamp = console ? console_timestamp(datetime) : timestamp(datetime)
66
+ proc do |severity, datetime, _progname, msg|
67
+ str_stamp = console ? console_timestamp(datetime) : timestamp(datetime)
67
68
  "#{severity.to_s[0]}: #{str_stamp}#{msg}\n".tap do |formatted_msg|
68
- block.call(severity, datetime, msg, formatted_msg) if block
69
+ block&.call(severity, datetime, msg, formatted_msg)
69
70
  end
70
71
  end
71
72
  end
data/lib/eco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Eco
2
- VERSION = '2.7.12'.freeze
2
+ VERSION = '2.7.13'.freeze
3
3
  end