eco-helpers 2.7.12 → 2.7.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,114 @@
1
module Eco
  class CSV
    # Splits one CSV file into several smaller CSV files. Each generated file
    # starts with the headers of the input and holds at most `max_rows` data rows.
    #
    # Rows are read one at a time through `Eco::CSV::Stream`.
    class Split
      include Eco::Language::AuxiliarLogger

      # Minimum number of digits used for the file number in generated names.
      FILE_NUMBER_WIDTH = 5

      attr_reader :filename

      # @param filename [String] path of the CSV file to split.
      # @param max_rows [Integer] maximum number of data rows per generated file.
      # @param kargs [Hash] extra options, handed over to `Eco::CSV::Stream.new`.
      # @raise [ArgumentError] when `filename` does not exist, or when
      #   `max_rows` is not a positive Integer.
      def initialize(filename, max_rows:, **kargs)
        raise ArgumentError, "File '#{filename}' does not exist" unless ::File.exist?(filename)

        # Fail early: a zero `max_rows` would otherwise blow up with a
        # ZeroDivisionError in the middle of the split.
        unless max_rows.is_a?(Integer) && max_rows.positive?
          raise ArgumentError, "Expecting max_rows to be a positive Integer. Given: #{max_rows.inspect}"
        end

        @filename = filename
        @max_rows = max_rows
        @params   = kargs
        init
      end

      # Runs the split.
      # @yield [idx, file] a block to choose the name of each generated file
      # @yieldparam idx [Integer] the number of the file (starting at 0)
      # @yieldparam file [String] the default name of the file
      # @yieldreturn [String, nil] the filename of the file `idx`.
      #   - If `nil`, the default name (`file`) is used.
      # @return [Array<String>] names of the generated files
      def call(&block)
        stream.for_each do |row, ridx|
          copy_row(row, ridx, &block)
        end
        out_files
      ensure
        puts "Close at row #{row_idx}"
        # Always release the last output file, also when the split crashed.
        @csv&.close
      end

      private

      attr_reader :params
      attr_reader :idx, :max_rows
      attr_reader :headers, :row_idx

      attr_accessor :exception

      # Appends `row` to the output file that corresponds to row index `ridx`.
      def copy_row(row, ridx, &block)
        @headers ||= row.headers
        @row_idx   = ridx
        current_csv(ridx, &block) << row.fields
      end

      # Returns the output CSV that row `ridx` belongs to, opening a new output
      # file (with the headers already written) when a boundary is reached.
      def current_csv(ridx)
        if @csv.nil? || split?(ridx)
          puts "Split at row #{row_idx}"
          @csv&.close
          nidx         = next_idx
          default_name = generate_name(nidx)
          # A block may return `nil` to keep the default naming convention.
          out_filename = (yield(nidx, default_name) if block_given?) || default_name
          @csv = ::CSV.open(out_filename, "w")
          @csv << headers
          out_files << out_filename
        end
        @csv
      end

      # Row indexes are 0-based, so a new file starts at 0, max_rows,
      # 2 * max_rows, ... which gives every file exactly `max_rows` rows
      # (except, possibly, the last one).
      def split?(ridx)
        (ridx % max_rows).zero?
      end

      # Returns the current file number and moves the counter forward.
      def next_idx
        idx.tap { @idx += 1 }
      end

      def init
        @idx ||= 0 # rubocop:disable Naming/MemoizedInstanceVariableName
      end

      def stream
        @stream ||= Eco::CSV::Stream.new(filename, **params)
      end

      # Default name of the output file number `fidx`: same folder, base name
      # and extension as the input file, with the file number as a suffix.
      def generate_name(fidx)
        File.join(input_dir, "#{input_name}_#{file_number(fidx)}#{input_ext}")
      end

      # Zero-pads `num`; numbers wider than the padding are kept in full, so
      # two different files can never end up with the same number.
      def file_number(num)
        num.to_s.rjust(FILE_NUMBER_WIDTH, "0")
      end

      def out_files
        @out_files ||= []
      end

      def input_name
        @input_name ||= File.basename(input_basename, input_ext)
      end

      # Everything from the first dot of the base name onwards
      # (i.e. `.tar.gz` for `data.tar.gz`). Empty when there is no extension.
      def input_ext
        @input_ext ||= begin
          parts = input_basename.split('.')[1..] || []
          parts.empty? ? '' : ".#{parts.join('.')}"
        end
      end

      def input_basename
        @input_basename ||= File.basename(input_full_filename)
      end

      def input_dir
        @input_dir ||= File.dirname(input_full_filename)
      end

      def input_full_filename
        @input_full_filename ||= File.expand_path(filename)
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
require 'pp' # Object#pretty_inspect is used by the failure feedback

module Eco
  class CSV
    # Reads a CSV file one row at a time, keeping track of the index of the
    # row being read.
    #
    # NOTE(review): the file handle is closed when `for_each` finishes while
    # the parser stays memoized, so an instance looks meant for a single pass.
    class Stream
      include Eco::Language::AuxiliarLogger

      attr_reader :filename

      # @param filename [String] path of the CSV file to read.
      # @param kargs [Hash] options for `Eco::CSV.new`. They are merged over
      #   the defaults `headers: true` and `skip_blanks: true`.
      # @raise [ArgumentError] when `filename` does not exist.
      def initialize(filename, **kargs)
        raise ArgumentError, "File '#{filename}' does not exist" unless ::File.exist?(filename)

        @filename = filename
        @params   = {
          headers:     true,
          skip_blanks: true
        }.merge(kargs)
        init
      end

      # Yields each remaining row of the file along with its index.
      # @param start_at_idx [Integer] index of the first row to be yielded;
      #   the rows before it are read and discarded.
      # @yield [row, idx] once per row
      # @yieldparam row [Object] the row, as returned by the parser's `shift`
      # @yieldparam idx [Integer] the index of the row (starting at 0)
      # @raise [ArgumentError] when no block is given.
      def for_each(start_at_idx: 0)
        raise ArgumentError, 'Expecting block, but not given.' unless block_given?

        move_to_idx(start_at_idx)

        yield(row, next_idx) while (self.row = csv.shift)
      rescue StandardError => err
        # Remember the failure so the `ensure` clause can give feedback.
        self.exception = err
        raise
      ensure
        close_file
        report_failure if exception
      end

      # Reads and discards rows until the row index reaches `start_at_idx`
      # (or the file runs out of rows).
      def move_to_idx(start_at_idx)
        next_idx while (idx < start_at_idx) && (self.row = csv.shift)
      end

      private

      attr_reader :params
      attr_reader :idx, :fd

      attr_accessor :row, :exception

      # Returns the current row index and moves the counter forward.
      def next_idx
        idx.tap { @idx += 1 }
      end

      # see https://dalibornasevic.com/posts/68-processing-large-csv-files-with-ruby
      def csv
        return @csv if instance_variable_defined?(:@csv)

        @fd  = ::File.open(filename, 'r')
        @csv = Eco::CSV.new(fd, **params)
      end

      def init
        @idx ||= 0 # rubocop:disable Naming/MemoizedInstanceVariableName
      end

      # Closes the underlying file, if it was ever opened.
      def close_file
        return unless fd.is_a?(::File)

        fd.close
        @fd = nil
      end

      # Gives some feedback (last index and row read) when the read crashed.
      def report_failure
        msg = []
        msg << "Last row IDX: #{idx}"
        msg << "Last row content: #{row.to_h.pretty_inspect}"
        puts msg
        log(:debug) { msg.join("\n") }
      end
    end
  end
end
data/lib/eco/csv.rb CHANGED
@@ -17,8 +17,22 @@ module Eco
17
17
  end
18
18
  parse(get_file_content(file, **params), **kargs)
19
19
  end
20
+
21
+ # @yield [idx, file] a block to specify the filename
22
+ # @yieldparam idx [Integer] the number of the file
23
+ # @yieldparam file [String] the default name of the file
24
+ # @yieldreturn [String] the filename of the file `idx`.
25
+ # - If `nil` it will create its own filename convention
26
+ # @param filename [String] the original file
27
+ # @param max_rows [Integer] number of rows per file
28
+ # @see Eco::CSV::Split#call
29
def split(filename, max_rows:, **kargs, &block)
  # Build the splitter first, then run it, handing over the naming block.
  splitter = Eco::CSV::Split.new(filename, max_rows: max_rows, **kargs)
  splitter.call(&block)
end
20
32
  end
21
33
  end
22
34
  end
23
35
 
24
36
  require_relative 'csv/table'
37
+ require_relative 'csv/stream'
38
+ require_relative 'csv/split'
@@ -19,12 +19,12 @@ module Eco
19
19
  attr_writer :timestamp
20
20
  attr_reader :level
21
21
 
22
- forward *LOG_LEVELS, *METHODS
22
+ forward(*LOG_LEVELS, *METHODS)
23
23
 
24
24
  def initialize(level: ::Logger::INFO, timestamp: false)
25
25
  @level = level
26
26
  self.timestamp = timestamp
27
- loggers[:console] = ::Logger.new(STDOUT).tap do |logger|
27
+ loggers[:console] = ::Logger.new($stdout).tap do |logger|
28
28
  logger.formatter = format_proc(console: true)
29
29
  logger.level = level
30
30
  end
@@ -41,7 +41,7 @@ module Eco
41
41
  private
42
42
 
43
43
  def forward(meth, *args, &block)
44
- loggers.each do |_key, logger|
44
+ loggers.each_value do |logger|
45
45
  logger.send(meth, *args, &block)
46
46
  end
47
47
  end
@@ -52,6 +52,7 @@ module Eco
52
52
 
53
53
  def console_timestamp(datetime)
54
54
  return nil unless timestamp?
55
+
55
56
  timestamp(datetime)
56
57
  end
57
58
 
@@ -62,10 +63,10 @@ module Eco
62
63
  end
63
64
 
64
65
  def format_proc(console: true, &block)
65
- proc do |severity, datetime, progname, msg|
66
- str_stamp = console ? console_timestamp(datetime) : timestamp(datetime)
66
+ proc do |severity, datetime, _progname, msg|
67
+ str_stamp = console ? console_timestamp(datetime) : timestamp(datetime)
67
68
  "#{severity.to_s[0]}: #{str_stamp}#{msg}\n".tap do |formatted_msg|
68
- block.call(severity, datetime, msg, formatted_msg) if block
69
+ block&.call(severity, datetime, msg, formatted_msg)
69
70
  end
70
71
  end
71
72
  end
data/lib/eco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Eco
2
- VERSION = '2.7.12'.freeze
2
+ VERSION = '2.7.13'.freeze
3
3
  end