eco-helpers 3.0.18 → 3.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,12 +2,9 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
   attribute :xls
 
   attr_accessor :already_required
-  attr_reader :file
 
-  def parser(file, _deps)
-    @file = file
-    rows.tap do |rws|
-      @file = nil
+  def parser(filename, _deps)
+    rows(file: filename).tap do |rws|
       rws.each do |row|
         to_string!(row)
       end
@@ -22,13 +19,14 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
 
   def to_string!(row)
     row.transform_values! do |val|
-      next nil unless val
+      next unless val
       next val if val.is_a?(String)
+
       val.to_s
     end
   end
 
-  def headers
+  def expected_headers
     log(:warn) {
       "Headers detection is using your fields_map.json file (native behaviour)"
     }
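
A side note on the `next nil` → `next` change above: inside a block, a bare `next` already returns `nil`, so the two forms behave identically here. In isolation:

    {a: nil, b: 1}.transform_values! { |v| next unless v; v.to_s }
    # => {a: nil, b: "1"}
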
@@ -39,30 +37,31 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
     0
   end
 
-  def workbook
+  def workbook(file)
     require_reading_libs!
     Roo::Spreadsheet.open(file)
   end
 
-  def spreadheet(name_or_index = sheet_name)
-    workbook.sheet(name_or_index)
+  def spreadheet(name_or_index = sheet_name, file:)
+    workbook(file).sheet(name_or_index)
   end
 
-  def rows(target = headers)
-    spreadheet.parse(header_search: target, clean: true)
+  def rows(target = expected_headers, file:)
+    spreadheet(file: file).parse(header_search: target, clean: true)
   rescue Roo::HeaderRowNotFoundError => e
     missing = JSON.parse(e.message)
 
     log(:warn) {
-      "The input file is missing these headers: #{missing}"
+      "The input file is missing these expected headers: #{missing}"
     }
 
     present = target - missing
-    rows(present)
+    rows(present, file: file)
   end
 
   def require_reading_libs!
     return if already_required
+
    require 'roo'
    require 'roo-xls'
    self.already_required = true
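
The net effect of this refactor is that the parser no longer caches the file in `@file` (nor needs to reset it after parsing): the file name travels down through keyword arguments, `rows(file:)` → `spreadheet(file:)` → `workbook(file)`. A hypothetical, trimmed-down sketch of the same stateless flow (class name and sheet selection invented; the `roo` calls are the ones used above):

    require 'roo'

    class XlsRowsSketch
      def parser(filename, _deps = nil)
        rows(file: filename)
      end

      private

      # No instance state: each call opens the workbook it was given.
      def rows(file:)
        workbook(file).sheet(0).parse(headers: true, clean: true)
      end

      def workbook(file)
        Roo::Spreadsheet.open(file)
      end
    end
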
@@ -12,6 +12,7 @@ module Eco
   end
 end
 
+require_relative 'default_parsers/helpers'
 require_relative 'default_parsers/select_parser'
 require_relative 'default_parsers/boolean_parser'
 require_relative 'default_parsers/numeric_parser'
@@ -22,4 +23,5 @@ require_relative 'default_parsers/freemium_parser'
 require_relative 'default_parsers/policy_groups_parser'
 require_relative 'default_parsers/login_providers_parser'
 require_relative 'default_parsers/csv_parser'
+require_relative 'default_parsers/json_parser'
 require_relative 'default_parsers/xls_parser'
@@ -28,6 +28,7 @@ module Eco
   # to translate external names into internal ones and _vice versa_.
   def initialize(e, schema:, person_parser: nil, default_parser: nil, attr_map: nil)
     super(e)
+
     msg = "Constructor needs a PersonSchema. Given: #{schema.class}"
     fatal msg unless schema.is_a?(Ecoportal::API::V1::PersonSchema)
 
@@ -133,9 +134,10 @@ module Eco
         out.concat(curr)
       end
     end
-    # Get content only when it's not :xls
+
+    # Get content only when it's not :xls, nor :json
     # note: even if content was provided, file takes precedence
-    if (format != :xls) && file # rubocop:disable Style/IfUnlessModifier
+    if get_content?(format) && file # rubocop:disable Style/IfUnlessModifier
       content = get_file_content(file, encoding: encoding)
     end
 
@@ -166,8 +168,10 @@ module Eco
       end
     end.tap do |out_array|
       start_from_two = (format == :csv) || format == :xls
-      out_array.each_with_index do |entry_hash, i|
-        entry_hash["idx"] = start_from_two ? i + 2 : i + 1
+      first_idx = start_from_two ? 2 : 1
+
+      out_array.each.with_index(first_idx) do |entry_hash, idx|
+        entry_hash["idx"] = idx
         entry_hash["source_file"] = file
       end
     end
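
The replacement leans on the fact that `Enumerator#with_index` accepts a starting offset, which removes the manual `i + 2` / `i + 1` arithmetic. Plain Ruby:

    %w[a b c].each.with_index(2) { |item, idx| puts "#{idx}: #{item}" }
    # 2: a
    # 3: b
    # 4: c
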
@@ -222,6 +226,13 @@ module Eco
 
     private
 
+    def get_content?(format)
+      return false if format == :xls
+      return false if format == :json
+
+      true
+    end
+
     def abort(message)
       log(:error) { message }
       exit(1)
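
Matching the comment in the earlier hunk ("Get content only when it's not :xls, nor :json"), the new predicate behaves as:

    get_content?(:csv)  # => true  (file content is read into `content`)
    get_content?(:xls)  # => false (the xls parser opens the file itself)
    get_content?(:json) # => false (likewise for the new json parser)
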
@@ -6,14 +6,16 @@ module Eco
   module StatusHandling
     private
 
-    def tap_status(enviro:, queue:, method:, status: nil, &block)
+    def tap_status(enviro:, queue:, method:, status: nil)
       status ||= Eco::API::Session::Batch::Status.new(
         enviro,
         queue: queue,
         method: method
       )
 
-      status.tap(&block)
+      status.tap do
+        yield(status) if block_given?
+      end
     end
   end
 end
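
Beyond dropping the explicit `&block` capture, this changes behaviour when no block is passed: `status.tap(&nil)` raises a `LocalJumpError`, whereas the new form simply returns the status. A standalone sketch (not the gem's code):

    def tap_status_sketch(status)
      status.tap do
        yield(status) if block_given?
      end
    end

    tap_status_sketch(:ok)                  # => :ok (no block, no error)
    tap_status_sketch(:ok) { |st| puts st } # prints "ok", returns :ok
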
@@ -71,7 +71,7 @@ module Eco
       tap_status(status: status, enviro: enviro, queue: data, method: method) do |overall_status|
         pending_for_server_error = data.to_a[0..]
 
-        batch_mode_on(*RETRY_ON, options: options, allow_job_mode: job_mode) do |job_mode, per_page|
+        batch_mode_on(*RETRY_ON, options: options, allow_job_mode: job_mode) do |as_job_mode, per_page|
           iteration = 0
           done = 0
           iterations = (data.length.to_f / per_page).ceil
@@ -79,7 +79,7 @@ module Eco
           start_time = Time.now
 
           data.each_slice(per_page) do |slice|
-            iteration += 1
+            iteration += 1
 
             msg = "starting batch '#{method}' iteration #{iteration}/#{iterations}, "
             msg << "with #{slice.length} entries of #{data.length} -- #{done} done"
@@ -89,7 +89,7 @@ module Eco
             start_slice = Time.now
 
             offer_retry_on(*RETRY_ON, retries_left: TIMEOUT_RETRIES) do
-              people_api.batch(job_mode: job_mode) do |batch|
+              people_api.batch(job_mode: as_job_mode) do |batch|
                 slice.each do |person|
                   batch.public_send(method, person) do |response|
                     faltal("Request with no response") unless response
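
The `as_job_mode` rename across these two hunks removes variable shadowing: the old block parameter `job_mode` shadowed the method argument of the same name, so the outer value became unreachable inside the block. The rename changes no behaviour (the block value is still what reaches `people_api.batch`), it just keeps the two values distinguishable. The shadowing effect in plain Ruby:

    job_mode = :outer
    [:inner].each { |job_mode| puts job_mode } # prints "inner"; :outer is unreachable here
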
@@ -132,7 +132,9 @@ module Eco
     # If `schema` is `nil` or not provided it uses the currently associated to the `session`
     def entry_factory(schema: nil)
       schema = to_schema(schema) || self.schema
+
       return @entry_factories[schema&.id] if @entry_factories.key?(schema&.id)
+
       unless @entry_factories.empty?
         @entry_factories[schema&.id] = @entry_factories.values.first.newFactory(schema: schema)
         return @entry_factories[schema&.id]
@@ -0,0 +1,26 @@
+class Eco::API::UseCases::Default::Utils::GroupCsv
+  class Cli < Eco::API::UseCases::Cli
+    str_desc = 'Groups the csv rows by a pivot field. '
+    str_desc << 'It assumes the sorting field is sorted '
+    str_desc << '(same values should be consecutive)'
+
+    desc str_desc
+
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: {name: file}})
+      end
+    end
+
+    add_option("-start-at", "Get only the last N-start_at rows") do |options|
+      count = SCR.get_arg("-start-at", with_param: true)
+      options.deep_merge!(input: {file: {start_at: count}})
+    end
+
+    add_option('-by', 'The column that should be used to group') do |options|
+      if (file = SCR.get_arg("-by", with_param: true))
+        options.deep_merge!(input: {group_by_field: file})
+      end
+    end
+  end
+end
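
For orientation: after these callbacks run, an invocation along the lines of `group-csv people.csv -by email -start-at 100` (exact entry-point syntax aside; argument shapes inferred from the `SCR` calls above) leaves `options` deep-merged roughly as:

    {
      input: {
        file: {name: 'people.csv', start_at: '100'},
        group_by_field: 'email'
      }
    }

Note `-start-at` arrives as a string; the use case below normalises it with `to_i`.
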
@@ -0,0 +1,10 @@
+class Eco::API::UseCases::Default::Utils::JsonToCsv
+  class Cli < Eco::API::UseCases::Cli
+    desc "Transforms an input JSON file into a CSV one."
+
+    callback do |_sess, options, _case|
+      file = SCR.get_file(cli_name, required: true, should_exist: true)
+      options.deep_merge!(source: {file: file})
+    end
+  end
+end
@@ -0,0 +1,17 @@
+class Eco::API::UseCases::Default::Utils::SortCsv
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Sorts the CSV by column -by'
+
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: file})
+      end
+    end
+
+    add_option('-by', 'The column that should be used to sorting') do |options|
+      if (file = SCR.get_arg("-by", with_param: true))
+        options.deep_merge!(input: {sort_by: file})
+      end
+    end
+  end
+end
@@ -0,0 +1,15 @@
+class Eco::API::UseCases::Default::Utils::SplitJson
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Splits a json input file into multiple files'
+
+    callback do |_sess, options, _case|
+      file = SCR.get_file(cli_name, required: true, should_exist: true)
+      options.deep_merge!(source: {file: file})
+    end
+
+    add_option("-max-items", "The max count of items of the output files") do |options|
+      count = SCR.get_arg("-max-items", with_param: true)
+      options.deep_merge!(output: {file: {max_items: count}})
+    end
+  end
+end
@@ -0,0 +1,213 @@
+# This script assumes that for the `GROUP_BY_FIELD` rows are consecutive.
+# @note you might run first the `sort-csv` case.
+# @note you must inherit from this case and define the constants.
+#
+#   GROUP_BY_FIELD = 'target_csv_field'.freeze
+#   GROUPED_FIELDS = [
+#     'joined_field_1',
+#     'joined_field_2',
+#     'joined_field_3',
+#   ].freeze
+#
+class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
+  name 'group-csv'
+  type :other
+
+  require_relative 'cli/group_csv_cli'
+
+  def main(*_args)
+    if simulate?
+      count = Eco::CSV.count(input_file)
+      log(:info) { "CSV '#{input_file}' has #{count} rows." }
+    else
+      generate_file
+    end
+  end
+
+  private
+
+  def generate_file # rubocop:disable Metrics/AbcSize
+    row_count = 0
+    in_index = nil
+
+    CSV.open(output_filename, 'wb') do |out_csv|
+      first = true
+
+      puts "\n"
+
+      streamed_input.for_each(start_at_idx: start_at) do |row, idx|
+        if first
+          first = false
+          headers!(row)
+          out_csv << headers
+          require_group_by_field!(row, file: input_file)
+        end
+
+        in_index = idx
+        next unless !block_given? || yield(row, idx)
+
+        next unless pivotable?(row, idx)
+        next unless (last_group = pivot_row(row))
+
+        row_count += 1
+
+        if (row_count % 500).zero?
+          print "... Done #{row_count} rows \r"
+          $stdout.flush
+        end
+
+        out_csv << last_group.values_at(*headers)
+      end
+
+      # finalize
+      if (lrow = pivot_row)
+        row_count += 1
+        out_csv << lrow.values_at(*headers)
+      end
+    ensure
+      msg = "Generated file '#{output_filename}' "
+      msg << "with #{row_count} rows (out of #{in_index})."
+
+      log(:info) { msg } unless simulate?
+    end
+  end
+
+  # It tracks the current grouped row
+  # @return [Nil, Hash] the last grouped row when `row` doesn't belong
+  #   or `nil` otherwise
+  def pivot_row(row = nil)
+    @group ||= {}
+    return @group unless row
+
+    pivot_value = row[group_by_field]
+
+    unless (last_pivot = @group[group_by_field])
+      last_pivot = @group[group_by_field] = pivot_value
+    end
+
+    last = @group
+    @group = {group_by_field => pivot_value} unless pivot_value == last_pivot
+
+    headers_rest.each do |field|
+      curr_values = row[field].to_s.split('|').compact.uniq
+      pivot_values = @group[field].to_s.split('|').compact.uniq
+      @group[field] = (pivot_values | curr_values).join('|')
+    end
+
+    last unless last == @group
+  end
+
+  attr_reader :group
+  attr_reader :headers, :headers_rest
+
+  def headers!(row)
+    return if headers?
+
+    @headers_rest = grouped_fields & row.headers
+    @headers_rest -= [group_by_field]
+    @headers = [group_by_field, *headers_rest]
+  end
+
+  def headers?
+    instance_variable_defined?(:@headers)
+  end
+
+  def pivotable?(row, idx)
+    return true unless row[group_by_field].to_s.strip.empty?
+
+    msg = "Row #{idx} doesn't have value for pivot field '#{group_by_field}'"
+    msg << ". Skipping (discared) ..."
+    log(:warn) { msg }
+    false
+  end
+
+  def streamed_input
+    @streamed_input ||= Eco::CSV::Stream.new(input_file)
+  end
+
+  def input_file
+    options.dig(:input, :file, :name)
+  end
+
+  def start_at
+    return nil unless (num = options.dig(:input, :file, :start_at))
+
+    num = num.to_i
+    num = nil if num.zero?
+    num
+  end
+
+  def output_filename
+    return nil unless input_name
+
+    File.join(input_dir, "#{input_name}_grouped#{input_ext}")
+  end
+
+  def input_name
+    @input_name ||= File.basename(input_basename, input_ext)
+  end
+
+  def input_ext
+    @input_ext ||= input_basename.split('.')[1..].join('.').then do |name|
+      ".#{name}"
+    end
+  end
+
+  def input_basename
+    @input_basename ||= File.basename(input_full_filename)
+  end
+
+  def input_dir
+    @input_dir = File.dirname(input_full_filename)
+  end
+
+  def input_full_filename
+    @input_full_filename ||= File.expand_path(input_file)
+  end
+
+  def require_group_by_field!(row, file:)
+    return true if row.key?(group_by_field)
+
+    msg = "Pivot field '#{group_by_field}' missing in header of file '#{file}'"
+    log(:error) { msg }
+    raise msg
+  end
+
+  def group_by_field
+    return @group_by_field if instance_variable_defined?(:@group_by_field)
+
+    return (@group_by_field = opts_group_by) if opts_group_by
+
+    unless self.class.const_defined?(:GROUP_BY_FIELD)
+      msg = "(#{self.class}) You must define GROUP_BY_FIELD constant"
+      log(:error) { msg }
+      raise msg
+    end
+
+    @group_by_field = self.class::GROUP_BY_FIELD
+  end
+
+  def grouped_fields
+    return @grouped_fields if instance_variable_defined?(:@grouped_fields)
+
+    unless self.class.const_defined?(:GROUPED_FIELDS)
+      msg = "(#{self.class}) You must define GROUPED_FIELDS constant"
+      log(:error) { msg }
+      raise msg
+    end
+
+    @grouped_fields ||= [self.class::GROUPED_FIELDS].flatten.compact.tap do |flds|
+      next unless flds.empty?
+
+      log(:warn) {
+        msg = "There were no fields to be grouped/joined. "
+        msg << "This is equivalent to launch a unique operation."
+        msg
+      }
+    end
+  end
+
+  def opts_group_by
+    options.dig(:input, :group_by_field)
+  end
+end
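
As the header comment prescribes, this case is meant to be subclassed with the two constants defined (field names below are invented for illustration):

    class GroupByEmail < Eco::API::UseCases::Default::Utils::GroupCsv
      name 'group-by-email'
      type :other

      GROUP_BY_FIELD = 'email'.freeze
      GROUPED_FIELDS = %w[filter_tags policy_group_ids].freeze
    end

Per `group_by_field` above, a `-by` option takes precedence over `GROUP_BY_FIELD`, but `GROUPED_FIELDS` has no option-based fallback and must be defined.
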
@@ -0,0 +1,71 @@
+class Eco::API::UseCases::Default::Utils::JsonToCsv < Eco::API::Common::Loaders::UseCase
+  require_relative 'cli/json_to_csv_cli'
+
+  name 'json-to-csv'
+  type :other
+
+  def main(*_args)
+    return if simulate?
+
+    CSV.open(out_filename, 'w') do |csv|
+      csv << all_keys
+      data.each do |item|
+        csv << item.values_at(*all_keys)
+      end
+    ensure
+      log(:info) {
+        "Generated output file: '#{File.expand_path(out_filename)}'."
+      }
+    end
+  end
+
+  private
+
+  def all_keys
+    @all_keys ||= data.each_with_object([]) do |item, head|
+      head.concat(item.keys - head)
+    end
+  end
+
+  def data
+    @data ||= parse_json_file.tap do |dt|
+      ensure_array!(dt)
+
+      log(:info) {
+        "Loaded #{dt.count} items (from file '#{File.basename(input_file)}')"
+      }
+
+      exit 0 if simulate?
+    end
+  end
+
+  def out_filename
+    @out_filename ||= ''.then do
+      input_basename = File.basename(input_file)
+      base_name = File.basename(input_basename, '.json')
+      "#{base_name}.csv"
+    end
+  end
+
+  def input_file
+    options.dig(:source, :file)
+  end
+
+  def ensure_array!(data)
+    return if data.is_a?(Array)
+
+    msg = "Expecting JSON file to contain an Array. Given: #{data.class}"
+    log(:error) { msg }
+    raise msg
+  end
+
+  def parse_json_file(filename = input_file)
+    fd = File.open(filename)
+    JSON.load fd # rubocop:disable Security/JSONLoad
+  rescue JSON::ParserError => err
+    log(:error) { "Parsing error on file '#{filename}'" }
+    raise err
+  ensure
+    fd&.close
+  end
+end
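
The header row is built as the union of every item's keys in first-seen order, and `values_at` back-fills missing keys with `nil` (blank cells in the CSV). In isolation:

    data = [
      {"id" => 1, "name" => "a"},
      {"id" => 2, "email" => "b@x.io"}
    ]

    all_keys = data.each_with_object([]) { |item, head| head.concat(item.keys - head) }
    # => ["id", "name", "email"]

    data.map { |item| item.values_at(*all_keys) }
    # => [[1, "a", nil], [2, nil, "b@x.io"]]
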
@@ -0,0 +1,127 @@
+class Eco::API::UseCases::Default::Utils::SortCsv < Eco::API::Custom::UseCase
+  name 'sort-csv'
+  type :other
+
+  require_relative 'cli/sort_csv_cli'
+
+  def main(*_args)
+    if simulate?
+      count = Eco::CSV.count(input_file)
+      log(:info) { "CSV '#{input_file}' has #{count} rows." }
+    else
+      group_input_rows
+      generate_file
+    end
+  end
+
+  private
+
+  attr_reader :headers, :headers_rest
+
+  def group_input_rows
+    idx = 0
+    first = true
+
+    Eco::CSV.foreach(input_file, headers: true, skip_blanks: true) do |row|
+      idx += 1
+
+      if first
+        first = false
+        @output_headers = row.headers
+        require_sort_field!(row, file: input_file)
+      end
+
+      pivot_value = row[sort_field]
+      (row_groups[pivot_value] ||= []) << row
+
+      if (idx % 500).zero?
+        print "... Tracked #{idx} rows \r"
+        $stdout.flush
+      end
+    end
+  ensure
+    log(:info) { "Tracked #{idx} rows" }
+  end
+
+  def generate_file
+    idx = 0
+
+    CSV.open(output_filename, 'wb') do |csv|
+      csv << @output_headers
+
+      row_groups.keys.sort.each do |key|
+        row_groups[key].each do |row|
+          csv << row.values_at(*@output_headers)
+
+          idx += 1
+          if (idx % 500).zero?
+            print "... Sorted #{idx} rows \r"
+            $stdout.flush
+          end
+        end
+      end
+    end
+  ensure
+    msg = "Generated file '#{output_filename}' with #{idx} rows."
+    log(:info) { msg } unless simulate?
+  end
+
+  def row_groups
+    @row_groups ||= {}
+  end
+
+  def output_filename
+    return nil unless input_name
+
+    File.join(input_dir, "#{input_name}_sorted#{input_ext}")
+  end
+
+  def input_name
+    @input_name ||= File.basename(input_basename, input_ext)
+  end
+
+  def input_ext
+    @input_ext ||= input_basename.split('.')[1..].join('.').then do |name|
+      ".#{name}"
+    end
+  end
+
+  def input_basename
+    @input_basename ||= File.basename(input_full_filename)
+  end
+
+  def input_dir
+    @input_dir = File.dirname(input_full_filename)
+  end
+
+  def input_full_filename
+    @input_full_filename ||= File.expand_path(input_file)
+  end
+
+  def input_file
+    options.dig(:input, :file)
+  end
+
+  def require_sort_field!(row, file:)
+    return true if row.key?(sort_field)
+
+    msg = "Sort field '#{sort_field}' missing in header of file '#{file}'"
+    log(:error) { msg }
+    raise msg
+  end
+
+  def sort_field
+    @sort_field ||= opts_sort_by.tap do |pivot|
+      next if pivot
+
+      msg = "The pivot field should be specified with -by option"
+
+      log(:error) { msg }
+      raise msg
+    end
+  end
+
+  def opts_sort_by
+    options.dig(:input, :sort_by)
+  end
+end
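
Because rows are bucketed by pivot value in input order and the buckets are then flushed in sorted-key order, the sort is stable for rows sharing a pivot value (at the cost of holding all rows in memory). The core idea in isolation:

    row_groups = {}
    [['b', 1], ['a', 2], ['b', 3]].each do |key, payload|
      (row_groups[key] ||= []) << payload
    end

    row_groups.keys.sort.flat_map { |key| row_groups[key] }
    # => [2, 1, 3]  ('a' first, then both 'b' payloads in input order)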