eco-helpers 3.2.12 → 3.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,313 @@
1
+ # This script assumes that rows sharing the same `MERGE_BY_FIELD` value are consecutive (both files are expected to be sorted by that field).
2
+ # @note you might need to run the `sort-csv` case first.
3
+ # @note at the moment, it does NOT add new fields from the merge file.
4
+ # It only uses the headers of the original file.
5
+ # @note you must inherit from this case and define the constants.
6
+ #
7
+ # MERGE_BY_FIELD = 'target_csv_field'.freeze
8
+ # # those not merged are overridden
9
+ # JOINED_FIELDS = [
10
+ # 'joined_field_1',
11
+ # 'joined_field_2',
12
+ # 'joined_field_3',
13
+ # ].freeze
14
+ #
15
+ class Eco::API::UseCases::Default::Utils::MergeCsv < Eco::API::Custom::UseCase
16
+ name 'merge-csv'
17
+ type :other
18
+
19
+ require_relative 'cli/merge_csv_cli'
20
+
21
# Entry point of the use case.
# When simulating, it only logs the number of rows of the input CSV;
# otherwise it generates the merged file.
def main(*_args)
  return generate_file unless simulate?

  count = Eco::CSV.count(input_file)
  log(:info) { "CSV '#{input_file}' has #{count} rows." }
end
29
+
30
+ private
31
+
32
# Streams the merge file and the original (input) file side by side and
# writes the result to `output_filename`.
#
# For each usable merge row, original rows are consumed until one is
# merged, one is found that sorts after the merge pivot (it is then held
# back as `pending` for the next merge row), or the input is exhausted.
# Once the merge file is exhausted, the remaining original rows are copied.
#
# @note the output only uses the headers of the original file.
# @raise [RuntimeError] (via `require_merge_by_field!`) when the pivot field
#   is missing in the header of any of the two files.
def generate_file # rubocop:disable Metrics/AbcSize
  in_index = nil

  CSV.open(output_filename, 'wb') do |out_csv|
    pending = false
    first   = true
    m_first = true
    row     = nil
    idx     = nil

    # Reads the next original row into `row`/`idx` (`row` stays `nil` when
    # nothing is yielded). On the very first row it sets up and writes the
    # headers; this is shared with the finalize loop so headers are also
    # written when the merge file provides no usable row.
    read_input = lambda do
      row = nil
      streamed_input.shift do |o_row, i|
        idx = i
        row = o_row

        if first
          first = false
          headers!(row)
          out_csv << headers
          require_merge_by_field!(row, file: input_file)
        end
      end
    end

    puts "\n"

    streamed_merging.for_each do |m_row, m_idx|
      if m_first
        m_first = false
        require_merge_by_field!(m_row, file: merge_file)
      end

      next unless pivotable?(m_row, m_idx, file: merge_file)

      merging_row(m_row)
      merge_done = false

      loop do
        # A pending row was already read but could not be written yet.
        read_input.call unless pending

        break unless row

        in_index = idx
        next unless pivotable?(row, idx, file: input_file)

        added = original_row(row) do |merged_row, merged:|
          out_csv << merged_row.values_at(*headers)
          merge_done = true if merged
        end

        # Count only rows actually written, so that a row held back as
        # pending is not counted again each time it is retried.
        row_count! if added
        pending = !added

        break if merge_done
        break unless added
        break if streamed_input.eof?
      end

      row = nil unless pending

      # Warn only when this merge row found no match (a successful merge
      # on the last input row must not be reported as a failure).
      unless merge_done
        msg  = "Could not merge row #{m_idx} (#{merging_row[merge_by_field]}) "
        msg << "because the pivot value does not exist in the original file"
        msg << ". Skipping (discarded) ..."
        log(:warn) { msg }
      end
    end

    # finalize
    loop do
      if pending
        # Flush the row that the merge loop held back, instead of
        # discarding it by reading the next one.
        pending = false
      else
        read_input.call
      end

      break unless row

      in_index = idx
      next unless pivotable?(row, idx, file: input_file)

      row_count!
      out_csv << row.values_at(*headers)

      break if streamed_input.eof?
    end
  ensure
    # `in_index`/`row_count` are still nil when no input row was read
    # (e.g. on an early failure); keep the summary from raising on top.
    msg  = "Generated file '#{output_filename}' "
    msg << "with #{row_count.to_i} rows (out of #{in_index ? in_index + 1 : 0})."

    log(:info) { msg } unless simulate?
  end
end
124
+
125
# Getter/setter of the merge row currently being processed.
# @param row [#to_h, nil] when given, it becomes the tracked merge row.
# @return [Hash, nil] the tracked merge row (converted with `to_h`),
#   or `nil` when none has been set yet.
def merging_row(row = nil)
  row ? (@merging_row = row.to_h) : @merging_row
end
133
+
134
# Compares an original row against the current `merging_row` by the pivot
# field and yields the row that should be written to the output, if any.
#
#   * pivot greater than the merge pivot: nothing is yielded (the row must
#     wait for a later merge row).
#   * pivot lower than the merge pivot: the row is yielded untouched.
#   * pivots equal: a merged row is yielded, where `joined_fields` hold the
#     `|`-separated union of both values and the rest of the headers take
#     the merge file value whenever the merge row has that key.
#
# @param row [#[], #to_h] a row of the original file.
# @yield [out_row, merged:] the row to be written.
# @yieldparam out_row [Hash] the original or the merged row.
# @yieldparam merged [Boolean] whether `out_row` is the result of a merge.
# @return [Boolean] `false` when the row could not be added yet,
#   and `true` otherwise.
def original_row(row)
  pivot_value = row[merge_by_field]
  merge_pivot = merging_row[merge_by_field]

  # As both files are sorted, the original row can't be added just yet.
  return false if pivot_value > merge_pivot

  if pivot_value < merge_pivot
    yield(row.to_h, merged: false) if block_given?
    return true
  end

  merged_row = {merge_by_field => pivot_value}

  joined_fields.each do |field|
    original_values = row[field].to_s.split('|')
    merge_values    = merging_row[field].to_s.split('|')

    # Array#| already drops duplicates while preserving the order.
    joined = (original_values | merge_values).join('|')
    merged_row[field] = joined.strip.empty? ? nil : joined
  end

  headers_rest.each do |field|
    value = merging_row.key?(field) ? merging_row[field] : row[field]
    merged_row[field] = value.to_s.strip.empty? ? nil : value
  end

  # Warn just once; skip the headers diff altogether after that.
  unless warned_missed_headers?
    missed_headers = merging_row.keys - headers

    if missed_headers.any?
      msg = "Missing headers in merged file: #{missed_headers.join(', ')}"
      log(:warn) { msg }
      @warned_missed_headers = true
    end
  end

  merged_row = merged_row.slice(*headers)
  yield(merged_row, merged: true) if block_given?

  true
end
179
+
180
+ attr_reader :merge, :row_count
181
+ attr_reader :headers, :headers_rest
182
+
183
+
184
# Whether we already warned about headers of the merging file
# that are not present in the original file.
# @return [Boolean]
def warned_missed_headers?
  @warned_missed_headers = false unless @warned_missed_headers
  @warned_missed_headers
end
189
+
190
# Sets up the output headers out of the first original row (only once).
# The resulting order is: the pivot field, the joined fields that exist
# in the row, and then the rest of the row's headers.
# @param row [#to_h] the first row of the original file.
def headers!(row)
  return if headers?

  @headers       = row.to_h.keys
  @joined_fields = @headers & joined_fields
  @headers_rest  = @headers - @joined_fields - [merge_by_field]
  @headers       = [merge_by_field].concat(@joined_fields, @headers_rest)
end
198
+
199
# @return [Boolean] whether `@headers` has been set up already.
def headers?
  instance_variables.include?(:@headers)
end
202
+
203
# Increases the rows counter by one, printing a progress line
# to the standard output every 500 rows.
# @return [Integer] the updated count.
def row_count!
  @row_count = (@row_count || 0) + 1
  return @row_count unless (@row_count % 500).zero?

  print "... Done #{@row_count} rows \r"
  $stdout.flush
  @row_count
end
212
+
213
# Whether the row has a (non-blank) value for the pivot field.
# A warning is logged for non-nil rows that lack it.
# @param row [#[], nil] the row to check.
# @param idx [Integer] index of the row (only used in the warning).
# @param file [String] the file the row belongs to (only used in the warning).
# @return [Boolean]
def pivotable?(row, idx, file:)
  return false if row.nil?

  pivot = row[merge_by_field].to_s.strip
  return true unless pivot.empty?

  warning = "Row #{idx} doesn't have value for pivot field '#{merge_by_field}'" \
            " (file: '#{file}'). Skipping (discarded) ..."
  log(:warn) { warning }
  false
end
222
+
223
# @return [Eco::CSV::Stream] memoized stream over `input_file`.
def streamed_input
  return @streamed_input if @streamed_input

  @streamed_input = Eco::CSV::Stream.new(input_file)
end
226
+
227
# @return [Eco::CSV::Stream] memoized stream over `merge_file`.
def streamed_merging
  return @streamed_merging if @streamed_merging

  @streamed_merging = Eco::CSV::Stream.new(merge_file)
end
230
+
231
# @return [Object, nil] the value at `options` -> `:input` -> `:file` -> `:name`
#   (the original CSV file).
def input_file
  key_path = %i[input file name]
  options.dig(*key_path)
end
234
+
235
# @return [Object, nil] the value at `options` -> `:input` -> `:merge_file` -> `:name`
#   (the CSV file to merge into the original).
def merge_file
  key_path = %i[input merge_file name]
  options.dig(*key_path)
end
238
+
239
# Path of the generated file: same folder and extension as the input file,
# with `_merged` appended to its name.
# @return [String, nil] `nil` when there is no `input_name`.
def output_filename
  base = input_name
  return unless base

  File.join(input_dir, "#{base}_merged#{input_ext}")
end
244
+
245
# @return [String] memoized base name of the input file, without `input_ext`.
def input_name
  return @input_name if @input_name

  @input_name = File.basename(input_basename, input_ext)
end
248
+
249
# Extension of the input file: everything from the first dot of its base
# name onwards (so `a.tar.gz` gives `.tar.gz`).
# @return [String] the extension with its leading dot, or an empty string
#   when the base name has no extension (rather than a dangling `.`).
def input_ext
  @input_ext ||= begin
    # `[1..]` is nil for an empty base name
    parts = input_basename.split('.')[1..] || []
    parts.empty? ? '' : ".#{parts.join('.')}"
  end
end
254
+
255
# @return [String] memoized base name (with extension) of `input_full_filename`.
def input_basename
  return @input_basename if @input_basename

  @input_basename = File.basename(input_full_filename)
end
258
+
259
# @return [String] memoized directory of `input_full_filename`
#   (memoized like the rest of the input path helpers).
def input_dir
  @input_dir ||= File.dirname(input_full_filename)
end
262
+
263
# @return [String] memoized absolute path of `input_file`.
def input_full_filename
  return @input_full_filename if @input_full_filename

  @input_full_filename = File.expand_path(input_file)
end
266
+
267
# Ensures that the row has the pivot field among its keys.
# @param row [#key?] a row of the file.
# @param file [String] the file the row belongs to (only used in the error).
# @raise [RuntimeError] when the pivot field is missing (also logged as error).
# @return [true] when the pivot field is present.
def require_merge_by_field!(row, file:)
  pivot = merge_by_field
  return true if row.key?(pivot)

  error = "Pivot field '#{pivot}' missing in header of file '#{file}'"
  log(:error) { error }
  raise error
end
274
+
275
# Name of the pivot field. The value from the options (`opts_merge_by`)
# takes precedence over the `MERGE_BY_FIELD` constant of the class.
# @raise [RuntimeError] when neither of them is available.
# @return [Object] the memoized pivot field.
def merge_by_field
  return @merge_by_field if instance_variable_defined?(:@merge_by_field)

  from_options = opts_merge_by
  return (@merge_by_field = from_options) if from_options

  if self.class.const_defined?(:MERGE_BY_FIELD)
    @merge_by_field = self.class::MERGE_BY_FIELD
  else
    error = "(#{self.class}) You must define MERGE_BY_FIELD constant"
    log(:error) { error }
    raise error
  end
end
288
+
289
# Fields whose values are joined (rather than overridden) on merge, as
# defined by the `JOINED_FIELDS` constant of the class (flattened, no nils).
# A warning is logged when the list is empty.
# @raise [RuntimeError] when the constant is not defined.
# @return [Array] the memoized list of fields.
def joined_fields
  return @joined_fields if instance_variable_defined?(:@joined_fields)

  unless self.class.const_defined?(:JOINED_FIELDS)
    error = "(#{self.class}) You must define JOINED_FIELDS constant"
    log(:error) { error }
    raise error
  end

  fields = [self.class::JOINED_FIELDS].flatten.compact

  if fields.empty?
    log(:warn) do
      [
        'There were no fields to be joined (JOINED_FIELDS). ',
        'This means all fields present in the merging file ',
        ' will be overridden in the original file.'
      ].join
    end
  end

  @joined_fields = fields
end
309
+
310
# @return [Object, nil] the value at `options` -> `:input` -> `:merge_by_field`.
def opts_merge_by
  key_path = %i[input merge_by_field]
  options.dig(*key_path)
end
313
+ end
@@ -1,7 +1,7 @@
1
1
  class Eco::API::UseCases::Default::Utils::SplitCsv < Eco::API::Common::Loaders::UseCase
2
2
  require_relative 'cli/split_csv_cli'
3
3
 
4
- MAX_ROWS = 15_000
4
+ MAX_ROWS = :unused
5
5
 
6
6
  name 'split-csv'
7
7
  type :other
@@ -15,6 +15,7 @@ class Eco::API::UseCases::Default::Utils::SplitCsv < Eco::API::Common::Loaders::
15
15
  input_file,
16
16
  max_rows: max_rows,
17
17
  start_at: start_at,
18
+ **params,
18
19
  &filter
19
20
  ).tap do |split|
20
21
  msg = []
@@ -31,6 +32,10 @@ class Eco::API::UseCases::Default::Utils::SplitCsv < Eco::API::Common::Loaders::
31
32
 
32
33
  private
33
34
 
35
+ def params
36
+ {}
37
+ end
38
+
34
39
  def filter
35
40
  nil
36
41
  end
@@ -0,0 +1,179 @@
1
+ # Tracks the files of a source folder into a file
2
+ class Eco::API::UseCases::Default::Utils::TrackFiles < Eco::API::Custom::UseCase
3
+ name 'track-files'
4
+ type :other
5
+
6
+ require_relative 'cli/track_files_cli'
7
+
8
+ OUT_HEADERS = %w[
9
+ ref_id
10
+ filename
11
+ filesize
12
+ s3_path
13
+ ].freeze
14
+
15
+ REF_ID_PATH_POSITION = :last
16
+ BASE_S3_PATH = 'uploads'.freeze
17
+ # S3_SUBPATH = 'org-name'.freeze
18
+
19
# Entry point of the use case: it only counts the files when simulating,
# and generates the tracking file otherwise.
def main(*_args)
  simulate? ? count_files : generate_file
end
26
+
27
+ private
28
+
29
+ attr_reader :folder_count, :file_count
30
+
31
# Increases the folders counter, printing a `.` as progress.
# @param cnt [Integer] the amount to add.
# @return [Integer] the updated count.
def folder_count!(cnt = 1)
  @folder_count = 0 unless @folder_count

  print '.'
  @folder_count = @folder_count + cnt
end
37
+
38
# Increases the files counter.
# @param cnt [Integer] the amount to add.
# @return [Integer] the updated count.
def file_count!(cnt = 1)
  @file_count = (@file_count || 0) + cnt
end
42
+
43
# Walks all the files (without a block, so just the counters are updated)
# and logs how many files and folders with files were found.
def count_files
  with_each_file

  log(:info) do
    "Found #{file_count} files, in #{folder_count} folders (with files)."
  end
end
50
+
51
# @return [Object] the `REF_ID_PATH_POSITION` constant as seen from the
#   class of the instance.
def ref_id_path_position
  klass = self.class
  klass::REF_ID_PATH_POSITION
end
54
+
55
# Writes a CSV (`output_filename`) with one row per file found, using the
# columns of `OUT_HEADERS`: the reference id (first or last folder name of
# the file's path, as per `ref_id_path_position`), the file name, the file
# size and the s3 path.
# @raise [ArgumentError] when `ref_id_path_position` is neither `:first`
#   nor `:last` (raised on the first file found).
def generate_file
  CSV.open(output_filename, 'wb') do |csv|
    csv << self.class::OUT_HEADERS

    with_each_file do |file, src_path|
      ref_id =
        case ref_id_path_position
        when :first then src_path.first
        when :last  then src_path.last
        else
          raise ArgumentError, "Unknown REF_ID_PATH_POSITION: #{ref_id_path_position} "
        end

      file_name = File.basename(file)

      csv << [
        ref_id,
        file_name,
        File.size(file),
        s3_path(file_name, src_path)
      ]
    end
  end
ensure
  # The counters are still nil when no file was found: report 0 instead
  # of leaving blanks in the summary.
  msg  = "Generated file '#{output_filename}' "
  msg << "with #{file_count.to_i} files/rows "
  msg << "organized in #{folder_count.to_i} folders."

  log(:info) { msg } unless simulate?
end
83
+
84
# Recursively walks the folders, updating the folder/file counters and
# yielding each file found along with the folder names that lead to it.
# @param folders [Array<String>] the folders to walk.
# @param src_path [Array<String>] names of the parent folders walked so far.
# @yield [file, path] each file found.
# @yieldparam file [String] the file.
# @yieldparam path [Array<String>] `src_path` plus the name of the file's folder.
# @raise [ArgumentError] when a folder contains both files and subfolders.
# @return [Array<String>] the `folders` received.
def with_each_file(folders = top_subfolders, src_path: [], &block)
  folders.each do |folder|
    # fresh array per folder, so sibling folders do not share the path
    path       = [*src_path, File.basename(folder)]
    files      = folder_files(folder)
    subfolders = top_subfolders(folder)

    next if files.empty? && subfolders.empty? # skip

    if files.any? && subfolders.any?
      error = [
        "Folder '#{folder}' contains both files and subfolders.",
        "\nFor correctly tracking and handling file attachments, ",
        "this is not supported."
      ].join

      raise ArgumentError, error
    end

    unless files.empty?
      folder_count!
      file_count!(files.count)
      files.each { |file| block.call(file, path) } if block
    end

    with_each_file(subfolders, src_path: path, &block) unless subfolders.empty?
  end
end
121
+
122
# Builds the s3 path of a file: `BASE_S3_PATH`, `s3_subpath`, the folder
# names of `path` and the file name, joined by `/` (nil parts are skipped).
# @param filename [String] the name of the file.
# @param path [Array<String>] names of the folders that lead to the file.
# @return [String]
def s3_path(filename, path)
  segments = [self.class::BASE_S3_PATH, s3_subpath, *path] << filename
  segments.compact.join('/')
end
130
+
131
# Sub-path to use right after the base s3 path. In order of precedence:
# `options` -> `:output` -> `:s3_path`, the `S3_SUBPATH` constant and
# `config.active_enviro`.
def s3_subpath
  from_options = options.dig(:output, :s3_path)
  return from_options if from_options

  s3_subpath_const || config.active_enviro
end
136
+
137
# @return [Object, nil] the `S3_SUBPATH` constant of the class, when defined.
def s3_subpath_const
  return unless self.class.const_defined?(:S3_SUBPATH)

  self.class::S3_SUBPATH
end
140
+
141
# Lists the direct (non-hidden) subfolders of a folder.
# @note the folder is passed as the `base:` of the glob, instead of being
#   part of the pattern, so folder names with glob characters
#   (`[`, `]`, `{`, `}`, `*`, `?`) are still listed correctly.
# @param base_folder [String] the folder to look into.
# @return [Array<String>] the subfolders, each prefixed with `base_folder`.
def top_subfolders(base_folder = input_base_folder)
  Dir.glob('*', base: base_folder).map do |entry|
    File.join(base_folder, entry)
  end.select do |path|
    File.directory?(path)
  end
end
148
+
149
# Lists the (non-hidden) files that are directly in a folder.
# @note the folder is passed as the `base:` of the glob, instead of being
#   part of the pattern, so folder names with glob characters
#   (`[`, `]`, `{`, `}`, `*`, `?`) are still listed correctly.
# @param dir [String] the folder to look into.
# @return [Array<String>] the files, each prefixed with `dir`.
def folder_files(dir)
  Dir.glob('*', base: dir).map do |entry|
    File.join(dir, entry)
  end.select do |path|
    File.file?(path)
  end
end
156
+
157
# Path of the generated file: `<input folder name>_files.csv`, in the
# `sftp` subfolder of `config.active_enviro`.
# @return [String, nil] `nil` when there is no `input_folder_name`.
def output_filename
  folder_name = input_folder_name
  return unless folder_name

  File.join(config.active_enviro, 'sftp', "#{folder_name}_files.csv")
end
166
+
167
# @return [String] memoized base name of `input_base_folder`.
def input_folder_name
  return @input_folder_name if @input_folder_name

  @input_folder_name = File.basename(input_base_folder)
end
170
+
171
# The source folder, as given at `options` -> `:input` -> `:folder`.
# @raise [ArgumentError] when the option is missing or it does not point
#   to an existing directory.
# @return [String] the folder.
def input_base_folder
  options.dig(:input, :folder).tap do |folder|
    # A missing option must end in the ArgumentError below, rather than
    # in a TypeError raised by `File.directory?(nil)`.
    next if folder && File.directory?(folder)

    msg = "Expecting '#{folder}' to be a directory, but it isn't."
    raise ArgumentError, msg
  end
end
179
+ end
@@ -14,4 +14,7 @@ require_relative 'utils/split_json_case'
14
14
  require_relative 'utils/json_to_csv_case'
15
15
  require_relative 'utils/sort_csv_case'
16
16
  require_relative 'utils/group_csv_case'
17
+ require_relative 'utils/merge_csv_case'
17
18
  require_relative 'utils/entries_to_csv_case'
19
+ require_relative 'utils/track_files_case'
20
+ require_relative 'utils/add_page_id_case'
@@ -40,7 +40,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location::Command
40
40
  return nil unless error?
41
41
 
42
42
  msg = []
43
- msg << "(#{command} '#{node_id}') #{error.message}"
43
+ msg << "(#{command_type} '#{node_id}') #{error.message}"
44
44
 
45
45
  feed = []
46
46
  feed.concat(error.validationErrors.map(&:message)) unless error.validationErrors.empty?
@@ -55,7 +55,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location::Command
55
55
  end
56
56
 
57
57
  def command_input_data
58
- input[command]
58
+ input[command_type]
59
59
  end
60
60
 
61
61
  def command_id
@@ -53,7 +53,8 @@ module Eco::API::UseCases::GraphQL::Helpers::Location::Command
53
53
  next applied unless with_id_change
54
54
 
55
55
  applied.select do |result|
56
- next false unless (command = result.command_result_data)
56
+ # next false unless (command = result.command_result_data)
57
+ next false unless (command = result.command_input_data)
57
58
 
58
59
  command.keys.include?(:newId)
59
60
  end
@@ -22,9 +22,10 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
22
22
  # both are being moved (specific/long mappings first)
23
23
  return 1 if from.subset_of?(other.from)
24
24
  return -1 if from.superset_of?(other.from)
25
- return -1 if (from & other.from).empty?
25
+ return -1 unless from.intersect?(other.from)
26
26
  return -1 if from.length >= other.from.length
27
27
  return 1 if from.length < other.from.length
28
+
28
29
  -1
29
30
  end
30
31
 
@@ -49,16 +50,19 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
49
50
  def maps?
50
51
  return false if any?(&:empty?)
51
52
  return false if from == to
53
+
52
54
  true
53
55
  end
54
56
 
55
57
  def rename?
56
58
  return false unless maps?
59
+
57
60
  both? {|set| set.length == 1}
58
61
  end
59
62
 
60
63
  def move?
61
64
  return false unless maps?
65
+
62
66
  !rename?
63
67
  end
64
68
  end
@@ -4,7 +4,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
4
4
  class << self
5
5
  def attr_compare(*attrs)
6
6
  attrs.each do |attr|
7
- meth = "#{attr}".to_sym # rubocop:disable Style/RedundantInterpolation
7
+ meth = :"#{attr}"
8
8
  define_method meth do |value|
9
9
  set.send(meth, to_set(value))
10
10
  end
@@ -13,7 +13,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
13
13
 
14
14
  def attr_operate(*attrs)
15
15
  attrs.each do |attr|
16
- meth = "#{attr}".to_sym # rubocop:disable Style/RedundantInterpolation
16
+ meth = :"#{attr}"
17
17
  define_method meth do |value|
18
18
  self.class.new(set.send(meth, to_set(value)))
19
19
  end
@@ -57,6 +57,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
57
57
  def include?(value)
58
58
  value = value.to_s.strip
59
59
  return false if value.empty?
60
+
60
61
  set.include?(value)
61
62
  end
62
63
 
@@ -82,7 +83,9 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
82
83
  return value.ini_tags.dup if value.is_a?(self.class)
83
84
  return value.dup if value.is_a?(Array)
84
85
  return value.to_a if value.is_a?(Set)
85
- raise ArgumentError, "Expecting #{self.class}, Set or Array. Given: #{value.class}"
86
+
87
+ msg = "Expecting #{self.class}, Set or Array. Given: #{value.class}"
88
+ raise ArgumentError, msg
86
89
  end
87
90
 
88
91
  def to_set(value)
@@ -22,7 +22,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
22
22
  end
23
23
 
24
24
  def to_csv(filename)
25
- CSV.open(filename, "w") do |fd|
25
+ CSV.open(filename, 'w') do |fd|
26
26
  fd << %w[src_tags dst_tags]
27
27
 
28
28
  each do |tags_map|
@@ -67,7 +67,8 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
67
67
  end
68
68
 
69
69
  def <<(pair)
70
- raise ArgumentError, "Expecting pair of Array in Array. Given: #{pair}" unless self.class.correct_pair?(pair)
70
+ msg = "Expecting pair of Array in Array. Given: #{pair}"
71
+ raise ArgumentError, msg unless self.class.correct_pair?(pair)
71
72
 
72
73
  add(*pair)
73
74
  end
@@ -76,6 +76,8 @@ class Eco::API::UseCases::GraphQL::Samples::Location
76
76
  ) do |input, stage|
77
77
  next unless input
78
78
 
79
+ self.id_name_input = input if simulate? && stage == :id_name
80
+
79
81
  some_update = true
80
82
 
81
83
  sliced_batches(
@@ -98,8 +100,8 @@ class Eco::API::UseCases::GraphQL::Samples::Location
98
100
  rearchive
99
101
  end
100
102
 
101
- rescued { delete_or_publish_draft }
102
- rescued { manage_remaps_table }
103
+ rescued { delete_or_publish_draft }
104
+ rescued { manage_remaps_table if some_update }
103
105
  end
104
106
  end
105
107
 
@@ -131,6 +133,8 @@ class Eco::API::UseCases::GraphQL::Samples::Location
131
133
 
132
134
  private
133
135
 
136
+ attr_accessor :id_name_input
137
+
134
138
  # Work with adapted diff builders.
135
139
  def nodes_diff_class
136
140
  Eco::API::UseCases::GraphQL::Helpers::Location::Command::Diffs
@@ -231,11 +235,17 @@ class Eco::API::UseCases::GraphQL::Samples::Location
231
235
  end
232
236
 
233
237
  def manage_remaps_table
234
- return unless results.final_response?
235
-
236
238
  rescued do
237
- results.applied_commands(with_id_change: true) do |result|
238
- update_tags_remap_table(result.command)
239
+ if simulate? && id_name_input
240
+ id_name_input[:commands].each do |command|
241
+ update_tags_remap_table(command[:update])
242
+ end
243
+ elsif results.final_response?
244
+ results.applied_commands(with_id_change: true).each do |result|
245
+ update_tags_remap_table(result.command_input_data)
246
+ end
247
+ else
248
+ return
239
249
  end
240
250
  end
241
251
 
@@ -36,8 +36,9 @@ class Eco::API::UseCases::GraphQL::Samples::Location
36
36
  # @note the SFTP push only happens if `remote_subfolder` is defined, via:
37
37
  # 1. `options.dig(:sftp, :remote_subfolder)`
38
38
  # 2. `REMOTE_FOLDER` const
39
- def close_handling_tags_remap_csv
39
+ def close_handling_tags_remap_csv # rubocop:disable Naming/PredicateMethod
40
40
  return false unless super
41
+ return true if simulate?
41
42
 
42
43
  upload(tags_remap_csv_file) unless remote_subfolder.nil?
43
44
  true