eco-helpers 3.0.17 → 3.0.19
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -1
- data/eco-helpers.gemspec +3 -3
- data/lib/eco/api/common/loaders/parser.rb +10 -0
- data/lib/eco/api/common/people/default_parsers/csv_parser.rb +21 -208
- data/lib/eco/api/common/people/default_parsers/helpers/expected_headers.rb +206 -0
- data/lib/eco/api/common/people/default_parsers/helpers/null_parsing.rb +36 -0
- data/lib/eco/api/common/people/default_parsers/helpers.rb +15 -0
- data/lib/eco/api/common/people/default_parsers/json_parser.rb +56 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +13 -14
- data/lib/eco/api/common/people/default_parsers.rb +2 -0
- data/lib/eco/api/common/people/entry_factory.rb +15 -4
- data/lib/eco/api/session/batch/launcher/mode_size.rb +65 -0
- data/lib/eco/api/session/batch/launcher/retry.rb +3 -3
- data/lib/eco/api/session/batch/launcher/status_handling.rb +4 -2
- data/lib/eco/api/session/batch/launcher.rb +42 -37
- data/lib/eco/api/session.rb +2 -0
- data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb +26 -0
- data/lib/eco/api/usecases/default/utils/cli/json_to_csv_cli.rb +10 -0
- data/lib/eco/api/usecases/default/utils/cli/sort_csv_cli.rb +17 -0
- data/lib/eco/api/usecases/default/utils/cli/split_json_cli.rb +15 -0
- data/lib/eco/api/usecases/default/utils/group_csv_case.rb +213 -0
- data/lib/eco/api/usecases/default/utils/json_to_csv_case.rb +71 -0
- data/lib/eco/api/usecases/default/utils/sort_csv_case.rb +127 -0
- data/lib/eco/api/usecases/default/utils/split_json_case.rb +224 -0
- data/lib/eco/api/usecases/default/utils.rb +4 -0
- data/lib/eco/version.rb +1 -1
- metadata +22 -11
- data/lib/eco/api/session/batch/launcher/mode.rb +0 -23
- data/lib/eco/api/session/batch/launcher/size.rb +0 -40
data/lib/eco/api/common/people/default_parsers/xls_parser.rb
CHANGED
@@ -2,12 +2,9 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
   attribute :xls

   attr_accessor :already_required
-  attr_reader :file

-  def parser(file, _deps)
-    @file = file
-    rows.tap do |rws|
-      @file = nil
+  def parser(filename, _deps)
+    rows(file: filename).tap do |rws|
       rws.each do |row|
         to_string!(row)
       end
@@ -22,13 +19,14 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo

   def to_string!(row)
     row.transform_values! do |val|
-      next
+      next unless val
       next val if val.is_a?(String)
+
       val.to_s
     end
   end

-  def
+  def expected_headers
     log(:warn) {
       "Headers detection is using your fields_map.json file (native behaviour)"
     }
@@ -39,30 +37,31 @@ class Eco::API::Common::People::DefaultParsers::XLSParser < Eco::API::Common::Lo
     0
   end

-  def workbook
+  def workbook(file)
     require_reading_libs!
     Roo::Spreadsheet.open(file)
   end

-  def spreadheet(name_or_index = sheet_name)
-    workbook.sheet(name_or_index)
+  def spreadheet(name_or_index = sheet_name, file:)
+    workbook(file).sheet(name_or_index)
   end

-  def rows(target =
-    spreadheet.parse(header_search: target, clean: true)
+  def rows(target = expected_headers, file:)
+    spreadheet(file: file).parse(header_search: target, clean: true)
   rescue Roo::HeaderRowNotFoundError => e
     missing = JSON.parse(e.message)

     log(:warn) {
-      "The input file is missing these headers: #{missing}"
+      "The input file is missing these expected headers: #{missing}"
     }

     present = target - missing
-    rows(present)
+    rows(present, file: file)
   end

   def require_reading_libs!
     return if already_required
+
     require 'roo'
     require 'roo-xls'
     self.already_required = true
data/lib/eco/api/common/people/default_parsers.rb
CHANGED
@@ -12,6 +12,7 @@ module Eco
   end
 end

+require_relative 'default_parsers/helpers'
 require_relative 'default_parsers/select_parser'
 require_relative 'default_parsers/boolean_parser'
 require_relative 'default_parsers/numeric_parser'
@@ -22,4 +23,5 @@ require_relative 'default_parsers/freemium_parser'
 require_relative 'default_parsers/policy_groups_parser'
 require_relative 'default_parsers/login_providers_parser'
 require_relative 'default_parsers/csv_parser'
+require_relative 'default_parsers/json_parser'
 require_relative 'default_parsers/xls_parser'
data/lib/eco/api/common/people/entry_factory.rb
CHANGED
@@ -28,6 +28,7 @@ module Eco
       # to translate external names into internal ones and _vice versa_.
       def initialize(e, schema:, person_parser: nil, default_parser: nil, attr_map: nil)
         super(e)
+
         msg = "Constructor needs a PersonSchema. Given: #{schema.class}"
         fatal msg unless schema.is_a?(Ecoportal::API::V1::PersonSchema)

@@ -133,9 +134,10 @@ module Eco
           out.concat(curr)
         end
       end
-
+
+      # Get content only when it's not :xls, nor :json
       # note: even if content was provided, file takes precedence
-      if (format
+      if get_content?(format) && file # rubocop:disable Style/IfUnlessModifier
         content = get_file_content(file, encoding: encoding)
       end

@@ -166,8 +168,10 @@ module Eco
         end
       end.tap do |out_array|
         start_from_two = (format == :csv) || format == :xls
-
-
+        first_idx = start_from_two ? 2 : 1
+
+        out_array.each.with_index(first_idx) do |entry_hash, idx|
+          entry_hash["idx"] = idx
           entry_hash["source_file"] = file
         end
       end
@@ -222,6 +226,13 @@ module Eco

       private

+      def get_content?(format)
+        return false if format == :xls
+        return false if format == :json
+
+        true
+      end
+
       def abort(message)
         log(:error) { message }
         exit(1)
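A quick sketch of what the new predicate gates (return values follow the method above; the rationale in the comments is inferred from the surrounding diff, not stated by it):

# Illustrative only — mirrors EntryFactory#get_content? as added above.
get_content?(:xls)  # => false (spreadsheets are parsed from the file path)
get_content?(:json) # => false (handled by the new json_parser)
get_content?(:csv)  # => true  (raw content is read via get_file_content)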
data/lib/eco/api/session/batch/launcher/mode_size.rb
ADDED
@@ -0,0 +1,65 @@
+module Eco
+  module API
+    class Session
+      class Batch
+        module Launcher
+          module ModeSize
+            include Eco::API::Session::Batch::Launcher::Options
+
+            DEFAULT_BATCH_SIZE = 50
+            DEFAULT_JOB_SIZE = 100
+
+            def batch_size(opts = options)
+              return job_mode_size if job_mode?(opts)
+
+              batch_mode_size
+            end
+
+            private
+
+            # Swaps to batch endpoint on specific errors
+            def batch_mode_on(*error_types, options: self.options, allow_job_mode: true, &block)
+              in_job_mode = allow_job_mode && job_mode?(options)
+
+              yield(in_job_mode, batch_size(options))
+            rescue *error_types
+              raise unless in_job_mode
+
+              yield(false, batch_mode_size)
+            end
+
+            # MODE
+
+            # @return [Symbol] the batch mode to run
+            def batch_mode(opts = options)
+              opts.dig(:workflow, :batch, :mode) || :batch
+            end
+
+            # @return [Boolean] are we running in `:job` mode?
+            def job_mode?(opts = options)
+              batch_mode(opts) == :job
+            end
+
+            # SIZE
+
+            def job_mode_size
+              options.dig(:workflow, :batch, :job, :size).then do |size|
+                next self.class::DEFAULT_JOB_SIZE unless size
+
+                size
+              end
+            end
+
+            def batch_mode_size
+              options.dig(:workflow, :batch, :size).then do |size|
+                next self.class::DEFAULT_BATCH_SIZE unless size
+
+                [size, 100].min
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
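A hedged sketch of how the `ModeSize` lookups resolve, with an illustrative options hash shaped after the `dig(:workflow, :batch, ...)` calls above (the values themselves are made up):

# Illustrative only — key paths mirror the dig calls above.
opts = {workflow: {batch: {mode: :job, size: 500, job: {size: 250}}}}

batch_mode(opts) # => :job (falls back to :batch when the key is absent)
job_mode?(opts)  # => true
# With these as the instance `options`: batch_size => 250 (job_mode_size);
# in :batch mode it would be [500, 100].min => 100 (batch_mode_size caps at 100).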
data/lib/eco/api/session/batch/launcher/retry.rb
CHANGED
@@ -16,9 +16,9 @@ module Eco

         private

-        def offer_retry_on(
+        def offer_retry_on(*error_types, retries_left: 3, &block)
           yield
-        rescue
+        rescue *error_types => err
           raise err.class, err.message, cause: nil unless retries_left.positive?

           explanation = "#{err}\n"
@@ -29,7 +29,7 @@ module Eco
           raise unless response.upcase.start_with?("Y")

           puts "\nOkay... let's retry!"
-          offer_retry_on(
+          offer_retry_on(*error_types, retries_left: retries_left - 1, &block)
         end
       end
     end
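A minimal usage sketch of the reworked helper (the error class comes from the RETRY_ON constant the launcher adds below; `launch_request` is a hypothetical block body):

# Illustrative only: retry the block on the given error types, asking the
# user (up to retries_left more times) whether to retry.
offer_retry_on(Ecoportal::API::Errors::TimeOut, retries_left: 2) do
  launch_request # hypothetical call that may raise TimeOut
end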
data/lib/eco/api/session/batch/launcher/status_handling.rb
CHANGED
@@ -6,14 +6,16 @@ module Eco
        module StatusHandling
          private

-          def tap_status(enviro:, queue:, method:, status: nil
+          def tap_status(enviro:, queue:, method:, status: nil)
            status ||= Eco::API::Session::Batch::Status.new(
              enviro,
              queue: queue,
              method: method
            )

-            status.tap
+            status.tap do
+              yield(status) if block_given?
+            end
          end
        end
      end
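The rewrite makes the block optional while still returning the `Batch::Status`. A hedged usage sketch (the argument values are placeholders):

# Illustrative only: the status object is yielded when a block is given,
# and returned either way.
status = tap_status(enviro: enviro, queue: people, method: :update) do |st|
  st[person] = response # e.g. the bookkeeping done in launch_batch
end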
data/lib/eco/api/session/batch/launcher.rb
CHANGED
@@ -1,7 +1,6 @@
 require_relative 'launcher/valid_methods'
 require_relative 'launcher/options'
-require_relative 'launcher/mode'
-require_relative 'launcher/size'
+require_relative 'launcher/mode_size'
 require_relative 'launcher/benchmarking'
 require_relative 'launcher/status_handling'
 require_relative 'launcher/retry'
@@ -24,12 +23,17 @@ module Eco
        end

        include Options
-        include Mode
-        include Size
+        include ModeSize
        include Benchmarking
        include StatusHandling
        include Retry

+        TIMEOUT_RETRIES = 2
+        RETRY_ON = [
+          Ecoportal::API::Errors::TimeOut,
+          Ecoportal::API::Errors::StartTimeOut
+        ].freeze
+
        private

        def batch_from(
@@ -48,7 +52,6 @@ module Eco
          launch_batch(
            data,
            method: method,
-            per_page: params[:per_page] || batch_size(options),
            people_api: people_api,
            silent: silent,
            options: options
@@ -59,54 +62,56 @@ module Eco
          data,
          method:,
          status: nil,
-          job_mode: true,
+          job_mode: true,
          options: self.options,
-          per_page: batch_size(options),
          people_api: api&.people,
          silent: false
        )
-          iteration = 1
-          done = 0
-          iterations = (data.length.to_f / per_page).ceil

          tap_status(status: status, enviro: enviro, queue: data, method: method) do |overall_status|
            pending_for_server_error = data.to_a[0..]

-
+            batch_mode_on(*RETRY_ON, options: options, allow_job_mode: job_mode) do |as_job_mode, per_page|
+              iteration = 0
+              done = 0
+              iterations = (data.length.to_f / per_page).ceil

-
-            msg = "starting batch '#{method}' iteration #{iteration}/#{iterations}, "
-            msg << "with #{slice.length} entries of #{data.length} -- #{done} done"
-            msg << (" " * 20)
-            log(:info) { msg } unless silent
+              start_time = Time.now

-
+              data.each_slice(per_page) do |slice|
+                iteration += 1

-
-
-
-
-              faltal("Request with no response") unless response
+                msg = "starting batch '#{method}' iteration #{iteration}/#{iterations}, "
+                msg << "with #{slice.length} entries of #{data.length} -- #{done} done"
+                msg << (" " * 20)
+                log(:info) { msg } unless silent

-
+                start_slice = Time.now

-
-
-
-
-
-              end
+                offer_retry_on(*RETRY_ON, retries_left: TIMEOUT_RETRIES) do
+                  people_api.batch(job_mode: as_job_mode) do |batch|
+                    slice.each do |person|
+                      batch.public_send(method, person) do |response|
+                        faltal("Request with no response") unless response

-
+                        next if server_error?(response)

-
-
-
-
-
+                        pending_for_server_error.delete(person)
+                        overall_status[person] = response
+                      end
+                    end
+                  end # end batch
+                end
+
+                done += slice.length

-
-
+                msg = " ... iteration #{iteration}/#{iterations} done "
+                msg << "in #{str_per_sec(start_slice, slice.length)} "
+                msg << "(average: #{str_per_sec(start_time, done)})"
+                msg << (" " * 20)
+                log(:info) { msg } unless silent
+              end # next slice
+            end

            # temporary working around (due to back-end problems with batch/jobs)
            unless pending_for_server_error.empty?
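Read together with `ModeSize#batch_mode_on` and `Retry#offer_retry_on`, the control flow of the reworked `launch_batch` is roughly (a hedged summary of the code above, not library documentation):

# Illustrative pseudo-flow of the rescue path above:
batch_mode_on(*RETRY_ON, options: options, allow_job_mode: job_mode) do |as_job_mode, per_page|
  # 1st pass: as_job_mode == true (when in :job mode), per_page == job size.
  # If TimeOut/StartTimeOut still escapes offer_retry_on's user-driven
  # retries, batch_mode_on re-yields with as_job_mode == false and
  # per_page == batch_mode_size (falling back to the plain batch endpoint).
end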
data/lib/eco/api/session.rb
CHANGED
@@ -132,7 +132,9 @@ module Eco
      # If `schema` is `nil` or not provided it uses the currently associated to the `session`
      def entry_factory(schema: nil)
        schema = to_schema(schema) || self.schema
+
        return @entry_factories[schema&.id] if @entry_factories.key?(schema&.id)
+
        unless @entry_factories.empty?
          @entry_factories[schema&.id] = @entry_factories.values.first.newFactory(schema: schema)
          return @entry_factories[schema&.id]
data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb
ADDED
@@ -0,0 +1,26 @@
+class Eco::API::UseCases::Default::Utils::GroupCsv
+  class Cli < Eco::API::UseCases::Cli
+    str_desc = 'Groups the csv rows by a pivot field. '
+    str_desc << 'It assumes the sorting field is sorted '
+    str_desc << '(same values should be consecutive)'
+
+    desc str_desc
+
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: {name: file}})
+      end
+    end
+
+    add_option("-start-at", "Get only the last N-start_at rows") do |options|
+      count = SCR.get_arg("-start-at", with_param: true)
+      options.deep_merge!(input: {file: {start_at: count}})
+    end
+
+    add_option('-by', 'The column that should be used to group') do |options|
+      if (file = SCR.get_arg("-by", with_param: true))
+        options.deep_merge!(input: {group_by_field: file})
+      end
+    end
+  end
+end
data/lib/eco/api/usecases/default/utils/cli/json_to_csv_cli.rb
ADDED
@@ -0,0 +1,10 @@
+class Eco::API::UseCases::Default::Utils::JsonToCsv
+  class Cli < Eco::API::UseCases::Cli
+    desc "Transforms an input JSON file into a CSV one."
+
+    callback do |_sess, options, _case|
+      file = SCR.get_file(cli_name, required: true, should_exist: true)
+      options.deep_merge!(source: {file: file})
+    end
+  end
+end
data/lib/eco/api/usecases/default/utils/cli/sort_csv_cli.rb
ADDED
@@ -0,0 +1,17 @@
+class Eco::API::UseCases::Default::Utils::SortCsv
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Sorts the CSV by column -by'
+
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: file})
+      end
+    end
+
+    add_option('-by', 'The column that should be used to sorting') do |options|
+      if (file = SCR.get_arg("-by", with_param: true))
+        options.deep_merge!(input: {sort_by: file})
+      end
+    end
+  end
+end
data/lib/eco/api/usecases/default/utils/cli/split_json_cli.rb
ADDED
@@ -0,0 +1,15 @@
+class Eco::API::UseCases::Default::Utils::SplitJson
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Splits a json input file into multiple files'
+
+    callback do |_sess, options, _case|
+      file = SCR.get_file(cli_name, required: true, should_exist: true)
+      options.deep_merge!(source: {file: file})
+    end
+
+    add_option("-max-items", "The max count of items of the output files") do |options|
+      count = SCR.get_arg("-max-items", with_param: true)
+      options.deep_merge!(output: {file: {max_items: count}})
+    end
+  end
+end
data/lib/eco/api/usecases/default/utils/group_csv_case.rb
ADDED
@@ -0,0 +1,213 @@
+# This script assumes that for the `GROUP_BY_FIELD` rows are consecutive.
+# @note you might run first the `sort-csv` case.
+# @note you must inherit from this case and define the constants.
+#
+#   GROUP_BY_FIELD = 'target_csv_field'.freeze
+#   GROUPED_FIELDS = [
+#     'joined_field_1',
+#     'joined_field_2',
+#     'joined_field_3',
+#   ].freeze
+#
+class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
+  name 'group-csv'
+  type :other
+
+  require_relative 'cli/group_csv_cli'
+
+  def main(*_args)
+    if simulate?
+      count = Eco::CSV.count(input_file)
+      log(:info) { "CSV '#{input_file}' has #{count} rows." }
+    else
+      generate_file
+    end
+  end
+
+  private
+
+  def generate_file # rubocop:disable Metrics/AbcSize
+    row_count = 0
+    in_index = nil
+
+    CSV.open(output_filename, 'wb') do |out_csv|
+      first = true
+
+      puts "\n"
+
+      streamed_input.for_each(start_at_idx: start_at) do |row, idx|
+        if first
+          first = false
+          headers!(row)
+          out_csv << headers
+          require_group_by_field!(row, file: input_file)
+        end
+
+        in_index = idx
+        next unless !block_given? || yield(row, idx)
+
+        next unless pivotable?(row, idx)
+        next unless (last_group = pivot_row(row))
+
+        row_count += 1
+
+        if (row_count % 500).zero?
+          print "... Done #{row_count} rows \r"
+          $stdout.flush
+        end
+
+        out_csv << last_group.values_at(*headers)
+      end
+
+      # finalize
+      if (lrow = pivot_row)
+        row_count += 1
+        out_csv << lrow.values_at(*headers)
+      end
+    ensure
+      msg = "Generated file '#{output_filename}' "
+      msg << "with #{row_count} rows (out of #{in_index})."
+
+      log(:info) { msg } unless simulate?
+    end
+  end
+
+  # It tracks the current grouped row
+  # @return [Nil, Hash] the last grouped row when `row` doesn't belong
+  #   or `nil` otherwise
+  def pivot_row(row = nil)
+    @group ||= {}
+    return @group unless row
+
+    pivot_value = row[group_by_field]
+
+    unless (last_pivot = @group[group_by_field])
+      last_pivot = @group[group_by_field] = pivot_value
+    end
+
+    last = @group
+    @group = {group_by_field => pivot_value} unless pivot_value == last_pivot
+
+    headers_rest.each do |field|
+      curr_values = row[field].to_s.split('|').compact.uniq
+      pivot_values = @group[field].to_s.split('|').compact.uniq
+      @group[field] = (pivot_values | curr_values).join('|')
+    end
+
+    last unless last == @group
+  end
+
+  attr_reader :group
+  attr_reader :headers, :headers_rest
+
+  def headers!(row)
+    return if headers?
+
+    @headers_rest = grouped_fields & row.headers
+    @headers_rest -= [group_by_field]
+    @headers = [group_by_field, *headers_rest]
+  end
+
+  def headers?
+    instance_variable_defined?(:@headers)
+  end
+
+  def pivotable?(row, idx)
+    return true unless row[group_by_field].to_s.strip.empty?
+
+    msg = "Row #{idx} doesn't have value for pivot field '#{group_by_field}'"
+    msg << ". Skipping (discared) ..."
+    log(:warn) { msg }
+    false
+  end
+
+  def streamed_input
+    @streamed_input ||= Eco::CSV::Stream.new(input_file)
+  end
+
+  def input_file
+    options.dig(:input, :file, :name)
+  end
+
+  def start_at
+    return nil unless (num = options.dig(:input, :file, :start_at))
+
+    num = num.to_i
+    num = nil if num.zero?
+    num
+  end
+
+  def output_filename
+    return nil unless input_name
+
+    File.join(input_dir, "#{input_name}_grouped#{input_ext}")
+  end
+
+  def input_name
+    @input_name ||= File.basename(input_basename, input_ext)
+  end
+
+  def input_ext
+    @input_ext ||= input_basename.split('.')[1..].join('.').then do |name|
+      ".#{name}"
+    end
+  end
+
+  def input_basename
+    @input_basename ||= File.basename(input_full_filename)
+  end
+
+  def input_dir
+    @input_dir = File.dirname(input_full_filename)
+  end
+
+  def input_full_filename
+    @input_full_filename ||= File.expand_path(input_file)
+  end
+
+  def require_group_by_field!(row, file:)
+    return true if row.key?(group_by_field)
+
+    msg = "Pivot field '#{group_by_field}' missing in header of file '#{file}'"
+    log(:error) { msg }
+    raise msg
+  end
+
+  def group_by_field
+    return @group_by_field if instance_variable_defined?(:@group_by_field)
+
+    return (@group_by_field = opts_group_by) if opts_group_by
+
+    unless self.class.const_defined?(:GROUP_BY_FIELD)
+      msg = "(#{self.class}) You must define GROUP_BY_FIELD constant"
+      log(:error) { msg }
+      raise msg
+    end
+
+    @group_by_field = self.class::GROUP_BY_FIELD
+  end
+
+  def grouped_fields
+    return @grouped_fields if instance_variable_defined?(:@grouped_fields)
+
+    unless self.class.const_defined?(:GROUPED_FIELDS)
+      msg = "(#{self.class}) You must define GROUPED_FIELDS constant"
+      log(:error) { msg }
+      raise msg
+    end
+
+    @grouped_fields ||= [self.class::GROUPED_FIELDS].flatten.compact.tap do |flds|
+      next unless flds.empty?
+
+      log(:warn) {
+        msg = "There were no fields to be grouped/joined. "
+        msg << "This is equivalent to launch a unique operation."
+        msg
+      }
+    end
+  end
+
+  def opts_group_by
+    options.dig(:input, :group_by_field)
+  end
+end
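Since the case requires inheriting and defining the constants (per its own header comment), a minimal sketch of a custom case could look like this (class name, case name and field names are hypothetical):

# Hypothetical subclass — illustrative only.
class MyOrgGroupCsv < Eco::API::UseCases::Default::Utils::GroupCsv
  name 'my-org-group-csv'
  type :other

  GROUP_BY_FIELD = 'employee_id'.freeze
  GROUPED_FIELDS = %w[role site supervisor_id].freeze
end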