RubyGems - eco-helpers - Versions diffs - 3.2.12 → 3.2.14 - Mend

eco-helpers 3.2.12 → 3.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 91691c5a914be5eebeff46f776d9c5b720e8e9672317051f36b5995beff3977c
-  data.tar.gz: 1742764775b28b99f136e451cc79c68359924f7722752ccdae043cf7c1b7f6a9
+  metadata.gz: d67a16095de2e32c2c627214b0254df6d2685e0591ab6295082736e52494c4d3
+  data.tar.gz: 60835a688189d8feda9bdc6198bbdb0cfaa9e9f95e5c7521f36cedbec706c1b0
 SHA512:
-  metadata.gz: 3c17e149406ae8ae64c94b623e10a6151d9f0fd4755e736281a89c098c3c6ff180d7f85560d62e63741f3166d283882352e50cf48841c22e4f2106a17e76f3f0
-  data.tar.gz: 25199fc81a46ff8897be9af805bb675a4ee3ca11b61439a91450639703583c1907e97513fd26e14375a30956166dcb2cb35161a56b920f221ff6d2efc69bcee4
+  metadata.gz: 0c1ded6a88ad0c6394e96cb511fddb5c5ac29635307affc1577d5eeb210f01ad8dd78edf78e449b9bca765a8754aa31083abb72beb60746ed21741e523878e6c
+  data.tar.gz: a18f9c81c2430ba8251bdfc34e6e4e1d3da0fd3cbe4647226942469d8da1f71e00aa7e21e3162d9d89d492f98e800642c0132edc30a305515df67764c397de91

data/CHANGELOG.md CHANGED Viewed

@@ -2,7 +2,7 @@
 All notable changes to this project will be documented in this file.
-## [3.2.13] - 2026-01-xx
+## [3.2.15] - 2026-05-xx
 ### Added
@@ -10,6 +10,34 @@ All notable changes to this project will be documented in this file.
 ### Fixed
+## [3.2.14] - 2026-05-22
+### Added
+- `track-files` case
+- `add-page-id` case
+### Changed
+- **improvement**: added `-format` argument to `-group-csv` to output a `jsonl` **custom** file.
+## [3.2.13] - 2026-04-15
+### Added
+- `-split-csv` case
+  - Allow custom split criteria via `splitter` named argument.
+- `-merge-csv` case
+### Changed
+- improved `Stream` with methods `eof?` and `shift`
+### Fixed
+- Locations remap on RS update
+- `-group-csv`: correct rows count
 ## [3.2.12] - 2026-01-19
 ### Added

data/lib/eco/api/usecases/default/utils/add_page_id_case.rb ADDED Viewed

@@ -0,0 +1,273 @@
+# @note you might add a `filter` method
+#
+#      def filter
+#        @filter ||= proc do |row, _r_idx|
+#          next true
+#          next true unless (ref_id = row[pivot_column(row)])
+#          next false if excluded_ref_id?(ref_id)
+#
+#          true
+#        end
+#      end
+#
+class Eco::API::UseCases::Default::Utils::AddPageId < Eco::API::Custom::UseCase
+  name 'add-page-id'
+  type :other
+  require_relative 'cli/add_page_id_cli'
+  PIVOT_FIELD = [
+    'ref_id'
+  ].freeze
+  PAGE_ID          = 'page_id'.freeze
+  EXCLUDED_REF_IDS = %w[].freeze
+  def main(*_args)
+    if simulate?
+      count = Eco::CSV.count(input_file)
+      log(:info) { "CSV '#{input_file}' has #{count} rows." }
+    else
+      generate_file(&filter)
+    end
+  end
+  private
+  attr_reader :headers, :headers_rest
+  def filter
+    nil
+  end
+  def excluded_ref_id?(ref_id)
+    self.class::EXCLUDED_REF_IDS.include?(ref_id)
+  end
+  def generate_file # rubocop:disable Metrics/AbcSize
+    idx           = -1
+    row_count     = 0
+    headers_added = false
+    CSV.open(output_filename, 'wb') do |csv|
+      puts "\n"
+      Eco::CSV.foreach(input_file, headers: true, skip_blanks: true) do |row|
+        idx += 1
+        next unless !block_given? || yield(row, idx)
+        unless headers_added
+          headers!(row)
+          require_pivot_field!(row, file: input_file)
+          csv           << headers
+          headers_added = true
+        end
+        unless (pivot_value = row[pivot_field])
+          msg  = "Row #{idx} doesn't have value for pivot field '#{pivot_field}'"
+          msg << ". Skipping (discarded) ..."
+          log(:warn) { msg }
+          next
+        end
+        unless (page_id = input_maps[pivot_value])
+          warn_unknown_mapping_reference!(pivot_value)
+          next
+        end
+        row_count += 1
+        if (row_count % 500).zero?
+          print "... Mapped #{row_count} rows          \r"
+          $stdout.flush
+        end
+        values     = [page_id, pivot_value]
+        oth_values = row.values_at(*headers_rest)
+        values.concat(oth_values) unless headers_rest.empty?
+        csv << values
+      end
+    end
+  ensure
+    msg = "Generated file '#{output_filename}' with #{row_count} rows (out of #{idx})."
+    log(:info) { msg } unless simulate?
+  end
+  def warn_unknown_mapping_reference!(ref_id)
+    return if unknown.include?(ref_id)
+    unknown << ref_id
+    msg  = "Could not map '#{pivot_field}' '#{ref_id}' to a '#{page_id_field}'"
+    msg << ". Skipping (discarded) ..."
+    log(:warn) { msg }
+  end
+  def unknown
+    @unknown ||= []
+  end
+  def headers!(row)
+    return if instance_variable_defined?(:@headers)
+    @headers_rest = row.headers - base_out_header(row)
+    @headers      = [*base_out_header, *headers_rest]
+  end
+  def base_out_header(row = nil)
+    @base_out_header ||= [page_id_field, pivot_field(row)] # space: :output
+  end
+  def input_maps
+    return @input_maps if instance_variable_defined?(:@input_maps)
+    @input_maps = {}
+    idx = 0
+    Eco::CSV.foreach(input_maps_file, headers: true) do |row|
+      idx += 1
+      if (idx % 500).zero?
+        print "... Creating mappings table (#{idx} done)          \r"
+        $stdout.flush
+      end
+      require_pivot_field!(row, space: :maps, file: input_maps_file)
+      require_page_id_field!(row, file: input_maps_file)
+      ref_id  = row[pivot_field(space: :maps)]
+      page_id = row[page_id_field(space: :maps)]
+      @input_maps[ref_id] = page_id
+    end
+    @input_maps
+  end
+  def input_maps_file
+    options.dig(:input, :maps).tap do |file|
+      next if file && File.exist?(file)
+      log(:error) {
+        msg = "You must specify an existing maps file with the option '-maps-file'"
+        msg << ".\n  * File: '#{file}' does not exist" unless file.nil?
+        msg
+      }
+      exit 1
+    end
+  end
+  def output_filename
+    return nil unless input_name
+    File.join(
+      input_dir,
+      "#{input_name}_mapped#{input_ext}"
+    )
+  end
+  def input_name
+    @input_name ||= File.basename(
+      input_basename,
+      input_ext
+    )
+  end
+  def input_ext
+    @input_ext ||= input_basename.split('.')[1..].join('.').then do |name|
+      ".#{name}"
+    end
+  end
+  def input_basename
+    @input_basename ||= File.basename(input_full_filename)
+  end
+  def input_dir
+    @input_dir = File.dirname(input_full_filename)
+  end
+  def input_full_filename
+    @input_full_filename ||= File.expand_path(input_file)
+  end
+  def input_file
+    options.dig(:input, :file)
+  end
+  def require_pivot_field!(row, file:, space: :input)
+    return true if row.key?(pivot_field(row, space: space))
+    msg = "Pivot field '#{pivot_field}' missing in header of file '#{file}'"
+    log(:error) { msg }
+    raise msg
+  end
+  def require_page_id_field!(row, file:)
+    return true if row.key?(page_id_field(space: :maps))
+    msg = "Page ID field '#{page_id_field(space: :maps)}' missing in header of file '#{file}'"
+    log(:error) { msg }
+    raise msg
+  end
+  def pivot_field(row = nil, space: :input)
+    @pivot_field ||= {}
+    return @pivot_field[space] if @pivot_field.key?(space)
+    @pivot_field[space] ||= pivot_fields(space: space).select do |name|
+      row.key?(name)
+    end.then do |sel|
+      next sel.first if sel.one?
+      msg = "Could not find any column named: #{pivot_fields.join(', ')}"
+      msg = "Multiple pivot columns: #{sel.join(', ')}" if sel.any?
+      log(:error) { msg }
+      raise msg
+    end.tap do |col|
+      log(:info) { "Using header '#{col}' as pivot column." }
+    end
+  end
+  def pivot_fields(space: :input)
+    @pivot_fields ||= {}
+    return @pivot_fields[space] if @pivot_fields.key?(space)
+    return (@pivot_fields[space] = [opts_pivot]) if opts_pivot && space == :input
+    unless self.class.const_defined?(:PIVOT_FIELD)
+      msg = "(#{self.class}) You must define PIVOT_FIELD constant"
+      log(:error) { msg }
+      raise msg
+    end
+    @pivot_fields[space] = self.class::PIVOT_FIELD.dup
+  end
+  def page_id_field(space: :output)
+    @page_id_field = {}
+    return @page_id_field[space] if @page_id_field.key?(space)
+    return (@page_id_field[space] = opts_page_id) if opts_page_id && space == :output
+    unless self.class.const_defined?(:PAGE_ID)
+      msg = "(#{self.class}) You must define PAGE_ID field constant"
+      log(:error) { msg }
+      raise msg
+    end
+    @page_id_field[space] = self.class::PAGE_ID
+  end
+  def opts_pivot
+    options.dig(:input, :pivot_field)
+  end
+  def opts_page_id
+    options.dig(:input, :page_id)
+  end
+end

data/lib/eco/api/usecases/default/utils/cli/add_page_id_cli.rb ADDED Viewed

@@ -0,0 +1,29 @@
+class Eco::API::UseCases::Default::Utils::AddPageId
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Adds the page_id column based on mappings onto -pivot'
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: file})
+      end
+    end
+    add_option('-maps-file', 'Source file with he mappings') do |options|
+      if (file = SCR.get_file('-maps-file', required: true, should_exist: true))
+        options.deep_merge!(input: {maps: file})
+      end
+    end
+    add_option('-pivot', 'The column that should be used to pivot') do |options|
+      if (file = SCR.get_arg("-pivot", with_param: true))
+        options.deep_merge!(input: {pivot_field: file})
+      end
+    end
+    add_option('-page-id', 'The column that should be used to dump the id') do |options|
+      if (file = SCR.get_arg("-page-id", with_param: true))
+        options.deep_merge!(input: {page_id: file})
+      end
+    end
+  end
+end

data/lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb CHANGED Viewed

@@ -22,5 +22,10 @@ class Eco::API::UseCases::Default::Utils::GroupCsv
         options.deep_merge!(input: {group_by_field: file})
       end
     end
+    add_option('-format', 'Kind of extract (csv - default | jsonl') do |options|
+      format = SCR.get_arg('-format', with_param: true)
+      options.deep_merge!(output: {format: format})
+    end
   end
 end

data/lib/eco/api/usecases/default/utils/cli/merge_csv_cli.rb ADDED Viewed

@@ -0,0 +1,27 @@
+class Eco::API::UseCases::Default::Utils::MergeCsv
+  class Cli < Eco::API::UseCases::Cli
+    str_desc  = 'Merges the csv rows by a pivot field. '
+    str_desc << 'It assumes the pivot field is sorted '
+    str_desc << '(same values should be consecutive)'
+    desc str_desc
+    callback do |_session, options, _usecase|
+      if (file = SCR.get_file(cli_name, required: true, should_exist: true))
+        options.deep_merge!(input: {file: {name: file}})
+      end
+    end
+    add_option('-merge', 'The CSV file that should be merged onto the original') do |options|
+      if (file = SCR.get_file('-merge', required: true, should_exist: true))
+        options.deep_merge!(input: {merge_file: {name: file}})
+      end
+    end
+    add_option('-by', 'The column that should be used to merge') do |options|
+      if (file = SCR.get_arg('-by', with_param: true))
+        options.deep_merge!(input: {merge_by_field: file})
+      end
+    end
+  end
+end

data/lib/eco/api/usecases/default/utils/cli/track_files_cli.rb ADDED Viewed

@@ -0,0 +1,16 @@
+class Eco::API::UseCases::Default::Utils::TrackFiles
+  class Cli < Eco::API::UseCases::Cli
+    desc 'Tracks the files of a folder in a CSV'
+    callback do |_session, options, _usecase|
+      if (folder = SCR.get_file(cli_name, required: true))
+        options.deep_merge!(input: {folder: folder})
+      end
+    end
+    add_option("-s3-path", "Relative subpath from the S3 uploads folder.") do |options|
+      path = SCR.get_arg("-s3-path", with_param: true)
+      options.deep_merge!(output: {s3_path: path})
+    end
+  end
+end

data/lib/eco/api/usecases/default/utils/group_csv_case/file_handler.rb ADDED Viewed

@@ -0,0 +1,62 @@
+class Eco::API::UseCases::Default::Utils::GroupCsv
+  class FileHandler
+    attr_reader :filename, :format
+    def initialize(filename, format: :csv)
+      @filename = filename
+      @format   = format
+      open
+    end
+    def <<(value)
+      msg = "File has been closed. Can't write to it: #{filename}"
+      raise msg unless file
+      case format
+      when :csv
+        file << value
+      when :jsonl
+        file.puts to_s(value)
+      end
+    end
+    def close
+      return if file.nil?
+      file.close.tap do
+        @file = nil
+      end
+    end
+    private
+    attr_reader :file
+    def to_s(value)
+      case value
+      when String
+        value.split("\n").first.tap do |line|
+          next if line == value
+          raise ArgumentError, "As string, value should be a single line. Given: #{value}"
+        end
+      when Hash
+        value.to_json
+      else
+        raise ArgumentError, "Unsupported type: #{value.class}"
+      end
+    end
+    def open
+      case format
+      when :csv
+        @file = CSV.open(filename, 'wb')
+      when :jsonl
+        @file = File.open(filename, 'wb')
+      else
+        raise "Unknown output format: #{format}"
+      end
+    end
+  end
+end

data/lib/eco/api/usecases/default/utils/group_csv_case.rb CHANGED Viewed

@@ -1,36 +1,59 @@
 # This script assumes that for the `GROUP_BY_FIELD` rows are consecutive.
 # @note you might run first the `sort-csv` case.
+# @note when using `jsonl` as an output `format`, it doesn't merge fields,
+#   but it groups them based on some criteria.
+#   - In this case you need to define a `json_builder` method that returns a hash.
 # @note you must inherit from this case and define the constants.
 #
-#      GROUP_BY_FIELD = 'target_csv_field'.freeze
+#      GROUP_BY_FIELD = 'target_csv_field'.freeze # if `-by` command option isn't used
 #      GROUPED_FIELDS = [
 #        'joined_field_1',
 #        'joined_field_2',
 #        'joined_field_3',
 #      ].freeze
-#
+# @note that `GROUPED_FIELDS` isn't necessary if `jsonl` is used as an output `format`
 class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
   name 'group-csv'
   type :other
   require_relative 'cli/group_csv_cli'
+  require_relative 'group_csv_case/file_handler'
+  OUTPUT_FORMAT = :csv # :csv or :jsonl
   def main(*_args)
     if simulate?
       count = Eco::CSV.count(input_file)
       log(:info) { "CSV '#{input_file}' has #{count} rows." }
     else
+      msg = "You should define a json_builder method when using jsonl as output format"
+      raise msg unless respond_to?(:json_builder, true) || output_format != :jsonl
       generate_file
     end
   end
   private
+  attr_reader :in_index
+  def with_output_file
+    handler = FileHandler.new(output_filename, format: output_format)
+    yield handler
+  ensure
+    handler&.close
+    msg  = "Generated file '#{output_filename}' "
+    msg << "with #{row_count} rows (out of #{in_index + 1})."
+    log(:info) { msg } unless simulate?
+  end
   def generate_file # rubocop:disable Metrics/AbcSize
-    row_count = 0
-    in_index = nil
+    @in_index = nil
-    CSV.open(output_filename, 'wb') do |out_csv|
+    with_output_file do |f_handler|
       first = true
       puts "\n"
@@ -39,36 +62,37 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
         if first
           first = false
           headers!(row)
-          out_csv << headers
+          f_handler << headers if output_format == :csv
           require_group_by_field!(row, file: input_file)
         end
-        in_index = idx
+        @in_index = idx
         next unless !block_given? || yield(row, idx)
         next unless pivotable?(row, idx)
         next unless (last_group = pivot_row(row))
-        row_count += 1
+        row_count!
-        if (row_count % 500).zero?
-          print "... Done #{row_count} rows          \r"
-          $stdout.flush
+        case output_format
+        when :csv
+          f_handler << last_group.values_at(*headers)
+        when :jsonl
+          f_handler << json_builder(last_group)
         end
-        out_csv << last_group.values_at(*headers)
       end
       # finalize
-      if (lrow = pivot_row)
-        row_count += 1
-        out_csv   << lrow.values_at(*headers)
+      if (l_row = pivot_row)
+        row_count!
+        case output_format
+        when :csv
+          f_handler << l_row.values_at(*headers)
+        when :jsonl
+          f_handler << json_builder(l_row)
+        end
       end
-    ensure
-      msg  = "Generated file '#{output_filename}' "
-      msg << "with #{row_count} rows (out of #{in_index})."
-      log(:info) { msg } unless simulate?
     end
   end
@@ -82,41 +106,59 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
     pivot_value = row[group_by_field]
     unless (last_pivot = @group[group_by_field])
+      # init
       last_pivot = @group[group_by_field] = pivot_value
     end
     last   = @group
     @group = {group_by_field => pivot_value} unless pivot_value == last_pivot
-    headers_rest.each do |field|
-      curr_values   = row[field].to_s.split('|').compact.uniq
-      pivot_values  = @group[field].to_s.split('|').compact.uniq
-      @group[field] = (pivot_values | curr_values).join('|')
+    case output_format
+    when :csv
+      headers_rest.each do |field|
+        curr_values   = row[field].to_s.split('|').compact.uniq
+        group_values  = @group[field].to_s.split('|').compact.uniq
+        @group[field] = (group_values | curr_values).join('|')
+      end
+    when :jsonl
+      @group['rows'] ||= []
+      @group['rows'] << row.to_h.slice(*headers_rest)
     end
     last unless last == @group
   end
-  attr_reader :group
+  attr_reader :group, :row_count
   attr_reader :headers, :headers_rest
   def headers!(row)
     return if headers?
-    @headers_rest  = grouped_fields & row.headers
-    @headers_rest -= [group_by_field]
-    @headers       = [group_by_field, *headers_rest]
+    @grouped_fields = row.headers - [group_by_field] if output_format == :jsonl
+    @headers_rest   = grouped_fields & row.headers
+    @headers_rest  -= [group_by_field]
+    @headers        = [group_by_field, *headers_rest]
   end
   def headers?
     instance_variable_defined?(:@headers)
   end
+  def row_count!
+    @row_count ||= 0
+    (@row_count += 1).tap do |cnt|
+      if (cnt % 500).zero?
+        print "... Done #{cnt} rows            \r"
+        $stdout.flush
+      end
+    end
+  end
   def pivotable?(row, idx)
     return true unless row[group_by_field].to_s.strip.empty?
     msg  = "Row #{idx} doesn't have value for pivot field '#{group_by_field}'"
-    msg << '. Skipping (discared) ...'
+    msg << '. Skipping (discarded) ...'
     log(:warn) { msg }
     false
   end
@@ -130,17 +172,21 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
   end
   def start_at
-    return nil unless (num = options.dig(:input, :file, :start_at))
+    return unless (num = options.dig(:input, :file, :start_at))
     num = num.to_i
     num = nil if num.zero?
     num
   end
+  def output_format
+    options.dig(:output, :format)&.to_sym || self.class::OUTPUT_FORMAT
+  end
   def output_filename
-    return nil unless input_name
+    return unless input_name
-    File.join(input_dir, "#{input_name}_grouped#{input_ext}")
+    File.join(input_dir, "#{input_name}_grouped.#{output_format}")
   end
   def input_name