eco-helpers 3.2.12 → 3.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 91691c5a914be5eebeff46f776d9c5b720e8e9672317051f36b5995beff3977c
4
- data.tar.gz: 1742764775b28b99f136e451cc79c68359924f7722752ccdae043cf7c1b7f6a9
3
+ metadata.gz: 164a0d7e0bea8396208dae904e45bac439f288773a138e01869fe19ec9eafe21
4
+ data.tar.gz: b56add1010ab7feee79f58c8bac0835bc39f9bfceb8a56fcf1cb6aff176bdd1d
5
5
  SHA512:
6
- metadata.gz: 3c17e149406ae8ae64c94b623e10a6151d9f0fd4755e736281a89c098c3c6ff180d7f85560d62e63741f3166d283882352e50cf48841c22e4f2106a17e76f3f0
7
- data.tar.gz: 25199fc81a46ff8897be9af805bb675a4ee3ca11b61439a91450639703583c1907e97513fd26e14375a30956166dcb2cb35161a56b920f221ff6d2efc69bcee4
6
+ metadata.gz: 9a584c9593325bbd6fa7ec08794de3d839bd675c719613899c25e7d3d53bbfb58b03bc33f703b0b38a906a0ce08c30ec31c11d180653e9181f25b510db3bc3dd
7
+ data.tar.gz: ddfd53f1a2e72cbbf4136ee4ebb25481e098512ac258ff21bfea188417a6824e22aab407f16cb3a3d966b21d320b6aa56659e71f0782accda71f921d5db24601
data/CHANGELOG.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
- ## [3.2.13] - 2026-01-xx
5
+ ## [3.2.14] - 2026-04-xx
6
6
 
7
7
  ### Added
8
8
 
@@ -10,6 +10,23 @@ All notable changes to this project will be documented in this file.
10
10
 
11
11
  ### Fixed
12
12
 
13
+ ## [3.2.13] - 2026-04-15
14
+
15
+ ### Added
16
+
17
+ - `-split-csv` case
18
+ - Allow custom split criteria via `splitter` named argument.
19
+ - `-merge-csv` case
20
+
21
+ ### Changed
22
+
23
+ - improved `Stream` with methods `eof?` and `shift`
24
+
25
+ ### Fixed
26
+
27
+ - Locations remap on RS update
28
+ - `-group-csv`: correct rows count
29
+
13
30
  ## [3.2.12] - 2026-01-19
14
31
 
15
32
  ### Added
@@ -0,0 +1,27 @@
1
+ class Eco::API::UseCases::Default::Utils::MergeCsv
2
+ class Cli < Eco::API::UseCases::Cli
3
+ str_desc = 'Merges the csv rows by a pivot field. '
4
+ str_desc << 'It assumes the pivot field is sorted '
5
+ str_desc << '(same values should be consecutive)'
6
+
7
+ desc str_desc
8
+
9
+ callback do |_session, options, _usecase|
10
+ if (file = SCR.get_file(cli_name, required: true, should_exist: true))
11
+ options.deep_merge!(input: {file: {name: file}})
12
+ end
13
+ end
14
+
15
+ add_option('-merge', 'The CSV file that should be merged onto the original') do |options|
16
+ if (file = SCR.get_file('-merge', required: true, should_exist: true))
17
+ options.deep_merge!(input: {merge_file: {name: file}})
18
+ end
19
+ end
20
+
21
+ add_option('-by', 'The column that should be used to merge') do |options|
22
+ if (file = SCR.get_arg('-by', with_param: true))
23
+ options.deep_merge!(input: {merge_by_field: file})
24
+ end
25
+ end
26
+ end
27
+ end
@@ -27,7 +27,6 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
27
27
  private
28
28
 
29
29
  def generate_file # rubocop:disable Metrics/AbcSize
30
- row_count = 0
31
30
  in_index = nil
32
31
 
33
32
  CSV.open(output_filename, 'wb') do |out_csv|
@@ -49,24 +48,19 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
49
48
  next unless pivotable?(row, idx)
50
49
  next unless (last_group = pivot_row(row))
51
50
 
52
- row_count += 1
53
-
54
- if (row_count % 500).zero?
55
- print "... Done #{row_count} rows \r"
56
- $stdout.flush
57
- end
51
+ row_count!
58
52
 
59
53
  out_csv << last_group.values_at(*headers)
60
54
  end
61
55
 
62
56
  # finalize
63
- if (lrow = pivot_row)
64
- row_count += 1
65
- out_csv << lrow.values_at(*headers)
57
+ if (l_row = pivot_row)
58
+ row_count!
59
+ out_csv << l_row.values_at(*headers)
66
60
  end
67
61
  ensure
68
62
  msg = "Generated file '#{output_filename}' "
69
- msg << "with #{row_count} rows (out of #{in_index})."
63
+ msg << "with #{row_count} rows (out of #{in_index + 1})."
70
64
 
71
65
  log(:info) { msg } unless simulate?
72
66
  end
@@ -97,7 +91,7 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
97
91
  last unless last == @group
98
92
  end
99
93
 
100
- attr_reader :group
94
+ attr_reader :group, :row_count
101
95
  attr_reader :headers, :headers_rest
102
96
 
103
97
  def headers!(row)
@@ -112,11 +106,21 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
112
106
  instance_variable_defined?(:@headers)
113
107
  end
114
108
 
109
+ def row_count!
110
+ @row_count ||= 0
111
+ (@row_count += 1).tap do |cnt|
112
+ if (cnt % 500).zero?
113
+ print "... Done #{cnt} rows \r"
114
+ $stdout.flush
115
+ end
116
+ end
117
+ end
118
+
115
119
  def pivotable?(row, idx)
116
120
  return true unless row[group_by_field].to_s.strip.empty?
117
121
 
118
122
  msg = "Row #{idx} doesn't have value for pivot field '#{group_by_field}'"
119
- msg << '. Skipping (discared) ...'
123
+ msg << '. Skipping (discarded) ...'
120
124
  log(:warn) { msg }
121
125
  false
122
126
  end
@@ -130,7 +134,7 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
130
134
  end
131
135
 
132
136
  def start_at
133
- return nil unless (num = options.dig(:input, :file, :start_at))
137
+ return unless (num = options.dig(:input, :file, :start_at))
134
138
 
135
139
  num = num.to_i
136
140
  num = nil if num.zero?
@@ -138,7 +142,7 @@ class Eco::API::UseCases::Default::Utils::GroupCsv < Eco::API::Custom::UseCase
138
142
  end
139
143
 
140
144
  def output_filename
141
- return nil unless input_name
145
+ return unless input_name
142
146
 
143
147
  File.join(input_dir, "#{input_name}_grouped#{input_ext}")
144
148
  end
@@ -0,0 +1,313 @@
1
+ # This script assumes that for the `MERGE_BY_FIELD` rows are consecutive.
2
+ # @note you might run first the `sort-csv` case.
3
+ # @note at the moment, it does NOT add new fields from the merge file.
4
+ # It only uses the headers of the original file.
5
+ # @note you must inherit from this case and define the constants.
6
+ #
7
+ # MERGE_BY_FIELD = 'target_csv_field'.freeze
8
+ # # those not merged are overridden
9
+ # JOINED_FIELDS = [
10
+ # 'joined_field_1',
11
+ # 'joined_field_2',
12
+ # 'joined_field_3',
13
+ # ].freeze
14
+ #
15
+ class Eco::API::UseCases::Default::Utils::MergeCsv < Eco::API::Custom::UseCase
16
+ name 'merge-csv'
17
+ type :other
18
+
19
+ require_relative 'cli/merge_csv_cli'
20
+
21
+ def main(*_args)
22
+ if simulate?
23
+ count = Eco::CSV.count(input_file)
24
+ log(:info) { "CSV '#{input_file}' has #{count} rows." }
25
+ else
26
+ generate_file
27
+ end
28
+ end
29
+
30
+ private
31
+
32
+ def generate_file # rubocop:disable Metrics/AbcSize
33
+ in_index = nil
34
+
35
+ CSV.open(output_filename, 'wb') do |out_csv|
36
+ pending = false
37
+ first = true
38
+ m_first = true
39
+ row = nil
40
+ idx = nil
41
+
42
+ puts "\n"
43
+
44
+ streamed_merging.for_each do |m_row, m_idx|
45
+ if m_first
46
+ m_first = false
47
+ require_merge_by_field!(m_row, file: merge_file)
48
+ end
49
+
50
+ next unless pivotable?(m_row, m_idx, file: merge_file)
51
+
52
+ merging_row(m_row)
53
+ merge_done = false
54
+
55
+ loop do
56
+ unless pending
57
+ row = nil
58
+ streamed_input.shift do |o_row, i|
59
+ idx = i
60
+ row = o_row
61
+
62
+ if first
63
+ first = false
64
+ headers!(row)
65
+ out_csv << headers
66
+ require_merge_by_field!(row, file: input_file)
67
+ end
68
+ end
69
+ end
70
+
71
+ break unless row
72
+
73
+ in_index = idx
74
+ next unless pivotable?(row, idx, file: input_file)
75
+
76
+ row_count!
77
+ added = original_row(row) do |merged_row, merged:|
78
+ out_csv << merged_row.values_at(*headers)
79
+ merge_done = true if merged
80
+ end
81
+
82
+ pending = !added
83
+
84
+ break if merge_done
85
+ break unless added
86
+ break if streamed_input.eof?
87
+ end
88
+
89
+ row = nil unless pending
90
+
91
+ if pending || streamed_input.eof?
92
+ msg = "Could not merge row #{m_idx} (#{merging_row[merge_by_field]}) "
93
+ msg << "because the pivot value does not exist in the original file"
94
+ msg << ". Skipping (discarded) ..."
95
+ log(:warn) { msg }
96
+ end
97
+ end
98
+
99
+ # finalize
100
+ loop do
101
+ row = nil
102
+ streamed_input.shift do |o_row, i|
103
+ idx = i
104
+ row = o_row
105
+ end
106
+
107
+ break unless row
108
+
109
+ in_index = idx
110
+ next unless pivotable?(row, idx, file: input_file)
111
+
112
+ row_count!
113
+ out_csv << row.values_at(*headers)
114
+
115
+ break if streamed_input.eof?
116
+ end
117
+ ensure
118
+ msg = "Generated file '#{output_filename}' "
119
+ msg << "with #{row_count} rows (out of #{in_index + 1})."
120
+
121
+ log(:info) { msg } unless simulate?
122
+ end
123
+ end
124
+
125
+ # It tracks the current merging row
126
+ # @return [Nil, Hash] the last merge row when `row` doesn't belong
127
+ # or `nil` otherwise
128
+ def merging_row(row = nil)
129
+ return @merging_row unless row
130
+
131
+ @merging_row = row.to_h
132
+ end
133
+
134
+ # It tracks the current grouped row
135
+ # @return [Nil, Hash] the last grouped row when `row` doesn't belong
136
+ # or `nil` otherwise
137
+ def original_row(row)
138
+ pivot_value = row[merge_by_field]
139
+ merge_pivot = merging_row[merge_by_field]
140
+
141
+ if pivot_value > merge_pivot
142
+ # as both files are sorted, we can't add the original row now
143
+ # and we need to just return false
144
+ return false
145
+ elsif pivot_value < merge_pivot
146
+ yield(row.to_h, merged: false) if block_given?
147
+ return true
148
+ end
149
+
150
+ merged_row = {}
151
+ merged_row = {merge_by_field => pivot_value}
152
+
153
+ joined_fields.each do |field|
154
+ original_values = row[field].to_s.split('|').compact.uniq
155
+ merge_values = merging_row[field].to_s.split('|').compact.uniq
156
+
157
+ merged_row[field] = (original_values | merge_values).join('|')
158
+ merged_row[field] = nil if merged_row[field].to_s.strip.empty?
159
+ end
160
+
161
+ headers_rest.each do |field|
162
+ merged_row[field] = row[field]
163
+ merged_row[field] = merging_row[field] if merging_row.key?(field)
164
+ merged_row[field] = nil if merged_row[field].to_s.strip.empty?
165
+ end
166
+
167
+ missed_headers = (merging_row.keys - headers)
168
+ if missed_headers.any? && !warned_missed_headers?
169
+ msg = "Missing headers in merged file: #{missed_headers.join(', ')}"
170
+ log(:warn) { msg }
171
+ @warned_missed_headers = true
172
+ end
173
+
174
+ merged_row = merged_row.slice(*headers)
175
+ yield(merged_row, merged: true) if block_given?
176
+
177
+ true
178
+ end
179
+
180
+ attr_reader :merge, :row_count
181
+ attr_reader :headers, :headers_rest
182
+
183
+
184
+ # Whether if we already warned about merging headers that
185
+ # are not in the original
186
+ def warned_missed_headers?
187
+ @warned_missed_headers ||= false
188
+ end
189
+
190
+ def headers!(row)
191
+ return if headers?
192
+
193
+ @headers = row.to_h.keys
194
+ @joined_fields = @headers & joined_fields
195
+ @headers_rest = @headers - @joined_fields - [merge_by_field]
196
+ @headers = [merge_by_field, *@joined_fields, *@headers_rest]
197
+ end
198
+
199
+ def headers?
200
+ instance_variable_defined?(:@headers)
201
+ end
202
+
203
+ def row_count!
204
+ @row_count ||= 0
205
+ (@row_count += 1).tap do |cnt|
206
+ if (cnt % 500).zero?
207
+ print "... Done #{cnt} rows \r"
208
+ $stdout.flush
209
+ end
210
+ end
211
+ end
212
+
213
+ def pivotable?(row, idx, file:)
214
+ return false if row.nil?
215
+ return true unless row[merge_by_field].to_s.strip.empty?
216
+
217
+ msg = "Row #{idx} doesn't have value for pivot field '#{merge_by_field}'"
218
+ msg << " (file: '#{file}'). Skipping (discarded) ..."
219
+ log(:warn) { msg }
220
+ false
221
+ end
222
+
223
+ def streamed_input
224
+ @streamed_input ||= Eco::CSV::Stream.new(input_file)
225
+ end
226
+
227
+ def streamed_merging
228
+ @streamed_merging ||= Eco::CSV::Stream.new(merge_file)
229
+ end
230
+
231
+ def input_file
232
+ options.dig(:input, :file, :name)
233
+ end
234
+
235
+ def merge_file
236
+ options.dig(:input, :merge_file, :name)
237
+ end
238
+
239
+ def output_filename
240
+ return unless input_name
241
+
242
+ File.join(input_dir, "#{input_name}_merged#{input_ext}")
243
+ end
244
+
245
+ def input_name
246
+ @input_name ||= File.basename(input_basename, input_ext)
247
+ end
248
+
249
+ def input_ext
250
+ @input_ext ||= input_basename.split('.')[1..].join('.').then do |name|
251
+ ".#{name}"
252
+ end
253
+ end
254
+
255
+ def input_basename
256
+ @input_basename ||= File.basename(input_full_filename)
257
+ end
258
+
259
+ def input_dir
260
+ @input_dir = File.dirname(input_full_filename)
261
+ end
262
+
263
+ def input_full_filename
264
+ @input_full_filename ||= File.expand_path(input_file)
265
+ end
266
+
267
+ def require_merge_by_field!(row, file:)
268
+ return true if row.key?(merge_by_field)
269
+
270
+ msg = "Pivot field '#{merge_by_field}' missing in header of file '#{file}'"
271
+ log(:error) { msg }
272
+ raise msg
273
+ end
274
+
275
+ def merge_by_field
276
+ return @merge_by_field if instance_variable_defined?(:@merge_by_field)
277
+
278
+ return (@merge_by_field = opts_merge_by) if opts_merge_by
279
+
280
+ unless self.class.const_defined?(:MERGE_BY_FIELD)
281
+ msg = "(#{self.class}) You must define MERGE_BY_FIELD constant"
282
+ log(:error) { msg }
283
+ raise msg
284
+ end
285
+
286
+ @merge_by_field = self.class::MERGE_BY_FIELD
287
+ end
288
+
289
+ def joined_fields
290
+ return @joined_fields if instance_variable_defined?(:@joined_fields)
291
+
292
+ unless self.class.const_defined?(:JOINED_FIELDS)
293
+ msg = "(#{self.class}) You must define JOINED_FIELDS constant"
294
+ log(:error) { msg }
295
+ raise msg
296
+ end
297
+
298
+ @joined_fields ||= [self.class::JOINED_FIELDS].flatten.compact.tap do |flds|
299
+ next unless flds.empty?
300
+
301
+ log(:warn) {
302
+ msg = 'There were no fields to be joined (JOINED_FIELDS). '
303
+ msg << 'This means all fields present in the merging file '
304
+ msg << ' will be overridden in the original file.'
305
+ msg
306
+ }
307
+ end
308
+ end
309
+
310
+ def opts_merge_by
311
+ options.dig(:input, :merge_by_field)
312
+ end
313
+ end
@@ -1,7 +1,7 @@
1
1
  class Eco::API::UseCases::Default::Utils::SplitCsv < Eco::API::Common::Loaders::UseCase
2
2
  require_relative 'cli/split_csv_cli'
3
3
 
4
- MAX_ROWS = 15_000
4
+ MAX_ROWS = :unused
5
5
 
6
6
  name 'split-csv'
7
7
  type :other
@@ -15,6 +15,7 @@ class Eco::API::UseCases::Default::Utils::SplitCsv < Eco::API::Common::Loaders::
15
15
  input_file,
16
16
  max_rows: max_rows,
17
17
  start_at: start_at,
18
+ **params,
18
19
  &filter
19
20
  ).tap do |split|
20
21
  msg = []
@@ -31,6 +32,10 @@ class Eco::API::UseCases::Default::Utils::SplitCsv < Eco::API::Common::Loaders::
31
32
 
32
33
  private
33
34
 
35
+ def params
36
+ {}
37
+ end
38
+
34
39
  def filter
35
40
  nil
36
41
  end
@@ -14,4 +14,5 @@ require_relative 'utils/split_json_case'
14
14
  require_relative 'utils/json_to_csv_case'
15
15
  require_relative 'utils/sort_csv_case'
16
16
  require_relative 'utils/group_csv_case'
17
+ require_relative 'utils/merge_csv_case'
17
18
  require_relative 'utils/entries_to_csv_case'
@@ -40,7 +40,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location::Command
40
40
  return nil unless error?
41
41
 
42
42
  msg = []
43
- msg << "(#{command} '#{node_id}') #{error.message}"
43
+ msg << "(#{command_type} '#{node_id}') #{error.message}"
44
44
 
45
45
  feed = []
46
46
  feed.concat(error.validationErrors.map(&:message)) unless error.validationErrors.empty?
@@ -55,7 +55,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location::Command
55
55
  end
56
56
 
57
57
  def command_input_data
58
- input[command]
58
+ input[command_type]
59
59
  end
60
60
 
61
61
  def command_id
@@ -53,7 +53,8 @@ module Eco::API::UseCases::GraphQL::Helpers::Location::Command
53
53
  next applied unless with_id_change
54
54
 
55
55
  applied.select do |result|
56
- next false unless (command = result.command_result_data)
56
+ # next false unless (command = result.command_result_data)
57
+ next false unless (command = result.command_input_data)
57
58
 
58
59
  command.keys.include?(:newId)
59
60
  end
@@ -22,9 +22,10 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
22
22
  # both are being moved (specific/long mappings first)
23
23
  return 1 if from.subset_of?(other.from)
24
24
  return -1 if from.superset_of?(other.from)
25
- return -1 if (from & other.from).empty?
25
+ return -1 unless from.intersect?(other.from)
26
26
  return -1 if from.length >= other.from.length
27
27
  return 1 if from.length < other.from.length
28
+
28
29
  -1
29
30
  end
30
31
 
@@ -49,16 +50,19 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
49
50
  def maps?
50
51
  return false if any?(&:empty?)
51
52
  return false if from == to
53
+
52
54
  true
53
55
  end
54
56
 
55
57
  def rename?
56
58
  return false unless maps?
59
+
57
60
  both? {|set| set.length == 1}
58
61
  end
59
62
 
60
63
  def move?
61
64
  return false unless maps?
65
+
62
66
  !rename?
63
67
  end
64
68
  end
@@ -4,7 +4,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
4
4
  class << self
5
5
  def attr_compare(*attrs)
6
6
  attrs.each do |attr|
7
- meth = "#{attr}".to_sym # rubocop:disable Style/RedundantInterpolation
7
+ meth = :"#{attr}"
8
8
  define_method meth do |value|
9
9
  set.send(meth, to_set(value))
10
10
  end
@@ -13,7 +13,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
13
13
 
14
14
  def attr_operate(*attrs)
15
15
  attrs.each do |attr|
16
- meth = "#{attr}".to_sym # rubocop:disable Style/RedundantInterpolation
16
+ meth = :"#{attr}"
17
17
  define_method meth do |value|
18
18
  self.class.new(set.send(meth, to_set(value)))
19
19
  end
@@ -57,6 +57,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
57
57
  def include?(value)
58
58
  value = value.to_s.strip
59
59
  return false if value.empty?
60
+
60
61
  set.include?(value)
61
62
  end
62
63
 
@@ -82,7 +83,9 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
82
83
  return value.ini_tags.dup if value.is_a?(self.class)
83
84
  return value.dup if value.is_a?(Array)
84
85
  return value.to_a if value.is_a?(Set)
85
- raise ArgumentError, "Expecting #{self.class}, Set or Array. Given: #{value.class}"
86
+
87
+ msg = "Expecting #{self.class}, Set or Array. Given: #{value.class}"
88
+ raise ArgumentError, msg
86
89
  end
87
90
 
88
91
  def to_set(value)
@@ -22,7 +22,7 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
22
22
  end
23
23
 
24
24
  def to_csv(filename)
25
- CSV.open(filename, "w") do |fd|
25
+ CSV.open(filename, 'w') do |fd|
26
26
  fd << %w[src_tags dst_tags]
27
27
 
28
28
  each do |tags_map|
@@ -67,7 +67,8 @@ module Eco::API::UseCases::GraphQL::Helpers::Location
67
67
  end
68
68
 
69
69
  def <<(pair)
70
- raise ArgumentError, "Expecting pair of Array in Array. Given: #{pair}" unless self.class.correct_pair?(pair)
70
+ msg = "Expecting pair of Array in Array. Given: #{pair}"
71
+ raise ArgumentError, msg unless self.class.correct_pair?(pair)
71
72
 
72
73
  add(*pair)
73
74
  end
@@ -76,6 +76,8 @@ class Eco::API::UseCases::GraphQL::Samples::Location
76
76
  ) do |input, stage|
77
77
  next unless input
78
78
 
79
+ self.id_name_input = input if simulate? && stage == :id_name
80
+
79
81
  some_update = true
80
82
 
81
83
  sliced_batches(
@@ -98,8 +100,8 @@ class Eco::API::UseCases::GraphQL::Samples::Location
98
100
  rearchive
99
101
  end
100
102
 
101
- rescued { delete_or_publish_draft }
102
- rescued { manage_remaps_table }
103
+ rescued { delete_or_publish_draft }
104
+ rescued { manage_remaps_table if some_update }
103
105
  end
104
106
  end
105
107
 
@@ -131,6 +133,8 @@ class Eco::API::UseCases::GraphQL::Samples::Location
131
133
 
132
134
  private
133
135
 
136
+ attr_accessor :id_name_input
137
+
134
138
  # Work with adapted diff builders.
135
139
  def nodes_diff_class
136
140
  Eco::API::UseCases::GraphQL::Helpers::Location::Command::Diffs
@@ -231,11 +235,17 @@ class Eco::API::UseCases::GraphQL::Samples::Location
231
235
  end
232
236
 
233
237
  def manage_remaps_table
234
- return unless results.final_response?
235
-
236
238
  rescued do
237
- results.applied_commands(with_id_change: true) do |result|
238
- update_tags_remap_table(result.command)
239
+ if simulate? && id_name_input
240
+ id_name_input[:commands].each do |command|
241
+ update_tags_remap_table(command[:update])
242
+ end
243
+ elsif results.final_response?
244
+ results.applied_commands(with_id_change: true).each do |result|
245
+ update_tags_remap_table(result.command_input_data)
246
+ end
247
+ else
248
+ return
239
249
  end
240
250
  end
241
251
 
@@ -36,8 +36,9 @@ class Eco::API::UseCases::GraphQL::Samples::Location
36
36
  # @note the SFTP push only happens if `remote_subfolder` is defined, via:
37
37
  # 1. `options.dig(:sftp, :remote_subfolder)`
38
38
  # 2. `REMOTE_FOLDER` const
39
- def close_handling_tags_remap_csv
39
+ def close_handling_tags_remap_csv # rubocop:disable Naming/PredicateMethod
40
40
  return false unless super
41
+ return true if simulate?
41
42
 
42
43
  upload(tags_remap_csv_file) unless remote_subfolder.nil?
43
44
  true
data/lib/eco/csv/split.rb CHANGED
@@ -3,14 +3,17 @@ module Eco
3
3
  class Split
4
4
  include Eco::Language::AuxiliarLogger
5
5
 
6
+ MAX_ROWS_DEFAULT = 1_000_000
7
+
6
8
  attr_reader :filename
7
9
 
8
- def initialize(filename, max_rows:, start_at: nil, **kargs)
10
+ def initialize(filename, max_rows: :unused, start_at: nil, **kargs)
9
11
  msg = "File '#{filename}' does not exist"
10
12
  raise ArgumentError, msg unless ::File.exist?(filename)
11
13
 
12
14
  @filename = filename
13
15
  @max_rows = max_rows
16
+ @max_rows = MAX_ROWS_DEFAULT if max_rows == :unused
14
17
  @start_at = start_at
15
18
  @params = kargs
16
19
 
@@ -34,16 +37,17 @@ module Eco
34
37
  @out_files ||= []
35
38
  end
36
39
 
37
- # @yield [idx, file] a block to spot the filename
40
+ # @yield [row, ridx, fidx, file] block to spot if the row should be included
38
41
  # @yieldparam idx [Integer] the number of the file
39
42
  # @yieldparam file [String] the default name of the file
40
- # @yieldreturn [String] the filename of the file `idx`.
41
- # - If `nil` it will create its own filename convention
43
+ # @yieldparam fidx [Integer] the number of the file
44
+ # @yieldparam file [String] the default name of the file
45
+ # @yieldreturn [Boolean] whether the row should be included
42
46
  # @return [Array<String>] names of the generated files
43
- def call(&block)
47
+ def call(&filter)
44
48
  stream.for_each(start_at_idx: start_at) do |row, ridx|
45
49
  self.total_count += 1
46
- copy_row(row, ridx, &block)
50
+ copy_row(row, ridx, &filter)
47
51
  end
48
52
 
49
53
  out_files
@@ -56,33 +60,42 @@ module Eco
56
60
 
57
61
  attr_reader :params
58
62
  attr_reader :idx, :max_rows, :start_at
59
- attr_reader :headers, :row_idx
63
+ attr_reader :headers, :row_idx, :out_row_idx
64
+ attr_reader :last_cut_desc
60
65
 
61
66
  attr_accessor :exception
62
67
 
63
- def copy_row(row, ridx, &block)
68
+ def copy_row(row, ridx)
64
69
  @headers ||= row.headers
65
70
  @row_idx = ridx
66
71
 
67
- current_csv(ridx) do |csv, fidx, file_out|
72
+ current_csv(row) do |csv, fidx, file_out|
68
73
  included = true
69
- included &&= !block || yield(row, ridx, fidx, file_out)
74
+ included &&= yield(row, ridx, fidx, file_out) if block_given?
70
75
  next unless included
71
76
 
77
+ @out_row_idx += 1
72
78
  self.copy_count += 1
73
79
  csv << row.fields
74
80
  end
75
81
  end
76
82
 
77
- def current_csv(ridx)
78
- if split?(ridx) || @csv.nil?
79
- puts "Split at row #{row_idx}"
83
+ def current_csv(row)
84
+ if (cut = split?(row, &splitter)) || @csv.nil?
85
+ cut = nil if cut.is_a?(TrueClass) || cut.to_s.empty? || !cut
86
+ msg = "Split at row #{row_idx}"
87
+ msg << " (cut: #{cut})" unless cut.nil?
88
+ puts msg
89
+
90
+ @last_cut_desc = cut unless cut.nil?
91
+
80
92
  @csv&.close
81
93
 
82
- out_filename = generate_name(next_idx)
94
+ out_filename = generate_name(next_idx, desc: last_cut_desc)
83
95
  @csv = ::CSV.open(out_filename, "w")
84
96
  @csv << headers
85
97
  out_files << out_filename
98
+ @out_row_idx = 0
86
99
  end
87
100
 
88
101
  yield(@csv, idx, out_files.last) if block_given?
@@ -90,8 +103,19 @@ module Eco
90
103
  @csv
91
104
  end
92
105
 
93
- def split?(ridx)
94
- ((ridx + 1) % max_rows).zero?
106
+ # @note client scripts can tweak this method.
107
+ def split?(row)
108
+ return yield(row, row_idx) if block_given?
109
+
110
+ ((row_idx + 1) % max_rows).zero?
111
+ end
112
+
113
+ def splitter
114
+ @splitter ||= params[:splitter]
115
+ end
116
+
117
+ def splitter?
118
+ splitter.is_a?(Proc)
95
119
  end
96
120
 
97
121
  def next_idx
@@ -103,11 +127,15 @@ module Eco
103
127
  end
104
128
 
105
129
  def stream
106
- @stream ||= Eco::CSV::Stream.new(filename, **params)
130
+ @stream ||= Eco::CSV::Stream.new(
131
+ filename,
132
+ **params
133
+ )
107
134
  end
108
135
 
109
- def generate_name(fidx)
110
- File.join(input_dir, "#{input_name}_#{file_number(fidx)}#{input_ext}")
136
+ def generate_name(fidx, desc: nil)
137
+ desc = "_#{desc}" unless desc.nil?
138
+ File.join(input_dir, "#{input_name}_#{file_number(fidx)}#{desc}#{input_ext}")
111
139
  end
112
140
 
113
141
  def file_number(num)
@@ -3,6 +3,16 @@ module Eco
3
3
  class Stream
4
4
  include Eco::Language::AuxiliarLogger
5
5
 
6
+ CSV_PARAMS = %i[
7
+ col_sep row_sep quote_char
8
+ headers skip_blanks skip_lines
9
+ nil_value empty_value
10
+ converters unconverted_fields
11
+ return_headers header_converters
12
+ liberal_parsing
13
+ field_size_limit
14
+ ].freeze
15
+
6
16
  attr_reader :filename
7
17
 
8
18
  def initialize(filename, **kargs)
@@ -16,9 +26,42 @@ module Eco
16
26
  init
17
27
  end
18
28
 
29
+ def eof?
30
+ started? && !row
31
+ end
32
+
33
+ def started?
34
+ @started ||= false
35
+ end
36
+
37
+ def shift
38
+ raise ArgumentError, 'Expecting block, but not given.' unless block_given?
39
+
40
+ @started = true
41
+ yield(row, next_idx) if (self.row = csv.shift)
42
+ rescue StandardError => err
43
+ self.exception = err
44
+ raise
45
+ ensure
46
+ unless row || !fd.is_a?(::File)
47
+ fd.close
48
+ @fd = nil
49
+ end
50
+
51
+ if exception
52
+ # Give some feedback if it crashes
53
+ msg = []
54
+ msg << "Last row IDX: #{idx}"
55
+ msg << "Last row content: #{row.to_h.pretty_inspect}"
56
+ puts msg
57
+ log(:debug) { msg.join("\n") }
58
+ end
59
+ end
60
+
19
61
  def for_each(start_at_idx: 0)
20
62
  raise ArgumentError, 'Expecting block, but not given.' unless block_given?
21
63
 
64
+ @started = true
22
65
  move_to_idx(start_at_idx)
23
66
 
24
67
  yield(row, next_idx) while (self.row = csv.shift)
@@ -38,6 +81,7 @@ module Eco
38
81
  end
39
82
 
40
83
  def move_to_idx(start_at_idx)
84
+ @started = true
41
85
  start_at_idx ||= 0
42
86
  next_idx while (idx < start_at_idx) && (self.row = csv.shift)
43
87
  end
@@ -58,12 +102,18 @@ module Eco
58
102
  return @csv if instance_variable_defined?(:@csv)
59
103
 
60
104
  @fd = ::File.open(filename, 'r')
61
- @csv = Eco::CSV.new(fd, **params)
105
+ @csv = Eco::CSV.new(fd, **params.slice(*csv_params))
62
106
  end
63
107
 
64
108
  def init
65
109
  @idx ||= 0 # rubocop:disable Naming/MemoizedInstanceVariableName
66
110
  end
111
+
112
+ def csv_params
113
+ return self.class::CSV_PARAMS if self.class.const_defined?(:CSV_PARAMS)
114
+
115
+ CSV_PARAMS
116
+ end
67
117
  end
68
118
  end
69
119
  end
data/lib/eco/csv.rb CHANGED
@@ -19,7 +19,7 @@ module Eco
19
19
  end
20
20
 
21
21
  # Splits the csv `filename` into `max_rows`
22
- # @yield [row, ridx, fidx, file]
22
+ # @yield [row, ridx, fidx, file] block to spot if the row should be included
23
23
  # @yieldparam row [Integer] the row
24
24
  # @yieldparam ridx [Integer] the index of the row in the source file
25
25
  # @yieldparam fidx [Integer] the number of the file
@@ -29,15 +29,18 @@ module Eco
29
29
  # @param max_rows [Integer] number of rows per file
30
30
  # @param start_at [Integer] row that sets the starting point.
31
31
  # Leave empty for the full set of rows.
32
+ # @param kargs [Hash] additional parameters
33
+ # - `:splitter` [Proc] custom splitter (criteria)
34
+ # - Receives the row idx and the row itself
32
35
  # @return [Eco::CSV::Split]
33
- def split(filename, max_rows:, start_at: nil, **kargs, &block)
36
+ def split(filename, max_rows: :unused, start_at: nil, **kargs, &filter)
34
37
  Eco::CSV::Split.new(
35
38
  filename,
36
39
  max_rows: max_rows,
37
40
  start_at: start_at,
38
41
  **kargs
39
42
  ).tap do |splitter|
40
- splitter.call(&block)
43
+ splitter.call(&filter)
41
44
  end
42
45
  end
43
46
 
data/lib/eco/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Eco
2
- VERSION = '3.2.12'.freeze
2
+ VERSION = '3.2.13'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: eco-helpers
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.12
4
+ version: 3.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Oscar Segura
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2026-01-19 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: byebug
@@ -536,7 +535,6 @@ dependencies:
536
535
  - - "~>"
537
536
  - !ruby/object:Gem::Version
538
537
  version: 6.7.0
539
- description:
540
538
  email:
541
539
  - oscar@ecoportal.co.nz
542
540
  executables: []
@@ -799,12 +797,14 @@ files:
799
797
  - lib/eco/api/usecases/default/utils/cli/entries_to_csv_cli.rb
800
798
  - lib/eco/api/usecases/default/utils/cli/group_csv_cli.rb
801
799
  - lib/eco/api/usecases/default/utils/cli/json_to_csv_cli.rb
800
+ - lib/eco/api/usecases/default/utils/cli/merge_csv_cli.rb
802
801
  - lib/eco/api/usecases/default/utils/cli/sort_csv_cli.rb
803
802
  - lib/eco/api/usecases/default/utils/cli/split_csv_cli.rb
804
803
  - lib/eco/api/usecases/default/utils/cli/split_json_cli.rb
805
804
  - lib/eco/api/usecases/default/utils/entries_to_csv_case.rb
806
805
  - lib/eco/api/usecases/default/utils/group_csv_case.rb
807
806
  - lib/eco/api/usecases/default/utils/json_to_csv_case.rb
807
+ - lib/eco/api/usecases/default/utils/merge_csv_case.rb
808
808
  - lib/eco/api/usecases/default/utils/sort_csv_case.rb
809
809
  - lib/eco/api/usecases/default/utils/split_csv_case.rb
810
810
  - lib/eco/api/usecases/default/utils/split_json_case.rb
@@ -1083,7 +1083,6 @@ licenses:
1083
1083
  - MIT
1084
1084
  metadata:
1085
1085
  rubygems_mfa_required: 'true'
1086
- post_install_message:
1087
1086
  rdoc_options: []
1088
1087
  require_paths:
1089
1088
  - lib
@@ -1098,8 +1097,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
1098
1097
  - !ruby/object:Gem::Version
1099
1098
  version: '0'
1100
1099
  requirements: []
1101
- rubygems_version: 3.5.23
1102
- signing_key:
1100
+ rubygems_version: 4.0.8
1103
1101
  specification_version: 4
1104
1102
  summary: eco-helpers to manage people api cases
1105
1103
  test_files: []