RubyGems - data_shifter - Versions diffs - 0.1.0 → 0.3.0 - Mend

data_shifter 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +4 -4
data/.husky/pre-commit +0 -3
data/CHANGELOG.md +39 -0
data/README.md +158 -46
data/lib/data_shifter/configuration.rb +42 -0
data/lib/data_shifter/errors.rb +46 -0
data/lib/data_shifter/internal/colors.rb +71 -0
data/lib/data_shifter/internal/env.rb +8 -6
data/lib/data_shifter/internal/log_deduplicator.rb +149 -0
data/lib/data_shifter/internal/output.rb +118 -69
data/lib/data_shifter/internal/side_effect_guards.rb +120 -0
data/lib/data_shifter/shift.rb +212 -23
data/lib/data_shifter/version.rb +1 -1
data/lib/data_shifter.rb +21 -0
data/lib/generators/data_shift_generator.rb +90 -13
metadata +21 -3
data/lib/data_shifter/rubocop.rb +0 -4
data/lib/rubocop/cop/data_shifter/skip_transaction_guard_dry_run.rb +0 -55

data/lib/data_shifter/internal/log_deduplicator.rb ADDED Viewed

@@ -0,0 +1,149 @@
# frozen_string_literal: true

require "digest"
require "logger"

module DataShifter
  module Internal
    # A proxy logger that suppresses repeated log messages during a shift run.
    #
    # Every message is keyed by a SHA-256 digest of "severity:progname:message", so the
    # bookkeeping hash holds fixed-size keys rather than full message text. The first
    # occurrence is forwarded to the wrapped logger; later occurrences are only counted.
    # #print_summary reports the suppressed counts via puts.
    class LogDeduplicator
      # Number of message characters kept per tracked entry.
      STORED_MESSAGE_LIMIT = 200
      # Number of message characters shown per line of the summary.
      SUMMARY_SNIPPET_LIMIT = 100

      attr_reader :real_logger, :cap, :seen

      # @param real_logger [Logger] logger that receives first occurrences
      # @param cap [Integer] maximum number of distinct messages tracked at once
      def initialize(real_logger, cap:)
        @real_logger = real_logger
        @cap = cap
        @seen = {}
      end

      # Logs a message unless an identical one (same severity, progname and text) was seen.
      #
      # Mirrors Logger#add: an explicit +message+ wins over the block, and the block is
      # only consulted when +message+ is nil. The block is evaluated at most once per call
      # (its value is replayed to the wrapped logger), and not at all when +severity+ is
      # below the wrapped logger's level, so filtered messages are neither computed nor
      # reported as suppressed.
      #
      # @return [true, nil, Object] true when filtered by level, nil when suppressed as a
      #   repeat, otherwise whatever the wrapped logger's #add returns
      def add(severity, message = nil, progname = nil, &block)
        severity ||= Logger::UNKNOWN
        return true if below_threshold?(severity)

        resolved = message.nil? && block ? block.call : message
        key = message_key(severity, progname, resolved)
        return nil unless record_occurrence(key, resolved || progname, severity)

        # Replay the already-computed value so the caller's block is not run a second time.
        replay = block && proc { resolved }
        @real_logger.add(severity, message, progname, &replay)
      end

      def debug(message = nil, progname = nil, &)
        add(Logger::DEBUG, message, progname, &)
      end

      def info(message = nil, progname = nil, &)
        add(Logger::INFO, message, progname, &)
      end

      def warn(message = nil, progname = nil, &)
        add(Logger::WARN, message, progname, &)
      end

      def error(message = nil, progname = nil, &)
        add(Logger::ERROR, message, progname, &)
      end

      def fatal(message = nil, progname = nil, &)
        add(Logger::FATAL, message, progname, &)
      end

      def unknown(message = nil, progname = nil, &)
        add(Logger::UNKNOWN, message, progname, &)
      end

      # Raw write (no formatting, no level check); tracked as INFO for the summary.
      def <<(msg)
        return nil unless record_occurrence(message_key(Logger::INFO, nil, msg), msg, Logger::INFO)

        @real_logger << msg
      end

      def level
        @real_logger.level
      end

      def level=(val)
        @real_logger.level = val
      end

      def formatter
        @real_logger.formatter
      end

      def formatter=(val)
        @real_logger.formatter = val
      end

      def close
        @real_logger.close
      end

      # @return [Hash] tracked entries that occurred more than once
      def suppressed_messages
        @seen.select { |_key, entry| entry[:count] > 1 }
      end

      # Prints one line per repeated message with the number of suppressed repeats.
      def print_summary
        suppressed = suppressed_messages
        return if suppressed.empty?

        puts "\n[DataShifter] Suppressed repeated log messages:"
        suppressed.each_value do |entry|
          text = entry[:message].to_s
          snippet = text[0, SUMMARY_SNIPPET_LIMIT]
          snippet = "#{snippet}..." if text.length > SUMMARY_SNIPPET_LIMIT
          # The first occurrence was forwarded, so only count - 1 were suppressed.
          puts "  #{entry[:count] - 1}x suppressed: #{snippet.inspect}"
        end
      end

      # Anything not defined here (progname, info?, datetime_format, ...) goes to the wrapped logger.
      def method_missing(method, ...)
        @real_logger.send(method, ...)
      end

      def respond_to_missing?(method, include_private = false)
        @real_logger.respond_to?(method, include_private) || super
      end

      class << self
        # Yields a deduplicating proxy around +real_logger+ and always prints the summary
        # afterwards, even when the block raises.
        def with_deduplicating_logger(real_logger, cap:)
          proxy = new(real_logger, cap:)
          yield proxy
        ensure
          proxy&.print_summary
        end
      end

      private

      # True when the wrapped logger would drop a message of this severity anyway.
      def below_threshold?(severity)
        return false unless @real_logger.respond_to?(:level)

        threshold = @real_logger.level
        threshold.is_a?(Integer) && severity.is_a?(Integer) && severity < threshold
      end

      # Counts an occurrence of +key+.
      # @return [Boolean] true when this is the first occurrence (caller should forward it)
      def record_occurrence(key, text, severity)
        entry = @seen[key]
        if entry
          entry[:count] += 1
          return false
        end

        enforce_cap
        @seen[key] = { count: 1, message: truncate_message(text), severity: }
        true
      end

      def message_key(severity, progname, message)
        Digest::SHA256.hexdigest("#{severity}:#{progname}:#{message}")
      end

      def truncate_message(msg)
        str = msg.to_s
        str.length > STORED_MESSAGE_LIMIT ? "#{str[0, STORED_MESSAGE_LIMIT]}..." : str
      end

      # Keeps the tracking hash bounded: once the cap is reached, entries seen only once
      # are dropped first (they carry no suppression count); if that frees nothing, the
      # oldest remaining entry is evicted.
      def enforce_cap
        return if @seen.size < @cap

        @seen.delete_if { |_key, entry| entry[:count] == 1 }
        return if @seen.size < @cap

        oldest_key, = @seen.first
        @seen.delete(oldest_key)
      end
    end
  end
end

data/lib/data_shifter/internal/output.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require_relative "colors"
 module DataShifter
   module Internal
     # Output formatting utilities for data shift runs.
@@ -11,126 +13,165 @@ module DataShifter
         none: "none",
       }.freeze
+      SKIP_REASONS_DISPLAY_LIMIT = 10
+      DIVIDER = "=" * 60
+      SEPARATOR = "-" * 60
       module_function
+      # --- Public header methods ---
       def print_header(io:, shift_class:, total:, label:, dry_run:, transaction_mode:, status_interval:)
-        io.puts ""
-        io.puts "=" * 60
-        io.puts shift_class.name || "DataShifter::Shift (anonymous)"
-        io.puts "\"#{shift_class.description}\"" if shift_class.description.present?
-        io.puts "-" * 60
-        io.puts "Mode:        #{dry_run ? "DRY RUN (no changes will be persisted)" : "LIVE"}"
+        print_header_top(io:, shift_class:, dry_run:)
         io.puts "Records:     #{total} #{label}"
         io.puts "Transaction: #{TRANSACTION_MODE_LABELS[transaction_mode]}"
+        print_header_bottom(io:, status_interval:)
+      end
-        status_line = build_status_line(status_interval)
-        io.puts "Status:      #{status_line} for live progress (no abort)" if status_line
-        io.puts "=" * 60
-        io.puts ""
+      def print_task_header(io:, shift_class:, block_count:, dry_run:, transaction_mode:, status_interval:)
+        print_header_top(io:, shift_class:, dry_run:)
+        io.puts "Tasks:       #{block_count}" if block_count >= 2
+        io.puts "Transaction: #{task_transaction_label(transaction_mode)}"
+        print_header_bottom(io:, status_interval:)
       end
-      def print_summary(io:, stats:, errors:, start_time:, dry_run:, transaction_mode:, interrupted:, task_name:, last_successful_id:)
+      # --- Public summary/progress methods ---
+      def print_summary(io:, stats:, errors:, start_time:, dry_run:, transaction_mode:, interrupted:, task_name:, last_successful_id:, skip_reasons: {})
         return unless start_time
-        elapsed = (Time.current - start_time).round(1)
+        has_failures = stats[:failed].positive? || interrupted
         io.puts ""
-        io.puts "=" * 60
-        io.puts summary_title(dry_run:, interrupted:)
-        io.puts "-" * 60
-        io.puts "Duration:    #{elapsed}s"
-        io.puts "Processed:   #{stats[:processed]}"
-        io.puts "Succeeded:   #{stats[:succeeded]}"
-        io.puts "Failed:      #{stats[:failed]}"
-        io.puts "Skipped:     #{stats[:skipped]}"
+        io.puts summary_divider(has_failures:, io:)
+        io.puts summary_title(dry_run:, interrupted:, has_failures:, io:)
+        io.puts Colors.dim(SEPARATOR, io:)
+        print_stats(io:, stats:, start_time:, skip_reasons:)
         print_errors(io:, errors:) if errors.any?
         print_interrupt_warning(io:, transaction_mode:, dry_run:) if interrupted
         print_dry_run_instructions(io:, task_name:) if dry_run && !interrupted
         print_continue_from_hint(io:, task_name:, last_successful_id:, dry_run:, transaction_mode:, errors:)
-        io.puts "=" * 60
+        io.puts summary_divider(has_failures:, io:)
       end
-      def print_progress(io:, stats:, errors:, start_time:, status_interval:)
+      def print_progress(io:, stats:, errors:, start_time:, status_interval:, skip_reasons: {})
         return unless start_time
-        elapsed = (Time.current - start_time).round(1)
         io.puts ""
-        io.puts "=" * 60
-        trigger = if status_interval
-                    "every #{status_interval}s (STATUS_INTERVAL)"
-                  elsif Signal.list.key?("INFO")
-                    "Ctrl+T"
-                  else
-                    "SIGUSR1"
-                  end
-        io.puts "STATUS (still running) — triggered by #{trigger}"
-        io.puts "-" * 60
-        io.puts "Duration:    #{elapsed}s"
-        io.puts "Processed:   #{stats[:processed]}"
-        io.puts "Succeeded:   #{stats[:succeeded]}"
-        io.puts "Failed:      #{stats[:failed]}"
-        io.puts "Skipped:     #{stats[:skipped]}"
+        io.puts Colors.cyan(DIVIDER, io:)
+        io.puts "#{Colors.cyan("STATUS (still running)", io:)} — triggered by #{status_trigger(status_interval)}"
+        io.puts Colors.dim(SEPARATOR, io:)
+        print_stats(io:, stats:, start_time:, skip_reasons:)
         print_errors(io:, errors:) if errors.any?
-        io.puts "=" * 60
+        io.puts Colors.cyan(DIVIDER, io:)
         io.puts ""
       end
       def print_errors(io:, errors:)
         io.puts ""
-        io.puts "ERRORS:"
-        errors.each do |err|
-          io.puts "  #{err[:record]}: #{err[:error]}"
-          err[:backtrace]&.each { |line| io.puts "    #{line}" }
+        io.puts Colors.error("ERRORS:", io:)
+        errors.each { |err| print_single_error(io:, err:) }
+      end
+      # --- Private helpers ---
+      def print_header_top(io:, shift_class:, dry_run:)
+        io.puts ""
+        io.puts Colors.dim(DIVIDER, io:)
+        io.puts Colors.bold(shift_class.name || "DataShifter::Shift (anonymous)", io:)
+        io.puts Colors.dim("\"#{shift_class.description}\"", io:) if shift_class.description.present?
+        io.puts Colors.dim(SEPARATOR, io:)
+        io.puts "Mode:        #{mode_label(dry_run:, io:)}"
+      end
+      def print_header_bottom(io:, status_interval:)
+        status_line = build_status_line(status_interval)
+        io.puts Colors.dim("Status:      #{status_line} for live progress (no abort)", io:) if status_line
+        io.puts Colors.dim(DIVIDER, io:)
+        io.puts ""
+      end
+      def print_stats(io:, stats:, start_time:, skip_reasons:)
+        elapsed = (Time.current - start_time).round(1)
+        io.puts "Duration:    #{elapsed}s"
+        io.puts "Processed:   #{stats[:processed]}"
+        io.puts "Succeeded:   #{Colors.green(stats[:succeeded].to_s, io:)}"
+        io.puts "Failed:      #{Colors.red(stats[:failed].to_s, io:)}" if stats[:failed].positive?
+        io.puts "Skipped:     #{Colors.yellow(stats[:skipped].to_s, io:)}" if stats[:skipped].positive?
+        print_skip_reasons(io:, skip_reasons:) if skip_reasons.any?
+      end
+      def print_single_error(io:, err:)
+        lines = err[:error].to_s.split("\n")
+        io.puts "  #{Colors.red(err[:record].to_s, io:)}: #{lines.first}"
+        lines.drop(1).each { |line| io.puts "    #{line}" }
+        err[:backtrace]&.each { |line| io.puts Colors.dim("    #{line}", io:) }
+      end
+      def mode_label(dry_run:, io:)
+        if dry_run
+          "#{Colors.cyan("DRY RUN", io:)} (no changes will be persisted)"
+        else
+          Colors.warning("LIVE", io:)
         end
       end
-      def summary_title(dry_run:, interrupted:)
+      def task_transaction_label(mode)
+        mode == :per_record ? "per-task" : TRANSACTION_MODE_LABELS[mode]
+      end
+      def summary_divider(has_failures:, io:)
+        has_failures ? Colors.red(DIVIDER, io:) : Colors.green(DIVIDER, io:)
+      end
+      def summary_title(dry_run:, interrupted:, has_failures: false, io: $stdout)
         base = dry_run ? "SUMMARY (DRY RUN)" : "SUMMARY"
-        interrupted ? "#{base} - INTERRUPTED" : base
+        title = interrupted ? "#{base} - INTERRUPTED" : base
+        has_failures ? Colors.error(title, io:) : Colors.success(title, io:)
       end
-      def print_interrupt_warning(io:, transaction_mode:, dry_run:)
-        io.puts ""
-        if transaction_mode == :none
-          io.puts "[!] INTERRUPTED: `transaction false` mode was active."
-          io.puts "    Some DB changes may have been applied before interruption."
-          io.puts "    Non-DB side effects (API calls, emails, etc.) are not rolled back."
-          io.puts "    Review the database state before re-running."
-        elsif dry_run
-          io.puts "[!] INTERRUPTED: All DB changes have been rolled back (dry run)."
-          io.puts "    Non-DB side effects (API calls, emails, etc.) are not rolled back."
+      def status_trigger(status_interval)
+        if status_interval
+          "every #{status_interval}s (STATUS_INTERVAL)"
+        elsif Signal.list.key?("INFO")
+          "Ctrl+T"
         else
-          io.puts "[!] INTERRUPTED: DB transaction has been rolled back."
-          io.puts "    No DB changes were persisted."
-          io.puts "    Non-DB side effects (API calls, emails, etc.) are not rolled back."
+          "SIGUSR1"
         end
       end
+      def print_interrupt_warning(io:, transaction_mode:, dry_run:)
+        msg = if transaction_mode == :none
+                "`transaction false` mode was active. Some DB changes may have been applied."
+              elsif dry_run
+                "All DB changes have been rolled back (dry run)."
+              else
+                "DB transaction has been rolled back. No DB changes were persisted."
+              end
+        io.puts ""
+        io.puts "#{Colors.warning("[!] INTERRUPTED:", io:)} #{msg}"
+        io.puts "    Non-DB side effects (API calls, emails, etc.) are not rolled back."
+      end
       def print_dry_run_instructions(io:, task_name:)
         io.puts ""
-        io.puts "[!] No changes were saved."
+        io.puts Colors.cyan("[!] No changes were saved.", io:)
         return unless task_name.present?
         io.puts "To apply these changes, run:"
-        io.puts "    COMMIT=1 rake data:shift:#{task_name}"
+        io.puts "    #{Colors.bold("COMMIT=1 rake data:shift:#{task_name}", io:)}"
       end
       def print_continue_from_hint(io:, task_name:, last_successful_id:, dry_run:, transaction_mode:, errors:)
-        return if dry_run
-        return unless transaction_mode == :none
-        return if errors.empty?
-        return unless last_successful_id
-        return unless task_name.present?
+        return if dry_run || transaction_mode != :none || errors.empty? || !last_successful_id || !task_name.present?
         io.puts ""
         io.puts "To resume from the last successful record:"
-        io.puts "    CONTINUE_FROM=#{last_successful_id} COMMIT=1 rake data:shift:#{task_name}"
+        io.puts "    #{Colors.bold("CONTINUE_FROM=#{last_successful_id} COMMIT=1 rake data:shift:#{task_name}", io:)}"
       end
       def build_status_line(status_interval)
@@ -145,6 +186,14 @@ module DataShifter
           status_tips.join(" or ")
         end
       end
+      def print_skip_reasons(io:, skip_reasons:)
+        return if skip_reasons.empty?
+        top = skip_reasons.sort_by { |_reason, count| -count }.first(SKIP_REASONS_DISPLAY_LIMIT)
+        formatted = top.map { |reason, count| "\"#{reason}\" (#{count})" }.join(", ")
+        io.puts "             #{formatted}"
+      end
     end
   end
 end

data/lib/data_shifter/internal/side_effect_guards.rb ADDED Viewed

@@ -0,0 +1,120 @@
# frozen_string_literal: true

require "uri"

module DataShifter
  module Internal
    # Applies and restores side-effect guards during dry runs so that HTTP, mail,
    # and job enqueues are blocked (or faked) unless explicitly allowed.
    #
    # Production impact:
    # - WebMock: required only when apply_webmock runs (i.e. during a dry run), so commit-only
    #   production runs never load WebMock. On restore we revert to the previous state (enable!
    #   or disable!) so e.g. specs that had WebMock enabled are not left with it disabled.
    # - ActionMailer / ActiveJob / Sidekiq: no extra loading; we only toggle existing config
    #   for the duration of the block and restore in ensure, so impact is scoped to the run.
    #   Sidekiq's previous testing mode (inline / fake / disabled) is put back afterwards.
    module SideEffectGuards
      class << self
        # Applies side-effect guards, yields, then restores. Call only when running in dry run.
        #
        # @param shift_class [Class] shift being run; may expose _allow_external_requests
        # @return [Object] the block's return value
        # @raise [DataShifter::ExternalRequestNotAllowedError] when the block attempts an
        #   HTTP request that WebMock blocks (the WebMock error is kept as the cause)
        def with_guards(shift_class:, &block)
          saved = {}
          apply_guards(shift_class, saved)
          block.call
        rescue webmock_net_connect_error => e
          host = extract_host_from_webmock_message(e.message)
          raise DataShifter::ExternalRequestNotAllowedError.new(attempted_host: host), cause: e
        ensure
          restore_guards(saved) if saved.any?
        end

        private

        def apply_guards(shift_class, saved)
          apply_webmock(shift_class, saved)
          # rubocop:disable Style/CombinableDefined -- parent must be checked first to avoid NameError when constant not loaded
          apply_action_mailer(saved) if defined?(ActionMailer) && defined?(ActionMailer::Base)
          apply_active_job(saved) if defined?(ActiveJob) && defined?(ActiveJob::Base)
          apply_sidekiq(saved) if defined?(Sidekiq) && defined?(Sidekiq::Testing)
          # rubocop:enable Style/CombinableDefined
        end

        def apply_webmock(shift_class, saved)
          if defined?(WebMock)
            # WebMock already loaded (e.g. in specs); capture so we can restore
            saved[:webmock_was_enabled] = net_http_webmock_enabled?
          else
            require "webmock"
            saved[:webmock_was_enabled] = false
          end

          # Resolve the allow-list before touching WebMock so a configuration error
          # cannot leave WebMock half-applied.
          allowed = allowed_net_hosts(shift_class)
          # Record the restore marker before mutating global state: if enable! or
          # disable_net_connect! raises, the ensure in with_guards still reverts WebMock.
          saved[:webmock] = true
          WebMock.enable!
          WebMock.disable_net_connect!(**(allowed.any? ? { allow: allowed } : {}))
        end

        # WebMock swaps Net::HTTP's socket type for a stub socket while enabled; any
        # failure to inspect it is treated as "not enabled".
        def net_http_webmock_enabled?
          Net::HTTP.socket_type.to_s.include?("StubSocket")
        rescue StandardError
          false
        end

        # Hosts allowed by the shift class itself plus those allowed globally.
        def allowed_net_hosts(shift_class)
          per_shift = shift_class.respond_to?(:_allow_external_requests) ? shift_class._allow_external_requests : []
          global = DataShifter.config.allow_external_requests
          Array(per_shift) + Array(global)
        end

        def webmock_net_connect_error
          return WebMock::NetConnectNotAllowedError if defined?(WebMock::NetConnectNotAllowedError)

          Class.new(StandardError) # never matched when WebMock not loaded
        end

        def extract_host_from_webmock_message(message)
          return nil unless message.is_a?(String)

          # WebMock format: "Unregistered request: GET https://host/path with headers ..."
          match = message.match(%r{Unregistered request: \w+ (https?://\S+)})
          return nil unless match

          URI.parse(match[1]).host
        rescue URI::InvalidURIError, ArgumentError
          nil
        end

        def apply_action_mailer(saved)
          saved[:action_mailer_perform_deliveries] = ActionMailer::Base.perform_deliveries
          ActionMailer::Base.perform_deliveries = false
        end

        def apply_active_job(saved)
          saved[:active_job_adapter] = ActiveJob::Base.queue_adapter
          ActiveJob::Base.queue_adapter = :test
        end

        def apply_sidekiq(saved)
          testing = Sidekiq::Testing
          return unless testing.respond_to?(:fake!)

          # Remember the mode in effect so restore does not downgrade inline!/fake! to disable!.
          saved[:sidekiq_previous_mode] = sidekiq_testing_mode(testing)
          saved[:sidekiq] = true
          testing.fake!
        end

        # Current Sidekiq::Testing mode as :inline, :fake or :disable. Falls back to
        # :disable when the predicate methods are unavailable.
        def sidekiq_testing_mode(testing)
          return :inline if testing.respond_to?(:inline?) && testing.inline?
          return :fake if testing.respond_to?(:fake?) && testing.fake?

          :disable
        end

        def restore_guards(saved)
          if saved.delete(:webmock)
            (saved.delete(:webmock_was_enabled) ? WebMock.enable! : WebMock.disable!)
          end
          if saved.key?(:action_mailer_perform_deliveries)
            ActionMailer::Base.perform_deliveries = saved.delete(:action_mailer_perform_deliveries)
          end
          ActiveJob::Base.queue_adapter = saved.delete(:active_job_adapter) if saved.key?(:active_job_adapter)
          restore_sidekiq(saved)
        end

        def restore_sidekiq(saved)
          return unless saved.delete(:sidekiq)

          case saved.delete(:sidekiq_previous_mode)
          when :inline then Sidekiq::Testing.inline!
          when :fake then Sidekiq::Testing.fake!
          else Sidekiq::Testing.disable!
          end
        end
      end
    end
  end
end