data_shifter 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require "logger"
5
+
6
+ module DataShifter
7
+ module Internal
8
# A proxy logger that suppresses repeated log messages during a shift run.
#
# The first occurrence of a message is forwarded to the wrapped logger; later
# occurrences are only counted. Entries are keyed by a SHA-256 digest of
# "severity:progname:message" and keep just a truncated copy of the text, and
# the number of tracked entries is bounded by +cap+ (see #enforce_cap).
# #print_summary reports the suppressed repeats via +puts+.
class LogDeduplicator
  attr_reader :real_logger, :cap, :seen

  # @param real_logger [Logger] logger that receives first occurrences
  # @param cap [Integer] maximum number of distinct messages tracked at once
  def initialize(real_logger, cap:)
    @real_logger = real_logger
    @cap = cap
    @seen = {}
  end

  # Logs +message+ (or the block's result) unless an identical message was
  # already forwarded.
  #
  # Mirrors Logger#add: the block is only consulted when +message+ is nil, it is
  # evaluated at most once, and it is not evaluated at all when the wrapped
  # logger's level would discard the entry.
  #
  # @return [true] when the severity is below the wrapped logger's level
  # @return [nil] when the message is a suppressed repeat
  # @return [Object] otherwise, whatever the wrapped logger's #add returns
  def add(severity, message = nil, progname = nil, &block)
    return true if below_threshold?(severity)

    lazy = message.nil? && block
    msg = lazy ? block.call : message
    return nil if repeat?(severity, progname, msg, msg || progname)

    if lazy
      # Hand the already-computed value back through a block so the caller's
      # block is not run a second time by the wrapped logger.
      @real_logger.add(severity, nil, progname) { msg }
    else
      @real_logger.add(severity, message, progname)
    end
  end

  def debug(message = nil, progname = nil, &)
    add(Logger::DEBUG, message, progname, &)
  end

  def info(message = nil, progname = nil, &)
    add(Logger::INFO, message, progname, &)
  end

  def warn(message = nil, progname = nil, &)
    add(Logger::WARN, message, progname, &)
  end

  def error(message = nil, progname = nil, &)
    add(Logger::ERROR, message, progname, &)
  end

  def fatal(message = nil, progname = nil, &)
    add(Logger::FATAL, message, progname, &)
  end

  def unknown(message = nil, progname = nil, &)
    add(Logger::UNKNOWN, message, progname, &)
  end

  # Raw write; deduplicated as if it were an INFO message without a progname.
  #
  # @return [nil] when the message is a suppressed repeat
  def <<(msg)
    return nil if repeat?(Logger::INFO, nil, msg, msg)

    @real_logger << msg
  end

  def level
    @real_logger.level
  end

  def level=(val)
    @real_logger.level = val
  end

  def formatter
    @real_logger.formatter
  end

  def formatter=(val)
    @real_logger.formatter = val
  end

  def close
    @real_logger.close
  end

  # @return [Hash] tracked entries that were seen more than once
  def suppressed_messages
    @seen.select { |_key, entry| entry[:count] > 1 }
  end

  # Prints one line per suppressed message (repeat count and a snippet of at
  # most 100 characters) to standard output. Prints nothing when no message
  # was repeated.
  def print_summary
    suppressed = suppressed_messages
    return if suppressed.empty?

    puts "\n[DataShifter] Suppressed repeated log messages:"
    suppressed.each_value do |entry|
      text = entry[:message].to_s
      snippet = text.length > 100 ? "#{text[0, 100]}..." : text
      puts " #{entry[:count] - 1}x suppressed: #{snippet.inspect}"
    end
  end

  # Anything not handled here goes to the wrapped logger. Only its public
  # interface is reachable through the proxy.
  def method_missing(method, ...)
    @real_logger.public_send(method, ...)
  end

  def respond_to_missing?(method, include_private = false)
    @real_logger.respond_to?(method) || super
  end

  class << self
    # Yields a deduplicating proxy around +real_logger+ and always prints the
    # suppression summary afterwards, even when the block raises.
    def with_deduplicating_logger(real_logger, cap:)
      proxy = new(real_logger, cap:)
      yield proxy
    ensure
      proxy&.print_summary
    end
  end

  private

  # True when the wrapped logger would drop an entry of this severity.
  # A nil severity is never considered below the threshold.
  def below_threshold?(severity)
    return false unless severity.is_a?(Integer) && @real_logger.respond_to?(:level)

    threshold = @real_logger.level
    threshold.is_a?(Integer) && severity < threshold
  end

  # Records one occurrence of the message. Returns true when it had already
  # been seen (the caller must then suppress it), false when it is new.
  def repeat?(severity, progname, msg, summary_text)
    key = message_key(severity, progname, msg)
    entry = @seen[key]
    if entry
      entry[:count] += 1
      return true
    end

    enforce_cap
    @seen[key] = { count: 1, message: truncate_message(summary_text), severity: }
    false
  end

  def message_key(severity, progname, message)
    normalized = "#{severity}:#{progname}:#{message}"
    Digest::SHA256.hexdigest(normalized)
  end

  def truncate_message(msg)
    str = msg.to_s
    str.length > 200 ? "#{str[0, 200]}..." : str
  end

  # Makes room for a new entry once the cap is reached: first forget every
  # message seen only once; if that frees nothing, drop the oldest entry.
  def enforce_cap
    return if @seen.size < @cap

    @seen.delete_if { |_key, entry| entry[:count] == 1 }
    return if @seen.size < @cap

    # Hashes keep insertion order, so shift removes the oldest tracked message.
    @seen.shift
  end
end
148
+ end
149
+ end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "colors"
4
+
3
5
  module DataShifter
4
6
  module Internal
5
7
  # Output formatting utilities for data shift runs.
@@ -11,126 +13,165 @@ module DataShifter
11
13
  none: "none",
12
14
  }.freeze
13
15
 
16
+ SKIP_REASONS_DISPLAY_LIMIT = 10
17
+ DIVIDER = "=" * 60
18
+ SEPARATOR = "-" * 60
19
+
14
20
  module_function
15
21
 
22
+ # --- Public header methods ---
23
+
16
24
  def print_header(io:, shift_class:, total:, label:, dry_run:, transaction_mode:, status_interval:)
17
- io.puts ""
18
- io.puts "=" * 60
19
- io.puts shift_class.name || "DataShifter::Shift (anonymous)"
20
- io.puts "\"#{shift_class.description}\"" if shift_class.description.present?
21
- io.puts "-" * 60
22
- io.puts "Mode: #{dry_run ? "DRY RUN (no changes will be persisted)" : "LIVE"}"
25
+ print_header_top(io:, shift_class:, dry_run:)
23
26
  io.puts "Records: #{total} #{label}"
24
27
  io.puts "Transaction: #{TRANSACTION_MODE_LABELS[transaction_mode]}"
28
+ print_header_bottom(io:, status_interval:)
29
+ end
25
30
 
26
- status_line = build_status_line(status_interval)
27
- io.puts "Status: #{status_line} for live progress (no abort)" if status_line
28
-
29
- io.puts "=" * 60
30
- io.puts ""
31
# Prints the run header for a task-style shift: the shared top section, the
# task count (only when there are at least two tasks), the transaction label,
# and the shared bottom section.
def print_task_header(io:, shift_class:, block_count:, dry_run:, transaction_mode:, status_interval:)
  print_header_top(io:, shift_class:, dry_run:)

  multiple_tasks = block_count >= 2
  io.puts "Tasks: #{block_count}" if multiple_tasks

  transaction_label = task_transaction_label(transaction_mode)
  io.puts "Transaction: #{transaction_label}"

  print_header_bottom(io:, status_interval:)
end
32
37
 
33
- def print_summary(io:, stats:, errors:, start_time:, dry_run:, transaction_mode:, interrupted:, task_name:, last_successful_id:)
38
+ # --- Public summary/progress methods ---
39
+
40
+ def print_summary(io:, stats:, errors:, start_time:, dry_run:, transaction_mode:, interrupted:, task_name:, last_successful_id:, skip_reasons: {})
34
41
  return unless start_time
35
42
 
36
- elapsed = (Time.current - start_time).round(1)
43
+ has_failures = stats[:failed].positive? || interrupted
44
+
37
45
  io.puts ""
38
- io.puts "=" * 60
39
- io.puts summary_title(dry_run:, interrupted:)
40
- io.puts "-" * 60
41
- io.puts "Duration: #{elapsed}s"
42
- io.puts "Processed: #{stats[:processed]}"
43
- io.puts "Succeeded: #{stats[:succeeded]}"
44
- io.puts "Failed: #{stats[:failed]}"
45
- io.puts "Skipped: #{stats[:skipped]}"
46
+ io.puts summary_divider(has_failures:, io:)
47
+ io.puts summary_title(dry_run:, interrupted:, has_failures:, io:)
48
+ io.puts Colors.dim(SEPARATOR, io:)
49
+ print_stats(io:, stats:, start_time:, skip_reasons:)
46
50
 
47
51
  print_errors(io:, errors:) if errors.any?
48
52
  print_interrupt_warning(io:, transaction_mode:, dry_run:) if interrupted
49
53
  print_dry_run_instructions(io:, task_name:) if dry_run && !interrupted
50
54
  print_continue_from_hint(io:, task_name:, last_successful_id:, dry_run:, transaction_mode:, errors:)
51
55
 
52
- io.puts "=" * 60
56
+ io.puts summary_divider(has_failures:, io:)
53
57
  end
54
58
 
55
- def print_progress(io:, stats:, errors:, start_time:, status_interval:)
59
+ def print_progress(io:, stats:, errors:, start_time:, status_interval:, skip_reasons: {})
56
60
  return unless start_time
57
61
 
58
- elapsed = (Time.current - start_time).round(1)
59
62
  io.puts ""
60
- io.puts "=" * 60
61
-
62
- trigger = if status_interval
63
- "every #{status_interval}s (STATUS_INTERVAL)"
64
- elsif Signal.list.key?("INFO")
65
- "Ctrl+T"
66
- else
67
- "SIGUSR1"
68
- end
69
-
70
- io.puts "STATUS (still running) — triggered by #{trigger}"
71
- io.puts "-" * 60
72
- io.puts "Duration: #{elapsed}s"
73
- io.puts "Processed: #{stats[:processed]}"
74
- io.puts "Succeeded: #{stats[:succeeded]}"
75
- io.puts "Failed: #{stats[:failed]}"
76
- io.puts "Skipped: #{stats[:skipped]}"
63
+ io.puts Colors.cyan(DIVIDER, io:)
64
+ io.puts "#{Colors.cyan("STATUS (still running)", io:)} — triggered by #{status_trigger(status_interval)}"
65
+ io.puts Colors.dim(SEPARATOR, io:)
66
+ print_stats(io:, stats:, start_time:, skip_reasons:)
77
67
 
78
68
  print_errors(io:, errors:) if errors.any?
79
69
 
80
- io.puts "=" * 60
70
+ io.puts Colors.cyan(DIVIDER, io:)
81
71
  io.puts ""
82
72
  end
83
73
 
84
74
  def print_errors(io:, errors:)
85
75
  io.puts ""
86
- io.puts "ERRORS:"
87
- errors.each do |err|
88
- io.puts " #{err[:record]}: #{err[:error]}"
89
- err[:backtrace]&.each { |line| io.puts " #{line}" }
76
+ io.puts Colors.error("ERRORS:", io:)
77
+ errors.each { |err| print_single_error(io:, err:) }
78
+ end
79
+
80
+ # --- Private helpers ---
81
+
82
# Prints the opening part of a run header: blank line, divider, the shift
# class name (or an anonymous placeholder), the quoted description when one
# is present, a separator, and the run mode line.
def print_header_top(io:, shift_class:, dry_run:)
  io.puts ""
  io.puts Colors.dim(DIVIDER, io:)

  title = shift_class.name || "DataShifter::Shift (anonymous)"
  io.puts Colors.bold(title, io:)

  if shift_class.description.present?
    quoted = "\"#{shift_class.description}\""
    io.puts Colors.dim(quoted, io:)
  end

  io.puts Colors.dim(SEPARATOR, io:)
  io.puts "Mode: #{mode_label(dry_run:, io:)}"
end
90
+
91
# Prints the closing part of a run header: the live-progress hint (only when
# build_status_line returns something), the divider, and a trailing blank line.
def print_header_bottom(io:, status_interval:)
  tips = build_status_line(status_interval)
  if tips
    hint = "Status: #{tips} for live progress (no abort)"
    io.puts Colors.dim(hint, io:)
  end

  io.puts Colors.dim(DIVIDER, io:)
  io.puts ""
end
97
+
98
# Prints the statistics shared by the progress and summary outputs: elapsed
# time since start_time, processed and succeeded counts, then the failed and
# skipped counts (each only when positive) and the skip reasons (when any).
def print_stats(io:, stats:, start_time:, skip_reasons:)
  seconds = (Time.current - start_time).round(1)
  io.puts "Duration: #{seconds}s"
  io.puts "Processed: #{stats[:processed]}"

  succeeded = Colors.green(stats[:succeeded].to_s, io:)
  io.puts "Succeeded: #{succeeded}"

  if stats[:failed].positive?
    failed = Colors.red(stats[:failed].to_s, io:)
    io.puts "Failed: #{failed}"
  end

  if stats[:skipped].positive?
    skipped = Colors.yellow(stats[:skipped].to_s, io:)
    io.puts "Skipped: #{skipped}"
  end

  return unless skip_reasons.any?

  print_skip_reasons(io:, skip_reasons:)
end
107
+
108
# Prints one error entry: the record and the first line of the error text,
# then any further lines of the error text, then the backtrace lines (dimmed)
# when a backtrace is present.
def print_single_error(io:, err:)
  headline, *continuation = err[:error].to_s.split("\n")

  record = Colors.red(err[:record].to_s, io:)
  io.puts " #{record}: #{headline}"

  continuation.each { |text| io.puts " #{text}" }
  err[:backtrace]&.each do |frame|
    io.puts Colors.dim(" #{frame}", io:)
  end
end
114
+
115
# Returns the text shown after "Mode:" in the header: a highlighted LIVE
# marker for real runs, or a DRY RUN marker followed by a reminder that
# nothing will be persisted.
def mode_label(dry_run:, io:)
  return Colors.warning("LIVE", io:) unless dry_run

  marker = Colors.cyan("DRY RUN", io:)
  "#{marker} (no changes will be persisted)"
end
92
122
 
93
- def summary_title(dry_run:, interrupted:)
123
# Returns the transaction label for task-style shifts: :per_record is shown
# as "per-task"; every other mode uses its TRANSACTION_MODE_LABELS entry.
def task_transaction_label(mode)
  return "per-task" if mode == :per_record

  TRANSACTION_MODE_LABELS[mode]
end
126
+
127
# Returns the summary divider line, red when the run had failures and green
# otherwise.
def summary_divider(has_failures:, io:)
  return Colors.red(DIVIDER, io:) if has_failures

  Colors.green(DIVIDER, io:)
end
130
+
131
+ def summary_title(dry_run:, interrupted:, has_failures: false, io: $stdout)
94
132
  base = dry_run ? "SUMMARY (DRY RUN)" : "SUMMARY"
95
- interrupted ? "#{base} - INTERRUPTED" : base
133
+ title = interrupted ? "#{base} - INTERRUPTED" : base
134
+ has_failures ? Colors.error(title, io:) : Colors.success(title, io:)
96
135
  end
97
136
 
98
- def print_interrupt_warning(io:, transaction_mode:, dry_run:)
99
- io.puts ""
100
- if transaction_mode == :none
101
- io.puts "[!] INTERRUPTED: `transaction false` mode was active."
102
- io.puts " Some DB changes may have been applied before interruption."
103
- io.puts " Non-DB side effects (API calls, emails, etc.) are not rolled back."
104
- io.puts " Review the database state before re-running."
105
- elsif dry_run
106
- io.puts "[!] INTERRUPTED: All DB changes have been rolled back (dry run)."
107
- io.puts " Non-DB side effects (API calls, emails, etc.) are not rolled back."
137
+ def status_trigger(status_interval)
138
+ if status_interval
139
+ "every #{status_interval}s (STATUS_INTERVAL)"
140
+ elsif Signal.list.key?("INFO")
141
+ "Ctrl+T"
108
142
  else
109
- io.puts "[!] INTERRUPTED: DB transaction has been rolled back."
110
- io.puts " No DB changes were persisted."
111
- io.puts " Non-DB side effects (API calls, emails, etc.) are not rolled back."
143
+ "SIGUSR1"
112
144
  end
113
145
  end
114
146
 
147
# Prints the warning shown after an interrupted run. The explanation depends
# on the transaction mode (:none takes precedence) and on whether this was a
# dry run; the note about non-DB side effects is always printed.
def print_interrupt_warning(io:, transaction_mode:, dry_run:)
  explanation = "DB transaction has been rolled back. No DB changes were persisted."
  explanation = "All DB changes have been rolled back (dry run)." if dry_run
  if transaction_mode == :none
    explanation = "`transaction false` mode was active. Some DB changes may have been applied."
  end

  banner = Colors.warning("[!] INTERRUPTED:", io:)

  io.puts ""
  io.puts "#{banner} #{explanation}"
  io.puts " Non-DB side effects (API calls, emails, etc.) are not rolled back."
end
159
+
115
160
  def print_dry_run_instructions(io:, task_name:)
116
161
  io.puts ""
117
- io.puts "[!] No changes were saved."
162
+ io.puts Colors.cyan("[!] No changes were saved.", io:)
118
163
  return unless task_name.present?
119
164
 
120
165
  io.puts "To apply these changes, run:"
121
- io.puts " COMMIT=1 rake data:shift:#{task_name}"
166
+ io.puts " #{Colors.bold("COMMIT=1 rake data:shift:#{task_name}", io:)}"
122
167
  end
123
168
 
124
169
  def print_continue_from_hint(io:, task_name:, last_successful_id:, dry_run:, transaction_mode:, errors:)
125
- return if dry_run
126
- return unless transaction_mode == :none
127
- return if errors.empty?
128
- return unless last_successful_id
129
- return unless task_name.present?
170
+ return if dry_run || transaction_mode != :none || errors.empty? || !last_successful_id || !task_name.present?
130
171
 
131
172
  io.puts ""
132
173
  io.puts "To resume from the last successful record:"
133
- io.puts " CONTINUE_FROM=#{last_successful_id} COMMIT=1 rake data:shift:#{task_name}"
174
+ io.puts " #{Colors.bold("CONTINUE_FROM=#{last_successful_id} COMMIT=1 rake data:shift:#{task_name}", io:)}"
134
175
  end
135
176
 
136
177
  def build_status_line(status_interval)
@@ -145,6 +186,14 @@ module DataShifter
145
186
  status_tips.join(" or ")
146
187
  end
147
188
  end
189
+
190
# Prints the most frequent skip reasons on a single line, highest count
# first, limited to SKIP_REASONS_DISPLAY_LIMIT entries. Prints nothing when
# there are no skip reasons.
def print_skip_reasons(io:, skip_reasons:)
  unless skip_reasons.empty?
    ranked = skip_reasons.sort_by { |_text, occurrences| -occurrences }
    parts = ranked.first(SKIP_REASONS_DISPLAY_LIMIT).map do |text, occurrences|
      %["#{text}" (#{occurrences})]
    end
    io.puts " #{parts.join(", ")}"
  end
end
148
197
  end
149
198
  end
150
199
  end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+
5
+ module DataShifter
6
+ module Internal
7
# Applies and restores side-effect guards during dry runs so that HTTP, mail,
# and job enqueues are blocked (or faked) unless explicitly allowed.
#
# Production impact:
# - WebMock: required only when apply_webmock runs (i.e. during a dry run), so commit-only
#   production runs never load WebMock. On restore we revert both the enabled/disabled
#   state and the net-connect settings that disable_net_connect! overwrote, so e.g. specs
#   that had WebMock enabled with their own allow-list get that configuration back.
# - ActionMailer / ActiveJob: no extra loading; we only toggle existing config for the
#   duration of the block and restore in ensure, so impact is scoped to the run.
# - Sidekiq: guarded only when Sidekiq::Testing is already loaded; the test mode that was
#   active before the run (inline, fake, or disabled) is reinstated afterwards.
module SideEffectGuards
  # WebMock::Config settings written by WebMock.disable_net_connect!.
  WEBMOCK_NET_CONNECT_SETTINGS = %i[
    allow_net_connect allow_localhost allow net_http_connect_on_start
  ].freeze
  private_constant :WEBMOCK_NET_CONNECT_SETTINGS

  class << self
    # Applies side-effect guards, yields, then restores. Call only when running in dry run.
    #
    # @param shift_class [Class] shift whose per-class allow-list is honoured
    # @raise [DataShifter::ExternalRequestNotAllowedError] when the block attempts an
    #   HTTP request that WebMock blocks
    def with_guards(shift_class:)
      saved = {}
      apply_guards(shift_class, saved)
      yield
    rescue webmock_net_connect_error => e
      host = extract_host_from_webmock_message(e.message)
      raise DataShifter::ExternalRequestNotAllowedError.new(attempted_host: host), cause: e
    ensure
      restore_guards(saved) if saved.any?
    end

    private

    def apply_guards(shift_class, saved)
      apply_webmock(shift_class, saved)
      # rubocop:disable Style/CombinableDefined -- parent must be checked first to avoid NameError when constant not loaded
      apply_action_mailer(saved) if defined?(ActionMailer) && defined?(ActionMailer::Base)
      apply_active_job(saved) if defined?(ActiveJob) && defined?(ActiveJob::Base)
      apply_sidekiq(saved) if defined?(Sidekiq) && defined?(Sidekiq::Testing)
      # rubocop:enable Style/CombinableDefined
    end

    def apply_webmock(shift_class, saved)
      # Resolve the allow-list first so a configuration error cannot leave
      # WebMock half-applied.
      allowed = allowed_net_hosts(shift_class)

      if defined?(WebMock)
        # WebMock already loaded (e.g. in specs); capture so we can restore
        saved[:webmock_was_enabled] = net_http_webmock_enabled?
      else
        require "webmock"
        saved[:webmock_was_enabled] = false
      end
      saved[:webmock_net_connect] = snapshot_webmock_net_connect
      # Flag before mutating so a failure below still triggers the restore.
      saved[:webmock] = true

      WebMock.enable!
      opts = allowed.any? ? { allow: allowed } : {}
      WebMock.disable_net_connect!(**opts)
    end

    def net_http_webmock_enabled?
      Net::HTTP.socket_type.to_s.include?("StubSocket")
    rescue StandardError
      false
    end

    # Captures the current net-connect settings, or nil when WebMock::Config
    # is not available.
    def snapshot_webmock_net_connect
      return nil unless defined?(WebMock::Config)

      config = WebMock::Config.instance
      WEBMOCK_NET_CONNECT_SETTINGS.each_with_object({}) do |name, snapshot|
        snapshot[name] = config.public_send(name) if config.respond_to?(name)
      end
    end

    def restore_webmock_net_connect(snapshot)
      return unless snapshot && defined?(WebMock::Config)

      config = WebMock::Config.instance
      snapshot.each { |name, value| config.public_send("#{name}=", value) }
    end

    def allowed_net_hosts(shift_class)
      per_shift = shift_class.respond_to?(:_allow_external_requests) ? shift_class._allow_external_requests : []
      global = DataShifter.config.allow_external_requests
      Array(per_shift) + Array(global)
    end

    def webmock_net_connect_error
      return WebMock::NetConnectNotAllowedError if defined?(WebMock::NetConnectNotAllowedError)

      Class.new(StandardError) # never matched when WebMock not loaded
    end

    # @return [String, nil] host of the blocked request, or nil when the
    #   message does not contain a parsable URL
    def extract_host_from_webmock_message(message)
      return nil unless message.is_a?(String)

      # WebMock format: "Unregistered request: GET https://host/path with headers ..."
      m = message.match(%r{Unregistered request: \w+ (https?://\S+)})
      return nil unless m

      URI.parse(m[1]).host
    rescue URI::InvalidURIError, ArgumentError
      nil
    end

    def apply_action_mailer(saved)
      saved[:action_mailer_perform_deliveries] = ActionMailer::Base.perform_deliveries
      ActionMailer::Base.perform_deliveries = false
    end

    def apply_active_job(saved)
      saved[:active_job_adapter] = ActiveJob::Base.queue_adapter
      ActiveJob::Base.queue_adapter = :test
    end

    def apply_sidekiq(saved)
      return unless Sidekiq::Testing.respond_to?(:fake!)

      saved[:sidekiq_mode] = current_sidekiq_mode
      Sidekiq::Testing.fake!
    end

    # Test mode active before the guard was applied; :disable when neither
    # inline nor fake mode is reported.
    def current_sidekiq_mode
      testing = Sidekiq::Testing
      return :inline if testing.respond_to?(:inline?) && testing.inline?
      return :fake if testing.respond_to?(:fake?) && testing.fake?

      :disable
    end

    def restore_sidekiq(mode)
      case mode
      when :inline then Sidekiq::Testing.inline!
      when :fake then Sidekiq::Testing.fake!
      else Sidekiq::Testing.disable!
      end
    end

    def restore_webmock(saved)
      restore_webmock_net_connect(saved.delete(:webmock_net_connect))
      saved.delete(:webmock_was_enabled) ? WebMock.enable! : WebMock.disable!
    end

    def restore_guards(saved)
      restore_webmock(saved) if saved.delete(:webmock)

      if saved.key?(:action_mailer_perform_deliveries)
        ActionMailer::Base.perform_deliveries = saved.delete(:action_mailer_perform_deliveries)
      end

      ActiveJob::Base.queue_adapter = saved.delete(:active_job_adapter) if saved.key?(:active_job_adapter)

      restore_sidekiq(saved.delete(:sidekiq_mode)) if saved.key?(:sidekiq_mode)
    end
  end
end
119
+ end
120
+ end