ez_logs_agent 0.1.10 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +88 -0
- data/README.md +25 -8
- data/lib/ez_logs_agent/buffer.rb +14 -0
- data/lib/ez_logs_agent/bulk_sql_parser.rb +312 -0
- data/lib/ez_logs_agent/capturers/active_job_capturer.rb +28 -3
- data/lib/ez_logs_agent/capturers/bulk_database_capturer.rb +578 -0
- data/lib/ez_logs_agent/capturers/database_capturer.rb +46 -58
- data/lib/ez_logs_agent/encrypted_attributes.rb +45 -0
- data/lib/ez_logs_agent/event_builder.rb +4 -1
- data/lib/ez_logs_agent/railtie.rb +8 -4
- data/lib/ez_logs_agent/sanitizer.rb +8 -20
- data/lib/ez_logs_agent/sensitive_patterns.rb +82 -0
- data/lib/ez_logs_agent/version.rb +1 -1
- data/lib/ez_logs_agent.rb +4 -0
- metadata +5 -1
|
@@ -0,0 +1,578 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/notifications"
|
|
4
|
+
|
|
5
|
+
module EzLogsAgent
|
|
6
|
+
module Capturers
|
|
7
|
+
# Captures bulk SQL operations that bypass ActiveRecord lifecycle
|
|
8
|
+
# callbacks: delete_all, update_all, insert_all, upsert_all.
|
|
9
|
+
#
|
|
10
|
+
# ## Why this exists
|
|
11
|
+
#
|
|
12
|
+
# DatabaseCapturer (the per-row sibling) hooks after_create/_update/_destroy
|
|
13
|
+
# to capture rich per-record context (saved_changes, encrypted_attributes,
|
|
14
|
+
# display_name). That model breaks for bulk ops — Rails issues a single
|
|
15
|
+
# UPDATE/DELETE/INSERT statement against the database WITHOUT instantiating
|
|
16
|
+
# records, so the callbacks never fire. Customer code like
|
|
17
|
+
#
|
|
18
|
+
# Order.where(status: "cart").delete_all
|
|
19
|
+
# Library.where(closed: true).update_all(status: "active")
|
|
20
|
+
# User.insert_all([{name: "a"}, {name: "b"}])
|
|
21
|
+
#
|
|
22
|
+
# plus `dependent: :delete_all` cascades during a parent destroy, are all
|
|
23
|
+
# invisible to the callback-based path. This capturer fills the gap.
|
|
24
|
+
#
|
|
25
|
+
# ## How it works
|
|
26
|
+
#
|
|
27
|
+
# Subscribes to "sql.active_record" — the standard Rails instrumentation
|
|
28
|
+
# API every observability tool uses (Datadog APM, AppSignal, Skylight).
|
|
29
|
+
# On every SQL statement the host app runs, we get a payload with
|
|
30
|
+
# the raw SQL, binds, name, and row_count. We filter aggressively
|
|
31
|
+
# to ONLY four operations (delete_all / update_all / insert_all /
|
|
32
|
+
# upsert_all) by SQL shape detection (BulkSqlParser.detect_operation),
|
|
33
|
+
# then parse + sanitize + ship.
|
|
34
|
+
#
|
|
35
|
+
# ## Dedup vs DatabaseCapturer
|
|
36
|
+
#
|
|
37
|
+
# Per-row CRUD (`user.save`, `order.destroy`) fires `after_*` callbacks
|
|
38
|
+
# AND produces an `sql.active_record` notification with a singular name
|
|
39
|
+
# ("User Update", "Order Destroy"). DatabaseCapturer captures these via
|
|
40
|
+
# callbacks; this capturer ignores them because their SQL shape is NOT
|
|
41
|
+
# one of the four bulk operations. Mutually exclusive — no double-capture.
|
|
42
|
+
#
|
|
43
|
+
# Cascade case: `Company has_many :orders, dependent: :delete_all` issues
|
|
44
|
+
# a single DELETE for the children. Callbacks don't fire on the children
|
|
45
|
+
# (delete_all bypasses them by design), but this capturer catches the
|
|
46
|
+
# bulk DELETE. The parent's `after_destroy` is captured separately by
|
|
47
|
+
# DatabaseCapturer. Both events share the request's correlation_id and
|
|
48
|
+
# land under the same Action shell. Reader sees parent + cascade as
|
|
49
|
+
# sibling rows on the timeline — the right narrative.
|
|
50
|
+
#
|
|
51
|
+
# ## Wire shape (matches server EventIngest expectations)
|
|
52
|
+
#
|
|
53
|
+
# {
|
|
54
|
+
# source_type: "bulk_database",
|
|
55
|
+
# source_data: {
|
|
56
|
+
# model_class: "Order",
|
|
57
|
+
# operation: "delete_all" | "update_all" | "insert_all" | "upsert_all",
|
|
58
|
+
# row_count: 50000,
|
|
59
|
+
# where_template: "\"orders\".\"status\" = $1",
|
|
60
|
+
# where_binds: [{column: "status", value: "cart"}],
|
|
61
|
+
# set: {"status" => "paid"}, # only update_all
|
|
62
|
+
# columns: ["name", "email"] # only insert_all / upsert_all
|
|
63
|
+
# },
|
|
64
|
+
# correlation_id: ...,
|
|
65
|
+
# resource_ids: [{resource_type: "Order", resource_id: "bulk:50000"}],
|
|
66
|
+
# outcome: "success",
|
|
67
|
+
# duration_ms: <finish - start>
|
|
68
|
+
# }
|
|
69
|
+
#
|
|
70
|
+
# The "bulk:<count>" sentinel resource_id is required because the server's
|
|
71
|
+
# ResourceAggregationStage drops entries with nil resource_id. The
|
|
72
|
+
# display layer detects the sentinel and renders "Order (50,000 rows)"
|
|
73
|
+
# without a clickable entity link.
|
|
74
|
+
module BulkDatabaseCapturer
|
|
75
|
+
# AR's `payload[:name]` convention for the four bulk operations
|
|
76
|
+
# (verified against Rails 7.0–8.0 + SQLite/PG/MySQL):
|
|
77
|
+
#
|
|
78
|
+
# delete_all → "<Model> Delete All"
|
|
79
|
+
# update_all → "<Model> Update All"
|
|
80
|
+
# insert_all → "<Model> Insert" (or "<Model> Bulk Insert" on older PG)
|
|
81
|
+
# upsert_all → "<Model> Upsert" (or "<Model> Bulk Upsert" on older PG)
|
|
82
|
+
#
|
|
83
|
+
# Per-row CRUD uses singular operation verbs:
|
|
84
|
+
# user.save (new) → "<Model> Create"
|
|
85
|
+
# user.update → "<Model> Update" (no " All")
|
|
86
|
+
# user.destroy → "<Model> Destroy"
|
|
87
|
+
#
|
|
88
|
+
# So the four bulk shapes are uniquely identified by either:
|
|
89
|
+
# - ending in " All" (covers Delete All / Update All), OR
|
|
90
|
+
# - the words Insert / Upsert (which are NEVER used for per-row CRUD
|
|
91
|
+
# — per-row inserts are tagged "Create", per-row updates "Update").
|
|
92
|
+
#
|
|
93
|
+
# SQL shape detection (BulkSqlParser.detect_operation) is the actual
|
|
94
|
+
# authority — this filter is only a sub-µs pre-pass to skip non-bulk
|
|
95
|
+
# notifications without parsing SQL.
|
|
96
|
+
BULK_NAME_HINT = / All\z| (Bulk )?(Insert|Upsert)\z/.freeze
|
|
97
|
+
|
|
98
|
+
class << self
|
|
99
|
+
attr_reader :subscriber
|
|
100
|
+
|
|
101
|
+
# Installs the AS::Notifications subscription. Idempotent — calling
|
|
102
|
+
# twice is a no-op (would otherwise produce double-events because
|
|
103
|
+
# AS::Notifications.subscribe is itself NOT idempotent).
|
|
104
|
+
#
|
|
105
|
+
# Called from Railtie.install_database_capturer alongside the
|
|
106
|
+
# per-row DatabaseCapturer.install. Both gated by the same
|
|
107
|
+
# `capture_database` configuration flag — no new toggle.
|
|
108
|
+
def install
  # A second call is a no-op so the subscription is never registered twice.
  return if @installed

  # Snapshot the settings the per-statement handler reads, so the hot path
  # only touches instance variables. If reading the configuration raises,
  # capture is treated as disabled and the exclusion list as empty.
  @capture_enabled = config_value_or(false) { |config| config.capture_database }
  @excluded_tables = config_value_or([].freeze) { |config| config.all_excluded_tables.dup.freeze }

  install_row_count_capture!

  # Five explicit block parameters (name, start, finish, id, payload)
  # instead of a splat; the id is not used.
  @subscriber = ::ActiveSupport::Notifications.subscribe("sql.active_record") do |event_name, started_at, finished_at, _id, payload|
    handle_notification(event_name, started_at, finished_at, payload)
  end
  @installed = true
end

# Yields the agent configuration and returns the block's value, or
# `fallback` when reading the configuration raises a StandardError.
def config_value_or(fallback)
  yield ::EzLogsAgent.configuration
rescue StandardError
  fallback
end
|
|
140
|
+
|
|
141
|
+
# Patches ActiveRecord::Relation's bulk methods to backfill the
|
|
142
|
+
# affected-row count on the most recently captured bulk_database
|
|
143
|
+
# event. See the comment on RelationRowCountStash below for why
|
|
144
|
+
# this is necessary (Rails' payload[:row_count] is unreliable
|
|
145
|
+
# for plain DELETE/UPDATE on PG).
|
|
146
|
+
def install_row_count_capture!
  # Skip when the patch is already in place or ActiveRecord::Relation is not loaded.
  return if @relation_patched || !defined?(::ActiveRecord::Relation)

  ::ActiveRecord::Relation.prepend(RelationRowCountStash)
  @relation_patched = true
rescue StandardError => error
  # Best-effort: a failed prepend is logged at debug level and not re-raised.
  ::EzLogsAgent::Logger.debug("[BulkDatabaseCapturer] could not patch Relation: #{error.class}: #{error.message}")
end
|
|
157
|
+
|
|
158
|
+
# Patches the row_count on the most recently buffered bulk_database
|
|
159
|
+
# event when its model matches `model_class`. Called from the
|
|
160
|
+
# RelationRowCountStash module immediately after `super` returns
|
|
161
|
+
# from delete_all / update_all. The buffer's `peek_last` API is
|
|
162
|
+
# the lightweight read path; we mutate in place because the
|
|
163
|
+
# event hash hasn't been serialized yet.
|
|
164
|
+
def backfill_row_count(real_count, model_class)
  return if real_count.nil?

  latest = ::EzLogsAgent::Buffer.peek_last

  # Only touch the buffer tail when it is a bulk_database event recorded
  # for this exact model name; anything else is left alone.
  same_event = latest.is_a?(Hash) &&
               latest[:source_type] == "bulk_database" &&
               latest.dig(:source_data, :model_class) == model_class.name
  return unless same_event

  latest[:source_data][:row_count] = real_count

  # Keep the "bulk:<count>" sentinel in the first resource entry in sync.
  resource_ids = latest[:resource_ids]
  return unless resource_ids.is_a?(Array)

  sentinel = resource_ids.first
  sentinel[:resource_id] = "bulk:#{real_count}" if sentinel.is_a?(Hash)
rescue StandardError => error
  ::EzLogsAgent::Logger.debug("[BulkDatabaseCapturer] backfill_row_count failed: #{error.class}: #{error.message}")
end
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
# Backfills the affected-row count on the most recently captured
|
|
182
|
+
# bulk_database event. Order of operations:
|
|
183
|
+
#
|
|
184
|
+
# 1. Customer calls Relation#delete_all (or update_all).
|
|
185
|
+
# 2. AR runs the SQL → sql.active_record fires → our AS::N
|
|
186
|
+
# handler captures the event with row_count=payload[:row_count]
|
|
187
|
+
# (often 0 on PG for plain DELETE).
|
|
188
|
+
# 3. delete_all's super returns the affected count to us.
|
|
189
|
+
# 4. We patch the most-recently-buffered bulk_database event's
|
|
190
|
+
# source_data[:row_count] AND its sentinel resource_id so the
|
|
191
|
+
# timeline shows the real number.
|
|
192
|
+
#
|
|
193
|
+
# This is a thin shim — only the count is touched, never the SQL,
|
|
194
|
+
# context, or wire shape. If the buffer's tail isn't a bulk_database
|
|
195
|
+
# event for our model (e.g. AS::N skipped it), we leave it alone.
|
|
196
|
+
module RelationRowCountStash
  # Runs the underlying delete_all and, when it returns an Integer, hands
  # that count to the capturer. The underlying return value is passed
  # through unchanged.
  def delete_all
    super.tap do |affected|
      ::EzLogsAgent::Capturers::BulkDatabaseCapturer.backfill_row_count(affected, klass) if affected.is_a?(Integer)
    end
  end

  # Same as delete_all, for update_all. Arguments are forwarded untouched
  # by the bare `super`.
  def update_all(*args, **kwargs, &block)
    super.tap do |affected|
      ::EzLogsAgent::Capturers::BulkDatabaseCapturer.backfill_row_count(affected, klass) if affected.is_a?(Integer)
    end
  end
end
|
|
209
|
+
|
|
210
|
+
class << self
|
|
211
|
+
|
|
212
|
+
# Removes the subscription. Specs use this between examples to
|
|
213
|
+
# avoid leaked subscribers; production never calls it.
|
|
214
|
+
def uninstall!
  # Unsubscribe only when a subscription handle is actually held.
  if @subscriber
    ::ActiveSupport::Notifications.unsubscribe(@subscriber)
  end

  # Reset state so a later install subscribes again.
  @subscriber = nil
  @installed = false
end
|
|
219
|
+
|
|
220
|
+
# Per-notification entry point for "sql.active_record".
#
# A failure inside the capturer must not reach the host's subscriber chain,
# so every StandardError and ScriptError is logged and swallowed here.
# Process-control exceptions (Interrupt / SignalException, SystemExit,
# NoMemoryError, and the shutdown or timeout exceptions some libraries raise
# into a running thread) are deliberately NOT rescued: swallowing one would
# cancel a shutdown or timeout that happened to land inside this handler.
#
# @param _event_name [String] notification name (unused)
# @param started [Object] start timestamp; `finished - started` is treated as seconds
# @param finished [Object] finish timestamp
# @param payload [Hash] notification payload; :name, :sql, :type_casted_binds
#   and :row_count are read
# @return [void]
def handle_notification(_event_name, started, finished, payload)
  # Hot path: runs for every SQL statement the host app issues, so the
  # filters are ordered cheapest first:
  #
  #   1. cached capture flag (a single instance-variable read),
  #   2. payload-name suffix check, which also rejects non-Hash payloads
  #      and non-String names without raising,
  #   3. SQL shape detection and everything after it.
  return unless @capture_enabled
  return unless eligible_payload?(payload)

  sql = payload[:sql]
  operation = ::EzLogsAgent::BulkSqlParser.detect_operation(sql)
  return unless operation

  model_class = resolve_model_class(sql)
  return if model_class.nil?
  return if table_excluded?(model_class)

  parse_result = ::EzLogsAgent::BulkSqlParser.parse(
    sql: sql,
    type_casted_binds: payload[:type_casted_binds]
  )

  # Drop update_all statements that framework_rewrite? classifies as noise:
  # an empty parsed SET, or a SET value containing "COALESCE(" (the shape
  # of counter bumps). Filtering here keeps them off the wire entirely.
  return if framework_rewrite?(operation, parse_result)

  source_data = build_source_data(
    operation: operation,
    model_class: model_class,
    row_count: payload[:row_count],
    parse_result: parse_result
  )

  duration_ms = ((finished - started) * 1000).to_i

  event = ::EzLogsAgent::EventBuilder.build(
    source_type: :bulk_database,
    source_data: source_data,
    outcome: :success,
    correlation_id: ::EzLogsAgent::Correlation.current,
    resource_ids: build_resource_ids(model_class, source_data[:row_count]),
    context: nil,
    duration_ms: duration_ms
  )

  ::EzLogsAgent::Buffer.push(event)
rescue StandardError, ScriptError => e
  # Logged at error level so a regression is visible, but never re-raised.
  safe_log_error("handle_notification", e)
end
|
|
303
|
+
|
|
304
|
+
# Fast pre-filter — checks the name field WITHOUT touching SQL.
|
|
305
|
+
# Returns false for the vast majority of notifications (per-row
|
|
306
|
+
# CRUD, SCHEMA, TRANSACTION, internal lookups).
|
|
307
|
+
#
|
|
308
|
+
# @param payload [Hash, nil]
|
|
309
|
+
# @return [Boolean]
|
|
310
|
+
def eligible_payload?(payload)
  # Anything that is not a Hash carrying a String :name is ineligible.
  return false unless payload.is_a?(Hash)

  candidate = payload[:name]
  return false unless candidate.is_a?(String)

  name_eligible?(candidate)
end
|
|
315
|
+
|
|
316
|
+
# Hot-path suffix test on a notification name; plain string comparison,
# no regex. The bulk shapes of interest are
#   "<Model> Delete All", "<Model> Update All",
#   "<Model> Insert", "<Model> Upsert"
# and names ending in " Bulk Insert" / " Bulk Upsert" match through the same
# " Insert" / " Upsert" suffixes. Per-row names such as "<Model> Create",
# "<Model> Update" and "<Model> Destroy" match none of the three suffixes.
#
# @param name [String]
# @return [Boolean]
def name_eligible?(name)
  name.end_with?(" All", " Insert", " Upsert")
end
|
|
328
|
+
|
|
329
|
+
# Looks up the model class from the SQL's table name. Returns nil
|
|
330
|
+
# for SQL we can't attribute (raw multi-table queries, anonymous
|
|
331
|
+
# adapter SQL, schema introspection). Skipping these is correct —
|
|
332
|
+
# we'd have nothing to display anyway.
|
|
333
|
+
def resolve_model_class(sql)
  return nil if sql.nil?

  table_name = extract_table_name(sql)
  return nil if table_name.nil?

  # First choice: an already-loaded, non-abstract model mapped to this table.
  ::ActiveRecord::Base.descendants.each do |model|
    next unless model.respond_to?(:table_name)
    next unless model.table_name == table_name
    next if model.abstract_class?

    return model
  end

  # Otherwise derive a constant name from the table name and accept it only
  # if it is an ActiveRecord model that really maps to the same table.
  candidate = table_name.to_s.classify.safe_constantize
  usable = candidate.is_a?(Class) &&
           candidate < ::ActiveRecord::Base &&
           candidate.respond_to?(:table_name) &&
           candidate.table_name == table_name
  usable ? candidate : nil
rescue StandardError
  # Any lookup failure means the statement cannot be attributed to a model.
  nil
end
|
|
362
|
+
|
|
363
|
+
# Extracts the unquoted table name targeted by a statement that begins with
# DELETE FROM, UPDATE or INSERT INTO (case-insensitive).
#
# Double-quote and backtick identifier quoting is stripped. A qualified
# identifier is returned whole, whether or not it is quoted:
# `"public"."orders"` and `public.orders` both yield "public.orders".
#
# @param sql [String] raw SQL; must respond to #match
# @return [String, nil] the table name, or nil when the statement does not
#   start with one of the three supported verbs
def extract_table_name(sql)
  # Group 1 is the identifier chain: zero or more `segment.` prefixes
  # followed by a final segment, each optionally wrapped in " or `.
  match = sql.match(
    /\A(?:DELETE FROM|UPDATE|INSERT INTO)\s+((?:["`]?[^"`\s.]+["`]?\.)*["`]?[^"`\s]+["`]?)/i
  )
  return nil unless match

  match[1].delete('"`')
end
|
|
382
|
+
|
|
383
|
+
# Builds the source_data hash from the parser result, applying
|
|
384
|
+
# encrypted_attributes drop + sensitive-pattern masking on
|
|
385
|
+
# column-keyed values.
|
|
386
|
+
def build_source_data(operation:, model_class:, row_count:, parse_result:)
  # Fields present on every bulk event.
  payload = { model_class: model_class.name, operation: operation.to_s, row_count: row_count }

  # Unparseable SQL ships with the base fields only.
  return payload if parse_result[:unparseable]

  assignments = parse_result[:set]
  payload[:set] = mask_set_hash(assignments, model_class) if assignments

  # Binds are attached only when there is a WHERE template to go with them.
  where_template = parse_result[:where_template]
  if where_template
    payload[:where_template] = where_template
    payload[:where_binds] = mask_where_binds(parse_result[:where_binds], model_class)
  end

  column_names = parse_result[:columns]
  payload[:columns] = filter_columns(column_names, model_class) if column_names

  payload
end
|
|
410
|
+
|
|
411
|
+
# Walks `{ column => value }` from update_all SET, masking values
|
|
412
|
+
# whose column is encrypted OR matches a sensitive pattern. Date /
|
|
413
|
+
# Time / BigDecimal values get JSON-formatted so they don't collapse
|
|
414
|
+
# to "[Object]" downstream.
|
|
415
|
+
def mask_set_hash(set, model_class)
  masked = {}
  set.each do |column, raw|
    # A column is hidden when the model encrypts it or its name matches a
    # sensitive pattern; every other value is formatted for JSON.
    sensitive = ::EzLogsAgent::EncryptedAttributes.attribute?(model_class, column) ||
                ::EzLogsAgent::SensitivePatterns.match?(column)
    masked[column] = sensitive ? "[FILTERED]" : format_value_for_json(raw)
  end
  masked
end
|
|
427
|
+
|
|
428
|
+
# Walks the array of {column:, value:} bind entries from the WHERE
|
|
429
|
+
# parser, same masking rules as mask_set_hash. Binds whose column
|
|
430
|
+
# is nil (the parser couldn't attribute them) ride through with
|
|
431
|
+
# the formatted value — display falls back to template substitution.
|
|
432
|
+
def mask_where_binds(binds, model_class)
  return [] unless binds

  binds.map do |entry|
    column = entry[:column]
    # Only binds with a known column can be masked; a bind whose column is
    # nil always passes through with its formatted value.
    hide = column && (::EzLogsAgent::EncryptedAttributes.attribute?(model_class, column) ||
                      ::EzLogsAgent::SensitivePatterns.match?(column))

    { column: column, value: hide ? "[FILTERED]" : format_value_for_json(entry[:value]) }
  end
end
|
|
447
|
+
|
|
448
|
+
# For insert_all / upsert_all only column NAMES are shipped, never values.
# A column that the model encrypts, or whose name matches a sensitive
# pattern, has its name replaced by the literal "[FILTERED]" marker rather
# than being removed. The list keeps its original length and order, while
# the column list itself no longer hints that, say, a `password` column
# exists.
def filter_columns(columns, model_class)
  columns.map do |column|
    hidden = ::EzLogsAgent::EncryptedAttributes.attribute?(model_class, column) ||
             ::EzLogsAgent::SensitivePatterns.match?(column)
    hidden ? "[FILTERED]" : column
  end
end
|
|
464
|
+
|
|
465
|
+
# Builds the sentinel resource entry. row_count may be nil (Rails
|
|
466
|
+
# < 7 didn't ship it; some adapters still don't), and 0 is also
|
|
467
|
+
# not informative (PG's update_all returns 0 in some paths) —
|
|
468
|
+
# fall back to "many" so the display reads naturally and the
|
|
469
|
+
# server-side ResourceAggregationStage doesn't drop it.
|
|
470
|
+
def build_resource_ids(model_class, row_count)
  # Only a positive Integer is shown as a count; nil, 0 and anything else
  # fall back to the "many" label.
  label = row_count.is_a?(Integer) && row_count.positive? ? row_count : "many"
  [{ resource_type: model_class.name, resource_id: "bulk:#{label}" }]
end
|
|
474
|
+
|
|
475
|
+
# Mirrors DatabaseCapturer's same-named guard. capture_database = false
|
|
476
|
+
# disables both capturers in one switch.
|
|
477
|
+
def capture_enabled?
  # Reads the live configuration; a configuration that raises counts as
  # "capture disabled".
  config = ::EzLogsAgent.configuration
  config.capture_database
rescue StandardError
  false
end
|
|
482
|
+
|
|
483
|
+
# Uses DatabaseCapturer's existing all_excluded_tables list — one
|
|
484
|
+
# config knob, both capturers obey it. The list is memoized at
|
|
485
|
+
# install time (`@excluded_tables`) so we don't pay a Hash
|
|
486
|
+
# method dispatch on every captured event. Customers who change
|
|
487
|
+
# config at runtime need to call uninstall! + install — the same
|
|
488
|
+
# constraint that already applies to `capture_database`.
|
|
489
|
+
def table_excluded?(model_class)
  # Checked against the exclusion list cached in @excluded_tables; a class
  # without a table_name is never treated as excluded.
  if model_class.respond_to?(:table_name)
    @excluded_tables.include?(model_class.table_name)
  else
    false
  end
rescue StandardError
  # Any failure here (for example no cached list yet) means "not excluded".
  false
end
|
|
496
|
+
|
|
497
|
+
# Detects Rails-generated update_all SQL that we can't render
|
|
498
|
+
# meaningfully OR that is pure framework noise. We are deliberately
|
|
499
|
+
# narrow here — anything that COULD be a real change a customer
|
|
500
|
+
# cares about (even SET col = NULL on N rows) we keep.
|
|
501
|
+
#
|
|
502
|
+
# Filtered shapes:
|
|
503
|
+
#
|
|
504
|
+
# 1. Empty SET hash — the parser couldn't extract any column →
|
|
505
|
+
# value pairs (e.g. unquoted column names in raw SQL). The
|
|
506
|
+
# captured event would render with no "Set X to Y" detail
|
|
507
|
+
# and no humanized filter, just "Updated 26 things" with an
|
|
508
|
+
# empty Operation block. That's misleading.
|
|
509
|
+
#
|
|
510
|
+
# 2. Counter cache / increment! / decrement! — any SET value is
|
|
511
|
+
# a `COALESCE(<col>, 0) + N` expression. Rails uses this
|
|
512
|
+
# shape for `increment_counter`, `increment!`,
|
|
513
|
+
# `update_counters`. The semantics are "bump a number by N",
|
|
514
|
+
# which is plumbing-grade noise: high volume (per-request
|
|
515
|
+
# counter bumps fire dozens of times per minute on a busy
|
|
516
|
+
# tenant) and zero business meaning to a non-technical
|
|
517
|
+
# reader. On the EZLogs server alone these would dominate
|
|
518
|
+
# the timeline.
|
|
519
|
+
#
|
|
520
|
+
# SET <fk> = NULL on N rows IS captured — even though Rails sometimes
|
|
521
|
+
# generates it implicitly as cleanup before a destroy, it's also
|
|
522
|
+
# the shape of a real customer-issued nullification (soft-orphaning,
|
|
523
|
+
# disassociating tags from an item, etc.), and from the SQL alone
|
|
524
|
+
# we can't tell the two apart. Showing it honestly is the right call:
|
|
525
|
+
# the reader sees that N rows had their column X set to NULL.
|
|
526
|
+
#
|
|
527
|
+
# @return [Boolean]
|
|
528
|
+
def framework_rewrite?(operation, parse_result)
  # Only update_all with a Hash parse result can be classified as noise.
  return false unless operation == :update_all && parse_result.is_a?(Hash)

  assignments = parse_result[:set]
  return false unless assignments.is_a?(Hash)

  if assignments.empty?
    # An empty SET on SQL the parser did not flag as unparseable is dropped.
    !parse_result[:unparseable]
  else
    # Counter-style bump: some SET value contains a COALESCE( expression.
    assignments.each_value.any? { |rhs| rhs.to_s.include?("COALESCE(") }
  end
end
|
|
548
|
+
|
|
549
|
+
# Same formatter as DatabaseCapturer. Keeps Date / Time / BigDecimal
|
|
550
|
+
# from collapsing to "[Object]" when they reach Sanitizer / wire.
|
|
551
|
+
def format_value_for_json(value)
  # Class matching is done with `case` (Module#===) on purpose. DateTime is
  # tested before Date because DateTime is a subclass of Date.
  case value
  when ::Time, ::DateTime then value.iso8601
  when ::Date then value.to_s
  when ::BigDecimal then value.to_f
  when ::Array then value.map { |element| format_value_for_json(element) }
  else value
  end
end
|
|
565
|
+
|
|
566
|
+
def safe_log_error(stage, exception)
  message = "[BulkDatabaseCapturer] #{stage} failed: #{exception.class} - #{exception.message}"
  ::EzLogsAgent::Logger.error(message)
rescue StandardError
  # If logging itself fails there is nothing further to do; return nil.
  nil
end
|
|
575
|
+
end
|
|
576
|
+
end
|
|
577
|
+
end
|
|
578
|
+
end
|