ez_logs_agent 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1094 @@
1
+ # frozen_string_literal: true
2
+
3
+ module EzLogsAgent
4
+ module Middleware
5
+ # Rack middleware for capturing HTTP requests as Events.
6
+ #
7
+ # Responsibilities:
8
+ # - Wrap request lifecycle
9
+ # - Measure duration
10
+ # - Extract HTTP metadata
11
+ # - Build Events via EventBuilder
12
+ # - Push Events into Buffer
13
+ # - Never crash host application
14
+ #
15
+ # This middleware is purely additive and defensive.
16
+ # It NEVER swallows exceptions from downstream.
17
+ class HttpRequest
18
def initialize(app)
  # Keep a handle on the next Rack application in the stack; #call
  # forwards every request to it.
  @app = app
end
21
+
22
+ # Rack interface: call(env)
23
+ # @param env [Hash] Rack environment
24
+ # @return [Array] Rack response tuple [status, headers, body]
25
def call(env)
  # Rack entry point.
  #
  # Delegates to the downstream app and, when capture is enabled for this
  # request, records an event describing it. The response tuple is returned
  # exactly as received and downstream exceptions are always re-raised.
  #
  # @param env [Hash] Rack environment
  # @return [Array] Rack response tuple [status, headers, body]

  # Pass straight through when capture is disabled or the path is excluded.
  return @app.call(env) unless should_capture?(env)

  # Establish the correlation context before the app runs so everything
  # downstream of this middleware shares it.
  setup_correlation

  start_time = current_time_ms
  status = nil
  exception = nil
  graphql_response_errors = nil

  begin
    status, headers, body = @app.call(env)

    # SAFETY CRITICAL: the body is never reassigned or iterated here; it is
    # only handed to a read-only inspector, and only for GraphQL POSTs that
    # answered 200.
    if graphql_request?(env) && status == 200
      graphql_response_errors = safely_read_graphql_errors(body)
    end

    [status, headers, body]
  rescue => e
    # Remember the failure for the event, then let it propagate untouched.
    exception = e
    raise
  ensure
    begin
      # Actor extraction happens after the request so controller context
      # is available; then the event is captured (success or failure).
      extract_and_set_actor(env)
      capture_event(env, start_time, status, exception, graphql_response_errors)
    rescue => agent_error
      # An agent-side failure must neither replace the host's response nor
      # mask the downstream exception that is currently propagating.
      log_error("HttpRequest capture failed: #{agent_error.message}")
    ensure
      # Always reset per-request context, even if the steps above failed,
      # so nothing leaks into the next request served by this thread.
      begin
        clear_correlation
      ensure
        clear_actor
      end
    end
  end
end
75
+
76
+ private
77
+
78
+ # Determine if request should be captured
79
+ # @param env [Hash] Rack environment
80
+ # @return [Boolean]
81
def should_capture?(env)
  # A request is captured only when HTTP capture is switched on and its
  # path is not excluded. Any failure in the decision itself is logged and
  # treated as "do not capture".
  if capture_enabled?
    !excluded_path?(env)
  else
    false
  end
rescue => error
  log_error("should_capture? failed: #{error.message}")
  false
end
89
+
90
+ # Check if HTTP capture is enabled
91
+ # @return [Boolean]
92
def capture_enabled?
  # Reads the capture_http flag from the agent configuration. If the
  # configuration cannot be read for any reason, capture is treated as off.
  begin
    settings = EzLogsAgent.configuration
    settings.capture_http
  rescue StandardError
    false
  end
end
97
+
98
+ # Check if request path is excluded
99
+ # Uses all_excluded_paths which combines defaults with user-configured
100
+ # Supports exact, prefix ("/path*"), suffix ("*/path") and contains ("*/path*") patterns
101
+ # Also checks file extensions (e.g., .js, .css, .png)
102
+ # @param env [Hash] Rack environment
103
+ # @return [Boolean]
104
def excluded_path?(env)
  # True when PATH_INFO matches one of the configured exclusion patterns
  # or ends with an excluded file extension. A failure while checking is
  # logged and reported as "not excluded".
  request_path = env["PATH_INFO"] || ""
  patterns = EzLogsAgent.configuration.all_excluded_paths

  if patterns.any? { |candidate| path_matches?(request_path, candidate) }
    true
  elsif excluded_extension?(request_path)
    true
  else
    false
  end
rescue => error
  log_error("excluded_path? failed: #{error.message}")
  false
end
119
+
120
+ # Check if path matches an exclusion pattern
121
+ # @param path [String] Request path
122
+ # @param pattern [String] Exclusion pattern
123
+ # Supports:
124
+ # - Exact match: "/favicon.ico"
125
+ # - Prefix match: "/rails/active_storage*"
126
+ # - Suffix match: "*/logout" (matches /admin/logout, /logout)
127
+ # - Contains match: "*/logout*" (matches /admin/logout, /logout/callback)
128
+ # @return [Boolean]
129
def path_matches?(path, pattern)
  # Wildcard semantics are decided by where "*" sits in the pattern:
  #   "*x*" -> path contains x
  #   "*x"  -> path ends with x
  #   "x*"  -> path starts with x
  #   "x"   -> path equals x
  leading_star = pattern.start_with?("*")
  trailing_star = pattern.end_with?("*")

  return path.include?(pattern[1..-2]) if leading_star && trailing_star
  return path.end_with?(pattern[1..-1]) if leading_star
  return path.start_with?(pattern.chomp("*")) if trailing_star

  path == pattern
end
149
+
150
+ # Check if path has an excluded file extension
151
+ # @param path [String] Request path
152
+ # @return [Boolean]
153
def excluded_extension?(path)
  # True when the path ends with one of the default excluded extensions.
  # Blank paths are never excluded; failures are logged and yield false.
  if path.nil? || path.empty?
    false
  else
    suffixes = EzLogsAgent::Configuration::DEFAULT_EXCLUDED_EXTENSIONS
    suffixes.any? { |suffix| path.end_with?(suffix) }
  end
rescue => error
  log_error("excluded_extension? failed: #{error.message}")
  false
end
165
+
166
+ # Capture event and push to buffer
167
+ # @param env [Hash] Rack environment
168
+ # @param start_time [Integer] Request start time (ms)
169
+ # @param status [Integer, nil] HTTP status code
170
+ # @param exception [Exception, nil] Exception if raised
171
+ # @param graphql_response_errors [String, nil] GraphQL errors from response body
172
+ # @return [void]
173
def capture_event(env, start_time, status, exception, graphql_response_errors = nil)
  # Builds the event for a finished request and pushes it to the buffer.
  # The event is stamped with the request's START time (start_time is in
  # milliseconds). Any failure here is logged, never raised.
  elapsed_ms = current_time_ms - start_time
  outcome, error_message = determine_outcome(status, exception, graphql_response_errors)
  started_at = Time.at(start_time / 1000.0).utc

  event = build_event(
    env: env,
    status: status,
    duration_ms: elapsed_ms,
    outcome: outcome,
    error_message: error_message,
    timestamp: started_at
  )
  return unless event

  EzLogsAgent::Buffer.push(event)
rescue => error
  log_error("capture_event failed: #{error.message}")
end
195
+
196
+ # Build Event hash via EventBuilder
197
+ # @param env [Hash] Rack environment
198
+ # @param status [Integer, nil] HTTP status code
199
+ # @param duration_ms [Integer] Request duration
200
+ # @param outcome [String] "success" or "failure"
201
+ # @param error_message [String, nil] Error message if failure
202
+ # @param timestamp [Time] Request start time
203
+ # @return [Hash, nil] Event hash or nil if build fails or should be skipped
204
def build_event(env:, status:, duration_ms:, outcome:, error_message:, timestamp:)
  # Assembles the attributes for an "http_request" event and delegates
  # construction to EventBuilder. Returns nil when the source data says
  # the request should be skipped, or when construction fails.
  source_data = extract_source_data(env, status)

  if source_data == :skip
    nil
  else
    attributes = {
      source_type: "http_request",
      source_data: source_data,
      outcome: outcome,
      correlation_id: current_correlation_id,
      resource_ids: [],
      context: extract_context,
      duration_ms: duration_ms,
      error_message: error_message,
      timestamp: timestamp
    }
    EzLogsAgent::EventBuilder.build(**attributes)
  end
rescue => error
  log_error("build_event failed: #{error.message}")
  nil
end
225
+
226
+ # Extract source_data hash from Rack env
227
+ # @param env [Hash] Rack environment
228
+ # @param status [Integer, nil] HTTP status code
229
+ # @return [Hash]
230
def extract_source_data(env, status)
  # Collects the HTTP facts recorded on the event. GraphQL requests add
  # their operation metadata; all other requests add sanitized request
  # params. Returns :skip when the GraphQL metadata asks for the request to
  # be dropped, and {} (after logging) if anything raises.
  record = {
    method: env["REQUEST_METHOD"],
    path: env["PATH_INFO"],
    status_code: status,
    controller: extract_controller(env),
    action: extract_action(env),
    format: extract_format(env),
    user_agent: env["HTTP_USER_AGENT"],
    remote_ip: extract_remote_ip(env)
  }

  graphql = extract_graphql_metadata(env)
  return :skip if graphql == :skip

  extras =
    if graphql
      graphql
    else
      request_params = extract_rest_params(env)
      request_params ? { request_params: request_params } : {}
    end

  # Drop nil-valued entries so only known facts are recorded.
  record.merge(extras).compact
rescue => error
  log_error("extract_source_data failed: #{error.message}")
  {}
end
263
+
264
+ # Extract Rails controller name (if available)
265
+ # @param env [Hash] Rack environment
266
+ # @return [String, nil]
267
def extract_controller(env)
  # Controller name from the path parameters stored in the env, or nil
  # when that entry is absent.
  route_params = env["action_dispatch.request.path_parameters"]
  route_params ? route_params[:controller] : nil
end
272
+
273
+ # Extract Rails action name (if available)
274
+ # @param env [Hash] Rack environment
275
+ # @return [String, nil]
276
def extract_action(env)
  # Action name from the path parameters stored in the env, or nil when
  # that entry is absent.
  route_params = env["action_dispatch.request.path_parameters"]
  route_params ? route_params[:action] : nil
end
281
+
282
+ # Extract format (json, html, etc.) if available
283
+ # @param env [Hash] Rack environment
284
+ # @return [String, nil]
285
def extract_format(env)
  # Prefers the :format path parameter; otherwise derives "json" or "xml"
  # from CONTENT_TYPE. Returns nil when neither source gives an answer.
  route_params = env["action_dispatch.request.path_parameters"]
  routed_format = route_params && route_params[:format]
  return routed_format if routed_format

  content_type = env["CONTENT_TYPE"]
  if content_type&.include?("json")
    "json"
  elsif content_type&.include?("xml")
    "xml"
  end
end
299
+
300
+ # Extract remote IP address
301
+ # @param env [Hash] Rack environment
302
+ # @return [String, nil]
303
def extract_remote_ip(env)
  # Best-effort client IP for the event.
  #
  # Uses the first entry of X-Forwarded-For when it is non-blank, otherwise
  # REMOTE_ADDR. An empty or whitespace-only header no longer yields nil/""
  # while a usable REMOTE_ADDR is available.
  #
  # NOTE: X-Forwarded-For is client-supplied unless a trusted proxy rewrites
  # it, so this value is informational only.
  #
  # @param env [Hash] Rack environment
  # @return [String, nil]
  forwarded = env["HTTP_X_FORWARDED_FOR"]
  if forwarded
    first_hop = forwarded.split(",").first&.strip
    return first_hop unless first_hop.nil? || first_hop.empty?
  end

  env["REMOTE_ADDR"]
end
312
+
313
+ # Extract GraphQL metadata from request body (if applicable)
314
+ # @param env [Hash] Rack environment
315
+ # @return [Hash, nil] GraphQL metadata or nil if not a GraphQL request
316
+ # @return [:skip] Special return value indicating request should not be captured
317
def extract_graphql_metadata(env)
  # For POSTs to a GraphQL path, parses the JSON request body and returns
  # the operation name, operation type and sanitized variables.
  # Returns nil when the request is not GraphQL or the body is unusable,
  # and :skip when the operation is on the exclusion list.
  graphql_post = env["REQUEST_METHOD"] == "POST" && graphql_path?(env["PATH_INFO"])
  return nil unless graphql_post

  raw_body = read_request_body(env)
  return nil if raw_body.nil? || raw_body.empty?

  payload = JSON.parse(raw_body)
  return nil unless payload.is_a?(Hash)

  document = payload["query"]

  # Prefer the explicit operationName; otherwise try to read it from the
  # query document itself.
  name = payload["operationName"] || extract_operation_name_from_query(document)
  return :skip if excluded_graphql_operation?(name)

  # Type comes from the document when possible, else from the name.
  kind = infer_operation_type(document) || infer_operation_type_from_name(name)

  metadata = {
    graphql_operation: name || "anonymous",
    graphql_type: kind,
    graphql_variables: sanitize_graphql_variables(payload["variables"])
  }
  metadata.compact
rescue JSON::ParserError => error
  log_error("extract_graphql_metadata JSON parse failed: #{error.message}")
  nil
rescue => error
  log_error("extract_graphql_metadata failed: #{error.message}")
  nil
end
363
+
364
+ # Extract params from REST requests
365
+ # - GET requests: query string params
366
+ # - POST/PATCH/PUT/DELETE: body params
367
+ #
368
+ # @param env [Hash] Rack environment
369
+ # @return [Hash, nil] Sanitized request params or nil
370
def extract_rest_params(env)
  # Picks the parameter source by HTTP verb: query string for GET, body
  # for POST/PATCH/PUT/DELETE. GraphQL paths and other verbs yield nil.
  http_verb = env["REQUEST_METHOD"]
  return nil if graphql_path?(env["PATH_INFO"])

  case http_verb
  when "GET"
    extract_query_string_params(env)
  when "POST", "PATCH", "PUT", "DELETE"
    extract_body_params(env)
  end
rescue => error
  log_error("extract_rest_params failed: #{error.message}")
  nil
end
385
+
386
+ # Extract and sanitize query string params for GET requests
387
+ # @param env [Hash] Rack environment
388
+ # @return [Hash, nil] Sanitized query params or nil
389
def extract_query_string_params(env)
  # Parses QUERY_STRING with Rack's nested-query parser and sanitizes the
  # result. Returns nil for a missing/empty query string, an empty parse
  # result, or any failure (which is logged).
  raw_query = env["QUERY_STRING"]
  return nil if raw_query.nil? || raw_query.empty?

  parsed = Rack::Utils.parse_nested_query(raw_query)
  parsed.empty? ? nil : sanitize_rest_params(parsed)
rescue => error
  log_error("extract_query_string_params failed: #{error.message}")
  nil
end
402
+
403
+ # Extract body params from mutation requests (POST, PATCH, PUT, DELETE)
404
+ #
405
+ # Tries multiple sources in order:
406
+ # 1. Rails filtered_parameters (after controller processing)
407
+ # 2. Rack form hash (for form-encoded submissions)
408
+ # 3. JSON body parsing (for API requests)
409
+ #
410
+ # @param env [Hash] Rack environment
411
+ # @return [Hash, nil] Sanitized request params or nil
412
def extract_body_params(env)
  # Tries each parameter source in priority order and sanitizes the first
  # one that yields a non-empty result:
  #   1. Rails filtered parameters
  #   2. Rack form hash
  #   3. JSON request body
  # Later sources are not consulted once an earlier one succeeds.
  sources = %i[extract_rails_params extract_rack_form_params extract_json_body_params]

  sources.each do |source|
    candidate = send(source, env)
    return sanitize_rest_params(candidate) if candidate && !candidate.empty?
  end

  nil
rescue => error
  log_error("extract_body_params failed: #{error.message}")
  nil
end
430
+
431
+ # Extract params from Rails request if available
432
+ # @param env [Hash] Rack environment
433
+ # @return [Hash, nil]
434
def extract_rails_params(env)
  # Reads filtered_parameters from the request object stored in the env
  # (when it offers that method) and strips routing/CSRF keys. Returns nil
  # when no usable Hash remains or when anything raises.
  request = env["action_dispatch.request"]
  return nil unless request && request.respond_to?(:filtered_parameters)

  filtered = request.filtered_parameters
  return nil unless filtered.is_a?(Hash)

  remaining = filtered.except("controller", "action", "format", "authenticity_token")
  remaining unless remaining.empty?
rescue => error
  log_error("extract_rails_params failed: #{error.message}")
  nil
end
451
+
452
+ # Extract params from Rack's form hash (for form-encoded submissions)
453
+ # @param env [Hash] Rack environment
454
+ # @return [Hash, nil]
455
def extract_rack_form_params(env)
  # Uses the form hash stored under "rack.request.form_hash", minus
  # routing, CSRF and method-override keys. Returns nil when the entry is
  # missing, not a Hash, or empty after stripping.
  form = env["rack.request.form_hash"]
  return nil unless form.is_a?(Hash) && !form.empty?

  remaining = form.except("controller", "action", "format", "authenticity_token", "_method")
  remaining unless remaining.empty?
rescue => error
  log_error("extract_rack_form_params failed: #{error.message}")
  nil
end
468
+
469
+ # Extract params from JSON body
470
+ # @param env [Hash] Rack environment
471
+ # @return [Hash, nil]
472
def extract_json_body_params(env)
  # When CONTENT_TYPE mentions "json", parses the request body and returns
  # it if it decodes to a Hash. Everything else (other content types,
  # blank bodies, non-object JSON, parse errors) yields nil.
  return nil unless env["CONTENT_TYPE"].to_s.include?("json")

  raw_body = read_request_body(env)
  return nil if raw_body.nil? || raw_body.empty?

  decoded = JSON.parse(raw_body)
  decoded if decoded.is_a?(Hash)
rescue JSON::ParserError => error
  log_error("extract_json_body_params JSON parse failed: #{error.message}")
  nil
rescue => error
  log_error("extract_json_body_params failed: #{error.message}")
  nil
end
488
+
489
+ # Sanitize REST params (reuses GraphQL sanitization logic)
490
+ # @param params [Hash] Request params
491
+ # @return [Hash, nil]
492
def sanitize_rest_params(params)
  # Strips routing/CSRF keys and passes every remaining top-level value
  # through sanitize_variable_value. Returns nil for non-Hash or empty
  # input, when nothing remains after stripping, or on failure.
  return nil unless params.is_a?(Hash) && !params.empty?

  remaining = params.except("controller", "action", "format", "authenticity_token")
  return nil if remaining.empty?

  sanitized = {}
  remaining.each do |name, raw_value|
    sanitized[name] = sanitize_variable_value(name, raw_value)
  end
  sanitized
rescue => error
  log_error("sanitize_rest_params failed: #{error.message}")
  nil
end
506
+
507
+ # Check if path indicates a GraphQL endpoint
508
+ # @param path [String] Request path
509
+ # @return [Boolean]
510
def graphql_path?(path)
  # True for exactly "/graphql" or anything nested under "/graphql/".
  # A nil path is never a GraphQL path.
  !path.nil? && (path == "/graphql" || path.start_with?("/graphql/"))
end
514
+
515
+ # Check if request is a GraphQL POST request
516
+ # Used to determine if we should inspect response body for errors
517
+ # @param env [Hash] Rack environment
518
+ # @return [Boolean]
519
def graphql_request?(env)
  # A GraphQL request here means: HTTP POST to a GraphQL path. Failures
  # while deciding are logged and answered with false.
  return false unless env["REQUEST_METHOD"] == "POST"

  graphql_path?(env["PATH_INFO"])
rescue => error
  log_error("graphql_request? failed: #{error.message}")
  false
end
525
+
526
+ # Maximum response body size to read for GraphQL error extraction (32KB)
527
+ # GraphQL error responses are small JSON - if it's bigger, skip it
528
MAX_GRAPHQL_RESPONSE_SIZE = 32_768 # bytes (32 KiB); larger response bodies are not inspected
529
+
530
+ # PARANOID-SAFE GraphQL error extraction
531
+ #
532
+ # This method extracts GraphQL errors from the response body with
533
+ # ABSOLUTE SAFETY guarantees. It will return nil on ANY doubt.
534
+ #
535
+ # SAFETY RULES (non-negotiable):
536
+ # 1. NEVER call behavior-inducing methods on the body (.each, .read, .close, .to_a, etc.)
537
+ # 2. NEVER reassign or replace the body
538
+ # 3. ONLY use instance_variable_get to peek inside wrappers (read-only memory access)
539
+ # 4. ONLY proceed if we find a plain Array of plain Strings
540
+ # 5. ONLY proceed if total size is small
541
+ # 6. On ANY exception, return nil immediately
542
+ #
543
+ # WHY SO PARANOID:
544
+ # - This gem runs in production apps we don't control
545
+ # - Response bodies can be: streaming, file-backed, lazy, proxied, etc.
546
+ # - Calling methods on unknown body types can: hang, crash, corrupt, consume memory
547
+ # - We'd rather miss some GraphQL errors than risk breaking ANY app
548
+ #
549
+ # WHAT WE DO:
550
+ # - Use instance_variable_get (pure memory read, zero side effects) to peek inside wrappers
551
+ # - Rack::BodyProxy stores the real body in @body instance variable
552
+ # - ActionDispatch::Response::RackBody stores it in @response.@stream.@buf or similar
553
+ # - We try to find the underlying Array without calling any methods
554
+ #
555
+ # @param body [Object] Rack response body - WE DO NOT TOUCH THIS (except instance_variable_get)
556
+ # @return [String, nil] Error message or nil (nil = safe default)
557
def safely_read_graphql_errors(body)
  # Looks for an already-buffered Array behind the response body and, if
  # one is found, extracts GraphQL error messages from it. Any doubt or
  # exception results in nil; nothing is logged on purpose.
  buffered_parts = extract_inner_array_safely(body)
  buffered_parts ? read_array_for_graphql_errors(buffered_parts) : nil
rescue StandardError
  nil
end
572
+
573
+ # Extract the underlying Array from a response body using ONLY instance_variable_get
574
+ #
575
+ # instance_variable_get is a PURE MEMORY READ - it cannot:
576
+ # - Trigger any callbacks or side effects
577
+ # - Cause network IO
578
+ # - Modify any state
579
+ # - Raise exceptions (except if the object is frozen in weird ways)
580
+ #
581
+ # This makes it safe to peek inside wrapper objects.
582
+ #
583
+ # @param body [Object] Response body (might be Array, BodyProxy, or other wrapper)
584
+ # @return [Array, nil] The underlying Array if found and safe, nil otherwise
585
def extract_inner_array_safely(body)
  # Returns the plain Array behind a response body, or nil.
  #
  # Only three shapes are recognised, matched by exact class / class name:
  #   - a plain Array (returned as-is)
  #   - Rack::BodyProxy, whose @body is inspected
  #   - ActionDispatch::Response::RackBody, following
  #     @response -> @stream -> @buf
  # Wrappers are inspected solely via instance_variable_get; subclasses of
  # Array and unknown wrappers are rejected.
  return body if body.class == Array

  candidate =
    case body.class.name
    when "Rack::BodyProxy"
      # One level only: a wrapped-again body is not unwrapped further.
      body.instance_variable_get(:@body)
    when "ActionDispatch::Response::RackBody"
      response = body.instance_variable_get(:@response)
      stream = response && response.instance_variable_get(:@stream)
      stream && stream.instance_variable_get(:@buf)
    end

  candidate.class == Array ? candidate : nil
rescue StandardError
  nil
end
623
+
624
+ # Read an Array for GraphQL errors with full validation
625
+ #
626
+ # PRECONDITION: inner_array.class == Array (verified by caller)
627
+ #
628
+ # @param inner_array [Array] The array to read (must be exactly Array class)
629
+ # @return [String, nil] Error message or nil
630
def read_array_for_graphql_errors(inner_array)
  # Validates the buffered chunks before reading them:
  #   - there must be at least one chunk
  #   - every chunk must be exactly a String (subclasses rejected)
  #   - combined byte size must not exceed MAX_GRAPHQL_RESPONSE_SIZE
  # Only then are the chunks joined into a new string and parsed.
  return nil if inner_array.empty?
  return nil if inner_array.any? { |chunk| chunk.class != String }

  byte_total = inner_array.sum(&:bytesize)
  return nil if byte_total > MAX_GRAPHQL_RESPONSE_SIZE

  parse_graphql_errors(inner_array.join)
rescue StandardError
  nil
end
656
+
657
+ # Parse GraphQL response JSON and extract error messages
658
+ #
659
+ # This only receives a String that we created via join, so it's safe.
660
+ #
661
+ # @param response_text [String] Response body text (our copy, not original)
662
+ # @return [String, nil] Combined error message or nil if no errors
663
def parse_graphql_errors(response_text)
  # Parses a JSON response and returns the "message" of each entry in its
  # top-level "errors" array: a single message as-is, several joined with
  # "; ". Returns nil when there is nothing to report or the text is not
  # the expected JSON shape.
  return nil if response_text.nil? || response_text.empty?

  document = JSON.parse(response_text)
  return nil unless document.class == Hash

  error_list = document["errors"]
  return nil unless error_list.class == Array && !error_list.empty?

  messages = error_list.each_with_object([]) do |entry, collected|
    next unless entry.class == Hash

    message = entry["message"]
    collected << message unless message.nil?
  end
  return nil if messages.empty?

  messages.size > 1 ? messages.join("; ") : messages.first
rescue JSON::ParserError
  nil
rescue StandardError
  nil
end
685
+
686
+ # Check if GraphQL operation should be excluded from capture
687
+ # Supports exact match and prefix match (patterns ending with *)
688
+ # @param operation_name [String, nil] GraphQL operation name
689
+ # @return [Boolean]
690
def excluded_graphql_operation?(operation_name)
  # True when the operation name matches any configured exclusion
  # pattern. Blank names are never excluded; a failing check is logged and
  # answered with false.
  if operation_name.nil? || operation_name.empty?
    false
  else
    patterns = EzLogsAgent.configuration.all_excluded_graphql_operations
    patterns.any? { |candidate| graphql_operation_matches?(operation_name, candidate) }
  end
rescue => error
  log_error("excluded_graphql_operation? failed: #{error.message}")
  false
end
699
+
700
+ # Check if operation name matches an exclusion pattern
701
+ # @param name [String] Operation name
702
+ # @param pattern [String] Exclusion pattern (exact or with * for prefix)
703
+ # @return [Boolean]
704
def graphql_operation_matches?(name, pattern)
  # Patterns ending in "*" are prefix matches (the single trailing star is
  # dropped); all other patterns must equal the name exactly.
  return name == pattern unless pattern.end_with?("*")

  name.start_with?(pattern.chomp("*"))
end
713
+
714
+ # Read request body from Rack env
715
+ # @param env [Hash] Rack environment
716
+ # @return [String, nil] Request body or nil
717
def read_request_body(env)
  # Returns the full request body as a String, or nil when it cannot be
  # read safely.
  #
  # The stream is rewound BEFORE reading so a body that was already
  # consumed (and left at EOF) is still read in full, and rewound again
  # afterwards so other readers find it at the start.
  #
  # Inputs that cannot be rewound are left untouched: consuming a stream we
  # cannot restore would hide the body from everyone else.
  #
  # @param env [Hash] Rack environment
  # @return [String, nil] Request body or nil
  input = env["rack.input"]
  return nil unless input
  return nil unless input.respond_to?(:read) && input.respond_to?(:rewind)

  input.rewind
  body = input.read
  input.rewind
  body
rescue => e
  log_error("read_request_body failed: #{e.message}")
  nil
end
729
+
730
+ # Infer GraphQL operation type from query string
731
+ # @param query [String, nil] GraphQL query string
732
+ # @return [String, nil] "query", "mutation", or "subscription"
733
def infer_operation_type(query)
  # Classifies a GraphQL document by how it begins: a leading
  # mutation/subscription/query keyword (case-insensitive) or a bare "{"
  # for shorthand queries. Patterns are anchored with \A so only the very
  # start of the document is considered. Returns nil when undecidable.
  return nil if query.nil? || query.empty?

  document = query.strip
  rules = [
    [/\A\s*mutation/i, "mutation"],
    [/\A\s*subscription/i, "subscription"],
    [/\A\s*query/i, "query"],
    [/\A\s*\{/, "query"]
  ]

  rules.each do |pattern, type|
    return type if pattern.match?(document)
  end

  nil
rescue => error
  log_error("infer_operation_type failed: #{error.message}")
  nil
end
753
+
754
+ # Infer GraphQL operation type from operation name
755
+ # Uses naming conventions: Get*, Fetch*, List*, Find* are queries
756
+ # @param operation_name [String, nil] GraphQL operation name
757
+ # @return [String, nil] "query", "mutation", or nil
758
def infer_operation_type_from_name(operation_name)
  # Guesses the operation type from a case-sensitive name prefix:
  # read-style verbs mean "query", write-style verbs mean "mutation".
  # Query prefixes are checked first. Returns nil when no prefix matches.
  return nil if operation_name.nil? || operation_name.empty?

  read_prefixes = %w[Get Fetch List Find Load Search Query]
  write_prefixes = %w[Create Update Delete Remove Add Set Upsert]

  if operation_name.start_with?(*read_prefixes)
    "query"
  elsif operation_name.start_with?(*write_prefixes)
    "mutation"
  end
rescue => error
  log_error("infer_operation_type_from_name failed: #{error.message}")
  nil
end
782
+
783
+ # Extract GraphQL operation name from query string
784
+ # Parses the operation name from queries like:
785
+ # "query blueprints($ids: [ID!]) { ... }" → "blueprints"
786
+ # "mutation CreateUser($input: CreateUserInput!) { ... }" → "CreateUser"
787
+ # "query { users { id } }" → nil (anonymous query)
788
+ # "{ users { id } }" → nil (shorthand query)
789
+ #
790
+ # @param query [String, nil] GraphQL query string
791
+ # @return [String, nil] Operation name or nil if not found/anonymous
792
def extract_operation_name_from_query(query)
  # Reads the operation name that follows a leading query/mutation/
  # subscription keyword, e.g. "mutation CreateUser(...)" -> "CreateUser".
  # Anonymous and shorthand documents have no name and yield nil.
  return nil if query.nil? || query.empty?

  found = query.match(/\A\s*(?:query|mutation|subscription)\s+([_A-Za-z][_0-9A-Za-z]*)/i)
  found ? found[1] : nil
rescue => error
  log_error("extract_operation_name_from_query failed: #{error.message}")
  nil
end
807
+
808
+ # Default sensitive variable key patterns
809
+ # These are redacted from GraphQL variables before capture
810
SENSITIVE_VARIABLE_PATTERNS = [
  # passwords
  "password", "passwd", "pwd",
  # tokens
  "token", "access_token", "refresh_token", "api_token", "auth_token",
  # secrets
  "secret", "api_secret", "client_secret",
  # keys
  "api_key", "apikey", "private_key", "privatekey", "secret_key", "secretkey",
  # credentials / auth
  "credential", "auth", "authorization",
  # encrypted payloads
  "encrypted", "encrypted_data",
  # government identifiers
  "ssn", "social_security",
  # payment card data
  "credit_card", "card_number", "cvv", "cvc"
].freeze
820
+
821
+ # Maximum array size to display in full (larger arrays show count only)
822
MAX_ARRAY_DISPLAY_SIZE = 5 # element-count threshold used when sanitizing array values
823
+
824
+ # Sanitize GraphQL variables for safe capture
825
+ # - Redacts sensitive variable values
826
+ # - Replaces nested objects with "[Object]"
827
+ # - Shows small primitive arrays in full, large ones as "[Array(N)]"
828
+ # - Returns nil for empty/invalid input
829
+ #
830
+ # @param variables [Hash, nil] GraphQL variables from request
831
+ # @return [Hash, nil] Sanitized variables or nil
832
def sanitize_graphql_variables(variables)
  # Runs every top-level GraphQL variable through sanitize_variable_value.
  # Anything that is not a non-empty Hash yields nil, as does a failure
  # during sanitization (which is logged).
  return nil unless variables.is_a?(Hash) && !variables.empty?

  sanitized = {}
  variables.each do |name, raw_value|
    sanitized[name] = sanitize_variable_value(name, raw_value)
  end
  sanitized
rescue => error
  log_error("sanitize_graphql_variables failed: #{error.message}")
  nil
end
844
+
845
# Sanitize one variable value according to its key and its type.
#
# The key is checked first: a sensitive key hides the value completely,
# whatever its type. Otherwise hashes and arrays are handed to their
# dedicated sanitizers and every other value is returned unchanged.
#
# @param key [String] Variable key name
# @param value [Object] Variable value
# @param depth [Integer] Current nesting depth, forwarded to the nested sanitizers
# @return [Object] "[FILTERED]", a sanitized Hash/Array representation, or the value itself
def sanitize_variable_value(key, value, depth = 0)
  return "[FILTERED]" if sensitive_variable_key?(key)

  case value
  when Hash then sanitize_nested_object(value, depth)
  when Array then sanitize_array_value(value, depth)
  else value
  end
end
869
+
870
# Deepest level at which nested hashes are still expanded; a hash reached
# at this depth or beyond is replaced by the "[Object]" placeholder.
MAX_NESTING_DEPTH = 3

# Sanitize a nested object (Hash) one level further down.
#
# Every entry is passed to sanitize_variable_value with the depth
# increased by one, so sensitive keys are redacted at any expanded level.
#
# @param hash [Hash] Nested object to sanitize
# @param depth [Integer] Current nesting depth
# @return [Hash, String] a new sanitized Hash, or "[Object]" once
#   MAX_NESTING_DEPTH has been reached
def sanitize_nested_object(hash, depth)
  return "[Object]" unless depth < MAX_NESTING_DEPTH

  next_depth = depth + 1
  hash.map { |key, value| [key, sanitize_variable_value(key, value, next_depth)] }.to_h
end
887
+
888
# Sanitize an array value.
#
# The result depends on the element types and on the array size:
# - empty array: a new empty array
# - only primitives, at most MAX_ARRAY_DISPLAY_SIZE items: the array itself
# - only primitives, more items: a String holding the inspected preview
#   followed by the total count
# - any non-primitive, at most MAX_ARRAY_DISPLAY_SIZE items: a new array
#   with every item passed through sanitize_array_item
# - any non-primitive, more items: a Hash with "_truncated", "_count" and
#   "_preview" keys, the preview holding the first sanitized items
#
# @param array [Array] Array value to sanitize
# @param depth [Integer] Current nesting depth
# @return [Array, String, Hash] Sanitized array or truncated summary
def sanitize_array_value(array, depth = 0)
  return [] if array.empty?

  total = array.size
  fits = total <= MAX_ARRAY_DISPLAY_SIZE

  if array.all? { |element| primitive_value?(element) }
    return array if fits

    leading = array.first(MAX_ARRAY_DISPLAY_SIZE)
    return "#{leading}... (#{total} total)"
  end

  # For an array that fits, first(MAX_ARRAY_DISPLAY_SIZE) is the whole array.
  cleaned = array.first(MAX_ARRAY_DISPLAY_SIZE).map { |element| sanitize_array_item(element, depth) }
  return cleaned if fits

  { "_truncated" => true, "_count" => total, "_preview" => cleaned }
end
922
+
923
# Sanitize one element of an array.
#
# Primitives are kept, hashes and arrays are sanitized one level deeper,
# and any other kind of object is replaced by the "[Object]" placeholder.
#
# @param item [Object] Array item to sanitize
# @param depth [Integer] Current nesting depth
# @return [Object] Sanitized item
def sanitize_array_item(item, depth)
  return item if primitive_value?(item)

  case item
  when Hash then sanitize_nested_object(item, depth + 1)
  when Array then sanitize_array_value(item, depth + 1)
  else "[Object]"
  end
end
938
+
939
# Tell whether a value is a primitive: nil, a String, any Numeric, or a
# boolean. Everything else (hashes, arrays, symbols, other objects) is not.
#
# @param value [Object] Value to check
# @return [Boolean]
def primitive_value?(value)
  case value
  when NilClass, String, Numeric, TrueClass, FalseClass
    true
  else
    false
  end
end
949
+
950
# Check if a variable key name matches a sensitive pattern.
#
# The key is lower-cased and tested by substring against the built-in
# SENSITIVE_VARIABLE_PATTERNS first, then against the user-configured
# EzLogsAgent.configuration.excluded_graphql_variable_keys (compared
# case-insensitively). Blank or nil user patterns are ignored: an empty
# string is a substring of every key, so such an entry would otherwise
# redact every variable.
#
# @param key [String] Variable key name
# @return [Boolean] true when the key is sensitive, and also true when the
#   check itself raises (fail safe)
def sensitive_variable_key?(key)
  key_lower = key.to_s.downcase

  # Check default patterns
  return true if SENSITIVE_VARIABLE_PATTERNS.any? { |pattern| key_lower.include?(pattern) }

  # Check user-configured patterns
  user_patterns = EzLogsAgent.configuration.excluded_graphql_variable_keys || []
  user_patterns.any? do |pattern|
    needle = pattern.to_s.downcase
    # String#include?("") is always true; skip blank entries.
    !needle.empty? && key_lower.include?(needle)
  end
rescue => e
  log_error("sensitive_variable_key? failed: #{e.message}")
  true # Fail safe: treat as sensitive if check fails
end
966
+
967
# Assemble the context hash attached to an event.
#
# The hash gets :actor when EzLogsAgent::Actor.current is set, and
# :user_id when RequestStore is loaded and holds a :user_id entry (kept
# for backward compatibility). Errors are logged and reported as nil.
#
# @return [Hash, nil] the context, or nil when nothing was collected or
#   collection failed
def extract_context
  collected = {}

  current_actor = EzLogsAgent::Actor.current
  collected[:actor] = current_actor if current_actor

  if defined?(RequestStore)
    legacy_user_id = RequestStore.store[:user_id]
    collected[:user_id] = legacy_user_id if legacy_user_id
  end

  return nil if collected.empty?

  collected
rescue => error
  log_error("extract_context failed: #{error.message}")
  nil
end
987
+
988
# Resolve the actor for this request through the configured hook.
#
# Nothing happens unless EzLogsAgent.configuration.actor_from_request
# responds to #call. The hook receives the Rack env and the value stored
# under "action_controller.instance" (nil when absent); a truthy result
# is stored in EzLogsAgent::Actor.current. A failing hook is logged and
# the request goes on without an actor.
#
# @param env [Hash] Rack environment
# @return [void]
def extract_and_set_actor(env)
  actor_hook = EzLogsAgent.configuration.actor_from_request
  return unless actor_hook.respond_to?(:call)

  resolved = actor_hook.call(env, env["action_controller.instance"])
  EzLogsAgent::Actor.current = resolved if resolved
rescue => error
  log_error("extract_and_set_actor failed: #{error.message}")
end
1005
+
1006
# Reset the actor context once the request is over.
# A failure during cleanup is only logged; it is never raised.
# @return [void]
def clear_actor
  begin
    EzLogsAgent::Actor.clear
  rescue StandardError => error
    log_error("clear_actor failed: #{error.message}")
  end
end
1014
+
1015
# Start a correlation context for this request.
# Asks EzLogsAgent::Correlation for a new id and makes it the current
# one. On failure the error is logged and the request continues without
# correlation.
# @return [void]
def setup_correlation
  fresh_id = EzLogsAgent::Correlation.generate
  EzLogsAgent::Correlation.current = fresh_id
rescue StandardError => error
  log_error("setup_correlation failed: #{error.message}")
end
1024
+
1025
# Reset the correlation context once the request is over.
# A failure during cleanup is only logged; it is never raised.
# @return [void]
def clear_correlation
  begin
    EzLogsAgent::Correlation.clear
  rescue StandardError => error
    log_error("clear_correlation failed: #{error.message}")
  end
end
1033
+
1034
# Read the correlation id currently held by EzLogsAgent::Correlation.
# @return [String, nil] the current id, or nil when reading it fails
def current_correlation_id
  begin
    EzLogsAgent::Correlation.current
  rescue StandardError => error
    log_error("current_correlation_id failed: #{error.message}")
    nil
  end
end
1042
+
1043
# Determine outcome and error_message based on status, exception, and GraphQL errors.
#
# Rules, checked in this order (first match wins):
# - Exception raised           => failure, "<ExceptionClass>: <message>"
# - status of 400 or above     => failure, "HTTP <status>"
# - GraphQL errors in response => failure, the errors string
# - anything else (2xx, 3xx, nil status) => success, no message
#
# The status is coerced with #to_i before the comparison: a Rack app may
# hand back a non-Integer status (for example the String "404"), and
# comparing that directly with an Integer would raise inside this
# middleware. A status that cannot be coerced is treated like a missing
# one.
#
# @param status [Integer, #to_i, nil] HTTP status code
# @param exception [Exception, nil] Exception if raised
# @param graphql_response_errors [String, nil] GraphQL errors from response body
# @return [Array<String, String|nil>] [outcome, error_message]
def determine_outcome(status, exception, graphql_response_errors = nil)
  # Exception raised = failure
  return ["failure", "#{exception.class}: #{exception.message}"] if exception

  # 4xx/5xx = failure (user's intent was not fulfilled)
  status_code = status.respond_to?(:to_i) ? status.to_i : 0
  return ["failure", "HTTP #{status}"] if status_code >= 400

  # GraphQL errors in response body = failure, even though the HTTP status is fine
  return ["failure", graphql_response_errors] if graphql_response_errors

  # 2xx, 3xx, nil = success
  ["success", nil]
end
1077
+
1078
# Current wall-clock time (Time.now) as whole milliseconds since the epoch.
# @return [Integer]
def current_time_ms
  seconds = Time.now.to_f
  (seconds * 1000).to_i
end
1083
+
1084
# Report an internal middleware error through EzLogsAgent::Logger,
# prefixed with "[HttpRequest] ". Any error raised while logging is
# swallowed on purpose: logging must never crash the middleware.
# @param message [String]
# @return [void]
def log_error(message)
  tagged = "[HttpRequest] #{message}"
  EzLogsAgent::Logger.error(tagged)
rescue StandardError
  nil
end
1092
+ end
1093
+ end
1094
+ end