dilisense_pep_client 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,505 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent-ruby"
4
+
5
+ module DilisensePepClient
6
+ # Comprehensive metrics collection and monitoring system for FinTech operations
7
+ #
8
+ # This class provides enterprise-grade metrics collection specifically designed for
9
+ # financial services applications that require detailed operational monitoring,
10
+ # performance tracking, and regulatory compliance reporting.
11
+ #
12
+ # The metrics system supports four fundamental metric types:
13
+ # - Counters: Ever-increasing values (e.g., total requests, errors)
14
+ # - Gauges: Values that can increase or decrease (e.g., active connections, memory usage)
15
+ # - Histograms: Distribution of values over time (e.g., response times, request sizes)
16
+ # - Timers: Specialized histograms for measuring operation duration
17
+ #
18
+ # Features:
19
+ # - Thread-safe concurrent operations using concurrent-ruby
20
+ # - FinTech-specific metric categories (screening, compliance, security)
21
+ # - Business metrics for PEP screening operations
22
+ # - Prometheus export format for monitoring integration
23
+ # - Memory-efficient histogram storage with automatic value rotation
24
+ # - Comprehensive tagging system for metric dimensions
25
+ # - Built-in percentile calculations (P50, P95, P99)
26
+ # - Security and compliance event tracking
27
+ # - Circuit breaker metrics integration
28
+ #
29
+ # @example Basic counter usage
30
+ # metrics = Metrics.new
31
+ # metrics.increment_counter("api_requests", tags: { endpoint: "/v1/check" })
32
+ #
33
+ # @example Timing operations
34
+ # result = metrics.time_operation("database_query") do
35
+ # database.execute_query(sql)
36
+ # end
37
+ #
38
+ # @example Recording screening events
39
+ # metrics.record_screening_request(
40
+ # type: "individual",
41
+ # search_terms_count: 2,
42
+ # user_id: "user123"
43
+ # )
44
+ class Metrics
45
# Metric kinds recognised by this collector.
# Every key maps to the identical symbol; the hash is frozen.
METRIC_TYPES = {
  # Monotonically increasing values
  counter: :counter,
  # Values that can increase or decrease
  gauge: :gauge,
  # Distribution of values with percentiles
  histogram: :histogram,
  # Specialized histogram for timing operations
  timer: :timer
}.freeze
53
+
54
# Metric categories, keyed by symbol. The string value is the segment that
# build_metric_name places between the service name and the metric name.
CATEGORIES = {
  # PEP/sanctions screening operations
  screening: "screening",
  # Response times, throughput, resource usage
  performance: "performance",
  # Authentication, authorization, security events
  security: "security",
  # Regulatory compliance, audit events
  compliance: "compliance",
  # Circuit breakers, failures, retries
  reliability: "reliability",
  # Business KPIs, user activity, revenue metrics
  business: "business"
}.freeze
64
+
65
# Build an empty metrics collector.
#
# @param service_name [String] prefix placed in front of every metric name
#   (default: "dilisense_pep_client")
def initialize(service_name: "dilisense_pep_client")
  # Creation time; get_summary subtracts it from the current time to report uptime.
  @start_time = Time.now
  # Serialises metric creation and full resets.
  @mutex = Mutex.new
  # Metric name => metric record.
  @metrics = Concurrent::Map.new
  @service_name = service_name
end
74
+
75
# Add +value+ to a counter metric, creating the counter on first use.
#
# @param name [String, Symbol] base name of the counter
# @param value [Integer] amount to add (default: 1)
# @param tags [Hash] tag dimensions, e.g. { endpoint: "/api/v1" }; they are handed
#   to get_or_create_metric, which only uses them when the metric is first created
# @param category [Symbol] metric category used to build the full name (default: :screening)
# @return [Object] whatever log_metric_update returns
#
# @example Track API requests
#   increment_counter("api_requests", tags: { endpoint: "/v1/check", method: "POST" })
#
# @example Track errors with multiple increments
#   increment_counter("errors_total", value: 5, category: :reliability)
def increment_counter(name, value: 1, tags: {}, category: :screening)
  metric_name = build_metric_name(name, category)
  metric = get_or_create_metric(metric_name, :counter, tags)
  # Concurrent::AtomicFixnum exposes #increment(delta) but no #add, so the former
  # `add(value)` call raised NoMethodError on every counter update.
  metric[:value].increment(value)
  metric[:last_updated] = Time.now

  log_metric_update(:counter, metric_name, value, tags)
end
96
+
97
# Lower a counter by +value+ by delegating to increment_counter with the
# negated amount.
#
# @param name [String, Symbol] base name of the counter
# @param value [Integer] amount to subtract (default: 1)
# @param tags [Hash] tag dimensions forwarded unchanged
# @param category [Symbol] metric category (default: :screening)
# @return [Object] the result of increment_counter
def decrement_counter(name, value: 1, tags: {}, category: :screening)
  negated = -value
  increment_counter(name, value: negated, tags: tags, category: category)
end
100
+
101
+ # Gauge metrics - values that can go up or down
102
# Overwrite a gauge with an absolute value, creating the gauge on first use.
#
# @param name [String, Symbol] base name of the gauge
# @param value [Numeric] new reading
#   (NOTE(review): gauges are created as Concurrent::AtomicFixnum, which is expected
#   to accept Integers only - confirm before passing Floats)
# @param tags [Hash] tag dimensions
# @param category [Symbol] metric category (default: :performance)
# @return [Object] whatever log_metric_update returns
def set_gauge(name, value, tags: {}, category: :performance)
  full_name = build_metric_name(name, category)

  get_or_create_metric(full_name, :gauge, tags).tap do |gauge|
    gauge[:value].value = value
    gauge[:last_updated] = Time.now
  end

  log_metric_update(:gauge, full_name, value, tags)
end
110
+
111
# Move a gauge up by +value+ and report the resulting reading.
#
# @param name [String, Symbol] base name of the gauge
# @param value [Integer] delta to apply (default: 1)
# @param tags [Hash] tag dimensions
# @param category [Symbol] metric category (default: :performance)
# @return [Integer] the gauge value after the increment
def increment_gauge(name, value: 1, tags: {}, category: :performance)
  full_name = build_metric_name(name, category)
  gauge = get_or_create_metric(full_name, :gauge, tags)

  gauge[:value].increment(value).tap do |current|
    gauge[:last_updated] = Time.now
    # The log entry carries the post-increment reading, not the delta.
    log_metric_update(:gauge, full_name, current, tags)
  end
end
120
+
121
# Move a gauge down by +value+ by delegating to increment_gauge with the
# negated delta.
#
# @param name [String, Symbol] base name of the gauge
# @param value [Integer] delta to subtract (default: 1)
# @param tags [Hash] tag dimensions forwarded unchanged
# @param category [Symbol] metric category (default: :performance)
# @return [Object] the result of increment_gauge
def decrement_gauge(name, value: 1, tags: {}, category: :performance)
  negated = -value
  increment_gauge(name, value: negated, tags: tags, category: category)
end
124
+
125
+ # Histogram metrics - distribution of values
126
# Record one observation in a histogram metric, creating it on first use.
#
# The observation count, running sum, min and max cover every value ever
# recorded. Only the most recent 1000 raw values are retained, so percentiles
# computed from them describe that recent window.
#
# @param name [String, Symbol] base name of the histogram
# @param value [Numeric] observed value
# @param tags [Hash] tag dimensions
# @param category [Symbol] metric category (default: :performance)
# @return [Object] whatever log_metric_update returns
def record_histogram(name, value, tags: {}, category: :performance)
  metric_name = build_metric_name(name, category)
  metric = get_or_create_metric(metric_name, :histogram, tags)

  histogram_data = metric[:histogram_data]
  histogram_data[:count].increment
  # concurrent-ruby's atomic holders expose no #add (the previous call raised
  # NoMethodError); #update applies the addition instead.
  histogram_data[:sum].update { |total| total + value }

  samples = histogram_data[:values]
  samples << value
  # Cap the retained samples at 1000 to bound memory use.
  overflow = samples.size - 1000
  samples.shift(overflow) if overflow.positive?

  metric[:last_updated] = Time.now
  update_histogram_stats(metric, value)

  log_metric_update(:histogram, metric_name, value, tags)
end
145
+
146
+ # Timer metrics - measure duration of operations
147
# Run the given block, record how long it took, and count the outcome.
#
# On success the duration goes to "<name>_duration_ms" and
# "<name>_success_total" is incremented. If the block raises, the duration is
# recorded with a status: "error" tag, "<name>_error_total" is incremented
# with the exception class name as error_type, and the exception is re-raised.
#
# @param name [String, Symbol] base name for the derived metrics
# @param tags [Hash] tag dimensions applied to the derived metrics
# @param category [Symbol] metric category (default: :performance)
# @yield the operation to measure
# @return [Object] the block's return value
# @raise [ArgumentError] when no block is given
# @raise [StandardError] whatever the block raises, unchanged
def time_operation(name, tags: {}, category: :performance, &block)
  raise ArgumentError, "time_operation requires a block" unless block

  # The monotonic clock is immune to wall-clock adjustments (NTP, DST), which
  # could otherwise yield negative or inflated durations.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  elapsed_ms = -> { (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000 }

  begin
    result = block.call
  rescue => error
    record_histogram("#{name}_duration_ms", elapsed_ms.call, tags: tags.merge(status: "error"), category: category)
    increment_counter("#{name}_error_total", tags: tags.merge(error_type: error.class.name), category: category)

    raise
  end

  # Success metrics are recorded outside the rescue so that a failure in the
  # metrics layer is not misreported as a failure of the timed operation.
  record_histogram("#{name}_duration_ms", elapsed_ms.call, tags: tags, category: category)
  increment_counter("#{name}_success_total", tags: tags, category: category)

  result
end
165
+
166
+ # Business metrics specific to PEP screening
167
# Count an incoming screening request and refresh the active-sessions gauge.
#
# @param type [Object] screening type, stored as the screening_type tag
# @param search_terms_count [Integer] number of search terms (default: 1)
# @param user_id [Object, nil] only its presence is recorded ("present"/"absent"),
#   never the identifier itself
# @return [Object] the result of set_gauge
def record_screening_request(type:, search_terms_count: 1, user_id: nil)
  user_marker = user_id ? "present" : "absent"
  request_tags = { screening_type: type, terms_count: search_terms_count, user_id: user_marker }

  increment_counter("screening_requests_total", tags: request_tags, category: :business)
  set_gauge("active_screening_sessions", get_active_sessions_count, category: :business)
end
177
+
178
# Record the outcome of a screening call: response count, processing time,
# match volume and per-source usage.
#
# @param type [Object] screening type, stored as the screening_type tag
# @param records_found [Integer] number of matching records returned
# @param processing_time_ms [Numeric] processing time in milliseconds
# @param data_sources [Array] sources consulted; each gets its own usage counter update
# @param cache_hit [Boolean] whether the response came from cache
# @return [Array] the data_sources collection that was iterated
def record_screening_response(
  type:,
  records_found:,
  processing_time_ms:,
  data_sources: [],
  cache_hit: false
)
  has_matches = records_found > 0
  response_tags = {
    screening_type: type,
    records_found: has_matches ? "found" : "none",
    cache_status: cache_hit ? "hit" : "miss"
  }

  increment_counter("screening_responses_total", tags: response_tags, category: :business)
  record_histogram("screening_processing_time_ms", processing_time_ms, tags: response_tags, category: :performance)

  # Match volume is only tracked when something was actually found.
  if has_matches
    record_histogram("screening_records_found", records_found, tags: response_tags, category: :business)
    increment_counter("potential_matches_total", value: records_found, tags: response_tags, category: :compliance)
  end

  data_sources.each do |source|
    source_tags = response_tags.merge(source: source)
    increment_counter("data_source_usage_total", tags: source_tags, category: :business)
  end
end
203
+
204
# Record one outbound API call: request count, duration, optional response
# size, and an error count for 4xx/5xx statuses.
#
# @param endpoint [Object] endpoint identifier, stored as a tag
# @param status_code [Integer, String] HTTP status; its first character forms the
#   status_class tag (e.g. "4xx")
# @param duration_ms [Numeric] call duration in milliseconds
# @param response_size [Numeric, nil] response size in bytes; skipped when nil
# @return [Object, nil] the error counter result for statuses >= 400, otherwise nil
def record_api_call(endpoint:, status_code:, duration_ms:, response_size: nil)
  tags = {
    endpoint: endpoint,
    status_code: status_code,
    status_class: "#{status_code.to_s[0]}xx"
  }

  increment_counter("api_requests_total", tags: tags, category: :performance)
  record_histogram("api_duration_ms", duration_ms, tags: tags, category: :performance)
  record_histogram("api_response_size_bytes", response_size, tags: tags, category: :performance) if response_size

  # status_class already tolerates String codes via to_s; compare on to_i so a
  # String such as "503" is counted as an error instead of raising in `>=`.
  increment_counter("api_errors_total", tags: tags, category: :reliability) if status_code.to_i >= 400
end
222
+
223
# Count a security event, with an additional counter for critical ones.
#
# @param event_type [Object] kind of event, stored as a tag
# @param severity [Symbol, String] severity level; :critical or "critical" also
#   increments "critical_security_events_total"
# @param user_id [Object, nil] only its presence is recorded ("yes"/"no")
# @return [Object, nil] the critical counter result for critical events, otherwise nil
def record_security_event(event_type:, severity:, user_id: nil)
  tags = {
    event_type: event_type,
    severity: severity,
    user_present: user_id ? "yes" : "no"
  }

  increment_counter("security_events_total", tags: tags, category: :security)

  # Compare on the string form: the sibling recorders compare against Strings,
  # so a caller passing "critical" must not silently skip the critical counter.
  increment_counter("critical_security_events_total", tags: tags, category: :security) if severity.to_s == "critical"
end
236
+
237
# Count a compliance event, with an additional counter for violations.
#
# @param framework [Object] compliance framework identifier, stored as a tag
# @param event_type [Object] kind of event, stored as a tag
# @param status [String, Symbol] event status; "violation" or :violation also
#   increments "compliance_violations_total"
# @return [Object, nil] the violation counter result for violations, otherwise nil
def record_compliance_event(framework:, event_type:, status:)
  tags = {
    framework: framework,
    event_type: event_type,
    status: status
  }

  increment_counter("compliance_events_total", tags: tags, category: :compliance)

  # Compare on the string form so a Symbol status is not silently ignored
  # (record_security_event is written against Symbols).
  increment_counter("compliance_violations_total", tags: tags, category: :compliance) if status.to_s == "violation"
end
250
+
251
# Record a circuit breaker state change and its current failure count.
#
# @param service [Object] protected service identifier, stored as a tag
# @param state [String, Symbol] breaker state; "open" or :open also increments
#   "circuit_breaker_trips_total"
# @param failure_count [Integer] failures observed so far (default: 0)
# @return [Object, nil] the trip counter result when the breaker opened, otherwise nil
def record_circuit_breaker_event(service:, state:, failure_count: 0)
  tags = { service: service, state: state }

  increment_counter("circuit_breaker_events_total", tags: tags, category: :reliability)
  set_gauge("circuit_breaker_failure_count", failure_count, tags: tags, category: :reliability)

  # Compare on the string form so a Symbol state such as :open still counts
  # as a trip.
  increment_counter("circuit_breaker_trips_total", tags: tags, category: :reliability) if state.to_s == "open"
end
261
+
262
+ # Retrieve metrics data
263
# Produce a plain-hash snapshot of a single metric.
#
# @param name [String, Symbol] base name when +category+ is given, otherwise the
#   full stored metric name
# @param category [Symbol, nil] when present, the full name is derived through
#   build_metric_name
# @return [Hash, nil] counters and gauges yield name/type/value/tags/last_updated;
#   histograms yield count, sum, mean, min, max and the p50/p95/p99 percentiles;
#   nil when the metric is unknown or has any other type
def get_metric(name, category: nil)
  metric_name = category ? build_metric_name(name, category) : name
  metric = @metrics[metric_name]
  return nil unless metric

  kind = metric[:type]

  if %i[counter gauge].include?(kind)
    {
      name: metric_name,
      type: kind,
      value: metric[:value].value,
      tags: metric[:tags],
      last_updated: metric[:last_updated]
    }
  elsif kind == :histogram
    data = metric[:histogram_data]
    {
      name: metric_name,
      type: kind,
      count: data[:count].value,
      sum: data[:sum].value,
      mean: calculate_mean(metric),
      min: metric[:min],
      max: metric[:max],
      p50: calculate_percentile(metric, 0.5),
      p95: calculate_percentile(metric, 0.95),
      p99: calculate_percentile(metric, 0.99),
      tags: metric[:tags],
      last_updated: metric[:last_updated]
    }
  end
end
296
+
297
# Snapshot every stored metric, optionally restricted to one category.
#
# @param category [Symbol, nil] when given, only metrics whose full name starts
#   with the "<service>_<category>_" prefix produced by build_metric_name are returned
# @return [Hash{String => Hash}] full metric name => get_metric snapshot
def get_all_metrics(category: nil)
  # An empty base name makes build_metric_name return exactly the sanitized
  # prefix it puts on every metric of this category. Matching on that prefix
  # avoids the false positives of a substring test (e.g. the performance metric
  # "screening_processing_time_ms" showing up under :screening) and also copes
  # with categories that are not listed in CATEGORIES.
  prefix = category && build_metric_name("", category)
  metrics_data = {}

  @metrics.each do |name, _metric|
    next if prefix && !name.start_with?(prefix)

    metrics_data[name] = get_metric(name)
  end

  metrics_data
end
308
+
309
# Build a high-level overview of the collector and the hosting process.
#
# @return [Hash] service name, uptime in seconds, number of stored metrics,
#   per-category metric counts, the most recent last_updated timestamp
#   (nil when no metrics exist) and basic runtime information
def get_summary
  newest_update = @metrics.values.map { |metric| metric[:last_updated] }.max
  runtime = {
    ruby_version: RUBY_VERSION,
    platform: RUBY_PLATFORM,
    process_id: Process.pid,
    memory_usage: get_memory_usage
  }

  {
    service_name: @service_name,
    uptime_seconds: Time.now - @start_time,
    total_metrics: @metrics.size,
    categories: get_metrics_by_category,
    last_updated: newest_update,
    system_info: runtime
  }
end
324
+
325
# Render every stored metric in the Prometheus text exposition format.
#
# Counters and gauges produce a "# TYPE" line followed by one sample line.
# Histogram metrics hold client-side quantiles plus a sum and a count, which is
# what Prometheus calls a summary, so they are exposed as one: quantile samples
# on the base name (with a `quantile` label), then `<name>_sum` and `<name>_count`.
#
# @return [String] the exposition text, terminated by a line feed; an empty
#   string when no metrics are stored
def export_prometheus_format
  lines = []

  @metrics.each do |name, metric|
    tags = metric[:tags]

    case metric[:type]
    when :counter, :gauge
      lines << "# TYPE #{name} #{metric[:type]}"
      lines << format_prometheus_line(name, metric[:value].value, tags)
    when :histogram
      # Declaring "histogram" would require cumulative `_bucket` series, which
      # this collector does not keep; "summary" matches the data it does have.
      lines << "# TYPE #{name} summary"

      [0.5, 0.95, 0.99].each do |quantile|
        quantile_tags = tags.merge(quantile: quantile)
        lines << format_prometheus_line(name, calculate_percentile(metric, quantile), quantile_tags)
      end

      histogram_data = metric[:histogram_data]
      lines << format_prometheus_line("#{name}_sum", histogram_data[:sum].value, tags)
      lines << format_prometheus_line("#{name}_count", histogram_data[:count].value, tags)
    end
  end

  return "" if lines.empty?

  # The exposition format requires the last line to end with a line feed.
  "#{lines.join("\n")}\n"
end
353
+
354
# Remove every stored metric while holding the creation mutex, then log the
# reset when a logger is available.
#
# @return [Object, nil] the logger's return value, or nil when nothing was logged
def reset_metrics!
  @mutex.synchronize do
    @metrics.clear
    # Same availability guard as log_metric_update, extended to a missing
    # constant: a reset must not fail just because no `Logger.logger` exists.
    if defined?(Logger) && Logger.respond_to?(:logger)
      Logger.logger.info("Metrics reset", service_name: @service_name)
    end
  end
end
360
+
361
# Drop a single metric from the store.
#
# @param name [String, Symbol] base name when +category+ is given, otherwise the
#   full stored metric name
# @param category [Symbol, nil] when present, the full name is derived through
#   build_metric_name
# @return [Object, nil] whatever the store's delete returns for that key
def reset_metric(name, category: nil)
  key = name
  key = build_metric_name(name, category) if category
  @metrics.delete(key)
end
365
+
366
+ private
367
+
368
# Compose the full metric name "<service>_<category>_<name>" and replace every
# character outside [a-zA-Z0-9_] with an underscore.
#
# @param name [String, Symbol] base metric name
# @param category [Symbol] key into CATEGORIES; an unlisted category contributes
#   its own string form
# @return [String] the sanitized full metric name
def build_metric_name(name, category)
  segment = CATEGORIES.fetch(category) { category.to_s }
  raw_name = format("%s_%s_%s", @service_name, segment, name)
  raw_name.gsub(/[^a-zA-Z0-9_]/, "_")
end
372
+
373
# Return the metric stored under +name+, creating it when absent.
#
# The store is read first without the mutex; only on a miss is the mutex
# taken, the store re-read, and a new metric built and stored.
#
# @param name [String] full metric name
# @param type [Symbol] metric type passed to create_metric on first creation
# @param tags [Hash] tags passed to create_metric on first creation; an
#   already-stored metric keeps the tags it was created with
# @return [Hash] the stored metric record
def get_or_create_metric(name, type, tags)
  existing = @metrics.fetch(name, nil)
  return existing if existing

  @mutex.synchronize do
    # Re-read inside the lock: another thread may have stored the metric
    # between the lookup above and acquiring the mutex.
    @metrics.fetch(name) { @metrics[name] = create_metric(type, tags) }
  end
end
382
+
383
# Build the initial in-memory record for a new metric.
#
# @param type [Symbol] :counter, :gauge or :histogram; any other type receives
#   only the common fields
# @param tags [Hash] tag dimensions stored with the metric
# @return [Hash] the metric record: type, tags, created_at and last_updated, plus
#   an atomic :value for counters/gauges, or :histogram_data (count, sum,
#   retained values) with :min/:max for histograms
def create_metric(type, tags)
  created = Time.now
  base_metric = {
    type: type,
    tags: tags,
    created_at: created,
    last_updated: created
  }

  case type
  when :counter, :gauge
    base_metric[:value] = Concurrent::AtomicFixnum.new(0)
  when :histogram
    base_metric.merge!(
      histogram_data: {
        count: Concurrent::AtomicFixnum.new(0),
        # Observations are frequently Floats (time_operation records fractional
        # milliseconds) and AtomicFixnum only holds Integers, so the running
        # sum lives in an AtomicReference, which can hold any Numeric.
        sum: Concurrent::AtomicReference.new(0),
        values: Concurrent::Array.new
      },
      # Sentinels that the first recorded value always replaces.
      min: Float::INFINITY,
      max: -Float::INFINITY
    )
  end

  base_metric
end
408
+
409
# Fold a new observation into the histogram's running minimum and maximum.
#
# @param metric [Hash] histogram record carrying :min and :max
# @param value [Numeric] the observation just recorded
# @return [Numeric] the maximum after the update
def update_histogram_stats(metric, value)
  lowest, highest = metric.values_at(:min, :max)
  metric[:min] = value < lowest ? value : lowest
  metric[:max] = value > highest ? value : highest
end
413
+
414
# Arithmetic mean of everything recorded in a histogram: running sum divided
# by observation count.
#
# @param metric [Hash] histogram record carrying :histogram_data
# @return [Float, Integer] the mean, or the Integer 0 when nothing was recorded
def calculate_mean(metric)
  data = metric[:histogram_data]
  observations = data[:count].value

  observations.zero? ? 0 : data[:sum].value.to_f / observations
end
421
+
422
# Estimate a percentile from the histogram's retained values.
#
# The values are sorted and the element at the rounded position
# percentile * (n - 1) is returned; no interpolation is performed.
#
# @param metric [Hash] histogram record carrying :histogram_data
# @param percentile [Float] fraction between 0 and 1, e.g. 0.95
# @return [Numeric] the selected value, or 0 when no values are retained
def calculate_percentile(metric, percentile)
  ordered = metric[:histogram_data][:values].to_a.sort
  return 0 if ordered.empty?

  rank = (percentile * (ordered.length - 1)).round
  ordered.at(rank) || 0
end
429
+
430
# Number of active screening sessions reported through the
# "active_screening_sessions" gauge.
#
# Placeholder: no session tracking is wired in yet, so a constant is returned.
#
# @return [Integer] always 1
def get_active_sessions_count
  placeholder_sessions = 1
  placeholder_sessions
end
435
+
436
# Count the stored metrics per known category.
#
# A metric belongs to a category when its full name starts with the
# "<service>_<category>_" prefix that build_metric_name generates. Categories
# without metrics are omitted, and metrics created under a category that is not
# listed in CATEGORIES are not counted.
#
# @return [Hash{Symbol => Integer}] category key => number of metrics
def get_metrics_by_category
  # An empty base name yields exactly the sanitized prefix used for a category.
  # Prefix matching replaces the former substring test, which filed business and
  # performance metrics such as "screening_requests_total" under :screening.
  prefixes = CATEGORIES.keys.map { |key| [key, build_metric_name("", key)] }
  counts = {}

  @metrics.each do |name, _|
    category_key, = prefixes.find { |_, prefix| name.start_with?(prefix) }
    next unless category_key

    counts[category_key] = counts.fetch(category_key, 0) + 1
  end

  counts
end
451
+
452
# Best-effort memory reading for the current process, obtained by asking the
# external `ps` command for this PID's RSS column and multiplying by 1024.
#
# NOTE(review): the multiplication presumes `ps` reports RSS in kibibytes -
# confirm for the target platforms.
#
# @return [Integer] the scaled reading; 0 when the command fails with a
#   StandardError or prints nothing numeric
def get_memory_usage
  rss_reading = `ps -o rss= -p #{Process.pid}`.strip.to_i
  rss_reading * 1024
rescue
  0
end
460
+
461
# Format one sample line of the Prometheus text exposition format.
#
# @param name [String] metric name
# @param value [Object] sample value, rendered with its string form
# @param tags [Hash] label name => label value; an empty hash yields a line
#   without a label block
# @return [String] `name value` or `name{k="v",...} value`
def format_prometheus_line(name, value, tags)
  return "#{name} #{value}" if tags.empty?

  labels = tags.map do |key, raw_value|
    # Label values must have backslash, double quote and line feed escaped,
    # otherwise a value containing them produces an unparseable line.
    escaped = raw_value.to_s.gsub(/[\\"\n]/, "\\" => "\\\\", "\"" => "\\\"", "\n" => "\\n")
    "#{key}=\"#{escaped}\""
  end

  "#{name}{#{labels.join(",")}} #{value}"
end
469
+
470
# Emit a debug log entry describing a metric update, when a logger is available.
#
# @param type [Symbol] metric type (:counter, :gauge, :histogram)
# @param name [String] full metric name
# @param value [Object] the value that was applied or the resulting reading
# @param tags [Hash] tag dimensions supplied by the caller
# @return [Object, nil] the logger's return value, or nil when nothing was logged
def log_metric_update(type, name, value, tags)
  # `defined?` first: if no Logger constant is loaded at all, calling
  # Logger.respond_to? would raise NameError and break every metric update.
  return unless defined?(Logger) && Logger.respond_to?(:logger)

  Logger.logger.debug("Metric updated", {
    metric_type: type,
    metric_name: name,
    value: value,
    tags: tags,
    service: @service_name
  })
end
481
+ end
482
+
483
# Process-wide access point to a single shared Metrics collector.
#
# Every public method of Metrics can be called directly on the registry
# instance; calls are forwarded through method_missing.
#
# @example
#   MetricsRegistry.instance.increment_counter("api_requests")
class MetricsRegistry
  # Serialises first-time creation of the shared instance.
  INSTANCE_LOCK = Mutex.new
  private_constant :INSTANCE_LOCK

  # Return the shared registry, creating it on first use.
  #
  # Creation happens under a lock so that threads racing on the first call
  # cannot end up with different registries (and lose metrics recorded on the
  # discarded one).
  #
  # @return [MetricsRegistry] the shared registry
  def self.instance
    @instance || INSTANCE_LOCK.synchronize { @instance ||= new }
  end

  # Create a registry wrapping a fresh Metrics collector.
  def initialize
    @metrics = Metrics.new
  end

  # Forward any public Metrics method to the wrapped collector.
  #
  # @raise [NoMethodError] when the collector has no such public method
  def method_missing(method_name, *args, **kwargs, &block)
    if @metrics.respond_to?(method_name)
      # public_send keeps the collector's private helpers private even if the
      # guard above is ever loosened.
      @metrics.public_send(method_name, *args, **kwargs, &block)
    else
      super
    end
  end

  # Report only what method_missing actually forwards: the collector's public
  # methods, regardless of +include_private+.
  def respond_to_missing?(method_name, include_private = false)
    @metrics.respond_to?(method_name) || super
  end
end
505
+ end