dilisense_pep_client 0.1.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env.example +2 -0
- data/CLAUDE.md +141 -0
- data/LICENSE +21 -0
- data/Makefile +98 -0
- data/README.md +500 -0
- data/Rakefile +37 -0
- data/dilisense_pep_client.gemspec +51 -0
- data/lib/dilisense_pep_client/audit_logger.rb +653 -0
- data/lib/dilisense_pep_client/circuit_breaker.rb +257 -0
- data/lib/dilisense_pep_client/client.rb +254 -0
- data/lib/dilisense_pep_client/configuration.rb +15 -0
- data/lib/dilisense_pep_client/errors.rb +488 -0
- data/lib/dilisense_pep_client/logger.rb +207 -0
- data/lib/dilisense_pep_client/metrics.rb +505 -0
- data/lib/dilisense_pep_client/validator.rb +456 -0
- data/lib/dilisense_pep_client/version.rb +5 -0
- data/lib/dilisense_pep_client.rb +107 -0
- metadata +246 -0
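
As a point of reference, the sketch below shows how a project would typically pull in this release before reading the source that follows; the pessimistic version constraint is the editor's assumption, not something stated in the diff.

# Gemfile
gem "dilisense_pep_client", "~> 0.1.0"

# application code
require "dilisense_pep_client"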
data/lib/dilisense_pep_client/metrics.rb
@@ -0,0 +1,505 @@
# frozen_string_literal: true

require "concurrent-ruby"

module DilisensePepClient
  # Comprehensive metrics collection and monitoring system for FinTech operations
  #
  # This class provides enterprise-grade metrics collection specifically designed for
  # financial services applications that require detailed operational monitoring,
  # performance tracking, and regulatory compliance reporting.
  #
  # The metrics system supports four fundamental metric types:
  # - Counters: Ever-increasing values (e.g., total requests, errors)
  # - Gauges: Values that can increase or decrease (e.g., active connections, memory usage)
  # - Histograms: Distribution of values over time (e.g., response times, request sizes)
  # - Timers: Specialized histograms for measuring operation duration
  #
  # Features:
  # - Thread-safe concurrent operations using concurrent-ruby
  # - FinTech-specific metric categories (screening, compliance, security)
  # - Business metrics for PEP screening operations
  # - Prometheus export format for monitoring integration
  # - Memory-efficient histogram storage with automatic value rotation
  # - Comprehensive tagging system for metric dimensions
  # - Built-in percentile calculations (P50, P95, P99)
  # - Security and compliance event tracking
  # - Circuit breaker metrics integration
  #
  # @example Basic counter usage
  #   metrics = Metrics.new
  #   metrics.increment_counter("api_requests", tags: { endpoint: "/v1/check" })
  #
  # @example Timing operations
  #   result = metrics.time_operation("database_query") do
  #     database.execute_query(sql)
  #   end
  #
  # @example Recording screening events
  #   metrics.record_screening_request(
  #     type: "individual",
  #     search_terms_count: 2,
  #     user_id: "user123"
  #   )
  class Metrics
    # Standard metric types supported by the system
    # Each type has specific behaviors and use cases for monitoring
    METRIC_TYPES = {
      counter: :counter,      # Monotonically increasing values
      gauge: :gauge,          # Values that can increase or decrease
      histogram: :histogram,  # Distribution of values with percentiles
      timer: :timer           # Specialized histogram for timing operations
    }.freeze

    # FinTech-specific metric categories for organized monitoring
    # Each category groups related metrics for easier analysis and alerting
    CATEGORIES = {
      screening: "screening",      # PEP/sanctions screening operations
      performance: "performance",  # Response times, throughput, resource usage
      security: "security",        # Authentication, authorization, security events
      compliance: "compliance",    # Regulatory compliance, audit events
      reliability: "reliability",  # Circuit breakers, failures, retries
      business: "business"         # Business KPIs, user activity, revenue metrics
    }.freeze

    # Initialize a new metrics collection instance
    #
    # @param service_name [String] Name of the service for metric prefixing (default: "dilisense_pep_client")
    def initialize(service_name: "dilisense_pep_client")
      @service_name = service_name
      @metrics = Concurrent::Map.new # Thread-safe metric storage
      @start_time = Time.now         # Track service uptime
      @mutex = Mutex.new             # Synchronize metric creation operations
    end

    # Increment a counter metric by specified value
    # Counter metrics track monotonically increasing values like total requests, errors, or events
    #
    # @param name [String, Symbol] Name of the counter metric
    # @param value [Integer] Value to increment by (default: 1)
    # @param tags [Hash] Tag dimensions for the metric (e.g., { endpoint: "/api/v1" })
    # @param category [Symbol] Metric category for organization (default: :screening)
    #
    # @example Track API requests
    #   increment_counter("api_requests", tags: { endpoint: "/v1/check", method: "POST" })
    #
    # @example Track errors with multiple increments
    #   increment_counter("errors_total", value: 5, category: :reliability)
    def increment_counter(name, value: 1, tags: {}, category: :screening)
      metric_name = build_metric_name(name, category)
      metric = get_or_create_metric(metric_name, :counter, tags)
      metric[:value].add(value) # Thread-safe atomic increment
      metric[:last_updated] = Time.now

      log_metric_update(:counter, metric_name, value, tags)
    end

    def decrement_counter(name, value: 1, tags: {}, category: :screening)
      increment_counter(name, value: -value, tags: tags, category: category)
    end

    # Gauge metrics - values that can go up or down
    def set_gauge(name, value, tags: {}, category: :performance)
      metric_name = build_metric_name(name, category)
      metric = get_or_create_metric(metric_name, :gauge, tags)
      metric[:value].value = value
      metric[:last_updated] = Time.now

      log_metric_update(:gauge, metric_name, value, tags)
    end

    def increment_gauge(name, value: 1, tags: {}, category: :performance)
      metric_name = build_metric_name(name, category)
      metric = get_or_create_metric(metric_name, :gauge, tags)
      new_value = metric[:value].increment(value)
      metric[:last_updated] = Time.now

      log_metric_update(:gauge, metric_name, new_value, tags)
      new_value
    end

    def decrement_gauge(name, value: 1, tags: {}, category: :performance)
      increment_gauge(name, value: -value, tags: tags, category: category)
    end

    # Histogram metrics - distribution of values
    def record_histogram(name, value, tags: {}, category: :performance)
      metric_name = build_metric_name(name, category)
      metric = get_or_create_metric(metric_name, :histogram, tags)

      histogram_data = metric[:histogram_data]
      histogram_data[:count].increment
      histogram_data[:sum].add(value)
      histogram_data[:values] << value

      # Keep only last 1000 values to prevent memory issues
      if histogram_data[:values].size > 1000
        histogram_data[:values].shift(histogram_data[:values].size - 1000)
      end

      metric[:last_updated] = Time.now
      update_histogram_stats(metric, value)

      log_metric_update(:histogram, metric_name, value, tags)
    end

    # Timer metrics - measure duration of operations
    def time_operation(name, tags: {}, category: :performance, &block)
      start_time = Time.now

      begin
        result = block.call
        duration = (Time.now - start_time) * 1000 # Convert to milliseconds
        record_histogram("#{name}_duration_ms", duration, tags: tags, category: category)
        increment_counter("#{name}_success_total", tags: tags, category: category)

        result
      rescue => error
        duration = (Time.now - start_time) * 1000
        record_histogram("#{name}_duration_ms", duration, tags: tags.merge(status: "error"), category: category)
        increment_counter("#{name}_error_total", tags: tags.merge(error_type: error.class.name), category: category)

        raise
      end
    end

    # Business metrics specific to PEP screening
    def record_screening_request(type:, search_terms_count: 1, user_id: nil)
      tags = {
        screening_type: type,
        terms_count: search_terms_count,
        user_id: user_id ? "present" : "absent"
      }

      increment_counter("screening_requests_total", tags: tags, category: :business)
      set_gauge("active_screening_sessions", get_active_sessions_count, category: :business)
    end

    def record_screening_response(
      type:,
      records_found:,
      processing_time_ms:,
      data_sources: [],
      cache_hit: false
    )
      tags = {
        screening_type: type,
        records_found: records_found > 0 ? "found" : "none",
        cache_status: cache_hit ? "hit" : "miss"
      }

      increment_counter("screening_responses_total", tags: tags, category: :business)
      record_histogram("screening_processing_time_ms", processing_time_ms, tags: tags, category: :performance)

      if records_found > 0
        record_histogram("screening_records_found", records_found, tags: tags, category: :business)
        increment_counter("potential_matches_total", value: records_found, tags: tags, category: :compliance)
      end

      data_sources.each do |source|
        increment_counter("data_source_usage_total", tags: tags.merge(source: source), category: :business)
      end
    end

    def record_api_call(endpoint:, status_code:, duration_ms:, response_size: nil)
      tags = {
        endpoint: endpoint,
        status_code: status_code,
        status_class: "#{status_code.to_s[0]}xx"
      }

      increment_counter("api_requests_total", tags: tags, category: :performance)
      record_histogram("api_duration_ms", duration_ms, tags: tags, category: :performance)

      if response_size
        record_histogram("api_response_size_bytes", response_size, tags: tags, category: :performance)
      end

      if status_code >= 400
        increment_counter("api_errors_total", tags: tags, category: :reliability)
      end
    end

    def record_security_event(event_type:, severity:, user_id: nil)
      tags = {
        event_type: event_type,
        severity: severity,
        user_present: user_id ? "yes" : "no"
      }

      increment_counter("security_events_total", tags: tags, category: :security)

      if severity == :critical
        increment_counter("critical_security_events_total", tags: tags, category: :security)
      end
    end

    def record_compliance_event(framework:, event_type:, status:)
      tags = {
        framework: framework,
        event_type: event_type,
        status: status
      }

      increment_counter("compliance_events_total", tags: tags, category: :compliance)

      if status == "violation"
        increment_counter("compliance_violations_total", tags: tags, category: :compliance)
      end
    end

    def record_circuit_breaker_event(service:, state:, failure_count: 0)
      tags = { service: service, state: state }

      increment_counter("circuit_breaker_events_total", tags: tags, category: :reliability)
      set_gauge("circuit_breaker_failure_count", failure_count, tags: tags, category: :reliability)

      if state == "open"
        increment_counter("circuit_breaker_trips_total", tags: tags, category: :reliability)
      end
    end

    # Retrieve metrics data
    def get_metric(name, category: nil)
      metric_name = category ? build_metric_name(name, category) : name
      metric = @metrics[metric_name]

      return nil unless metric

      case metric[:type]
      when :counter, :gauge
        {
          name: metric_name,
          type: metric[:type],
          value: metric[:value].value,
          tags: metric[:tags],
          last_updated: metric[:last_updated]
        }
      when :histogram
        histogram_data = metric[:histogram_data]
        {
          name: metric_name,
          type: metric[:type],
          count: histogram_data[:count].value,
          sum: histogram_data[:sum].value,
          mean: calculate_mean(metric),
          min: metric[:min],
          max: metric[:max],
          p50: calculate_percentile(metric, 0.5),
          p95: calculate_percentile(metric, 0.95),
          p99: calculate_percentile(metric, 0.99),
          tags: metric[:tags],
          last_updated: metric[:last_updated]
        }
      end
    end

    def get_all_metrics(category: nil)
      metrics_data = {}

      @metrics.each do |name, metric|
        next if category && !name.include?(CATEGORIES[category])

        metrics_data[name] = get_metric(name)
      end

      metrics_data
    end

    def get_summary
      {
        service_name: @service_name,
        uptime_seconds: Time.now - @start_time,
        total_metrics: @metrics.size,
        categories: get_metrics_by_category,
        last_updated: @metrics.values.map { |m| m[:last_updated] }.max,
        system_info: {
          ruby_version: RUBY_VERSION,
          platform: RUBY_PLATFORM,
          process_id: Process.pid,
          memory_usage: get_memory_usage
        }
      }
    end

    def export_prometheus_format
      output = []

      @metrics.each do |name, metric|
        case metric[:type]
        when :counter
          output << "# TYPE #{name} counter"
          output << format_prometheus_line(name, metric[:value].value, metric[:tags])
        when :gauge
          output << "# TYPE #{name} gauge"
          output << format_prometheus_line(name, metric[:value].value, metric[:tags])
        when :histogram
          output << "# TYPE #{name} histogram"
          histogram_data = metric[:histogram_data]
          tags = metric[:tags]

          output << format_prometheus_line("#{name}_count", histogram_data[:count].value, tags)
          output << format_prometheus_line("#{name}_sum", histogram_data[:sum].value, tags)

          [0.5, 0.95, 0.99].each do |quantile|
            percentile_tags = tags.merge(quantile: quantile)
            output << format_prometheus_line("#{name}_quantile", calculate_percentile(metric, quantile), percentile_tags)
          end
        end
      end

      output.join("\n")
    end

    def reset_metrics!
      @mutex.synchronize do
        @metrics.clear
        Logger.logger.info("Metrics reset", service_name: @service_name)
      end
    end

    def reset_metric(name, category: nil)
      metric_name = category ? build_metric_name(name, category) : name
      @metrics.delete(metric_name)
    end

    private

    def build_metric_name(name, category)
      category_prefix = CATEGORIES[category] || category.to_s
      "#{@service_name}_#{category_prefix}_#{name}".gsub(/[^a-zA-Z0-9_]/, "_")
    end

    def get_or_create_metric(name, type, tags)
      @metrics.fetch(name) do
        @mutex.synchronize do
          @metrics.fetch(name) do
            @metrics[name] = create_metric(type, tags)
          end
        end
      end
    end

    def create_metric(type, tags)
      base_metric = {
        type: type,
        tags: tags,
        created_at: Time.now,
        last_updated: Time.now
      }

      case type
      when :counter, :gauge
        base_metric[:value] = Concurrent::AtomicFixnum.new(0)
      when :histogram
        base_metric.merge!(
          histogram_data: {
            count: Concurrent::AtomicFixnum.new(0),
            sum: Concurrent::AtomicFixnum.new(0),
            values: Concurrent::Array.new
          },
          min: Float::INFINITY,
          max: -Float::INFINITY
        )
      end

      base_metric
    end

    def update_histogram_stats(metric, value)
      metric[:min] = [metric[:min], value].min
      metric[:max] = [metric[:max], value].max
    end

    def calculate_mean(metric)
      histogram_data = metric[:histogram_data]
      count = histogram_data[:count].value
      return 0 if count == 0

      histogram_data[:sum].value.to_f / count
    end

    def calculate_percentile(metric, percentile)
      values = metric[:histogram_data][:values].to_a.sort
      return 0 if values.empty?

      index = (percentile * (values.length - 1)).round
      values[index] || 0
    end

    def get_active_sessions_count
      # This would be implemented based on your session tracking
      # For now, return a placeholder
      1
    end

    def get_metrics_by_category
      categories = {}

      @metrics.each do |name, _|
        CATEGORIES.each do |category_key, category_name|
          if name.include?(category_name)
            categories[category_key] ||= 0
            categories[category_key] += 1
            break
          end
        end
      end

      categories
    end

    def get_memory_usage
      begin
        # Try to get memory usage (works on most Unix systems)
        `ps -o rss= -p #{Process.pid}`.strip.to_i * 1024
      rescue
        0
      end
    end

    def format_prometheus_line(name, value, tags)
      if tags.empty?
        "#{name} #{value}"
      else
        tag_string = tags.map { |k, v| "#{k}=\"#{v}\"" }.join(",")
        "#{name}{#{tag_string}} #{value}"
      end
    end

    def log_metric_update(type, name, value, tags)
      return unless Logger.respond_to?(:logger)

      Logger.logger.debug("Metric updated", {
        metric_type: type,
        metric_name: name,
        value: value,
        tags: tags,
        service: @service_name
      })
    end
  end

  # Global metrics registry
  class MetricsRegistry
    def self.instance
      @instance ||= new
    end

    def initialize
      @metrics = Metrics.new
    end

    def method_missing(method_name, *args, **kwargs, &block)
      if @metrics.respond_to?(method_name)
        @metrics.send(method_name, *args, **kwargs, &block)
      else
        super
      end
    end

    def respond_to_missing?(method_name, include_private = false)
      @metrics.respond_to?(method_name, include_private) || super
    end
  end
end
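
For orientation, here is a short usage sketch assembled by the editor from the class above. It mirrors the @example blocks and method signatures in metrics.rb rather than documenting tested behaviour of the released gem, and it assumes the gem's top-level lib/dilisense_pep_client.rb loads this file.

require "dilisense_pep_client"

# A standalone collector; MetricsRegistry.instance wraps one of these as a
# process-wide singleton and forwards the same calls via method_missing.
metrics = DilisensePepClient::Metrics.new

# Counter and gauge primitives (names are prefixed and categorised internally,
# e.g. "dilisense_pep_client_screening_api_requests").
metrics.increment_counter("api_requests", tags: { endpoint: "/v1/check" })
metrics.set_gauge("active_connections", 3, category: :performance)

# Timer: records "<name>_duration_ms" as a histogram plus a success/error counter.
metrics.time_operation("individual_check") do
  sleep 0.01 # stand-in for the real screening call
end

# Business events for a screening round trip.
metrics.record_screening_request(type: "individual", search_terms_count: 2, user_id: "user123")
metrics.record_screening_response(
  type: "individual",
  records_found: 1,
  processing_time_ms: 120,
  data_sources: ["dilisense"],
  cache_hit: false
)

# Inspect a single metric, or dump everything in Prometheus exposition format.
p metrics.get_metric("api_requests", category: :screening)
puts metrics.export_prometheus_format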