apidepth 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +267 -0
- data/lib/apidepth/collector.rb +305 -0
- data/lib/apidepth/configuration.rb +30 -0
- data/lib/apidepth/event.rb +36 -0
- data/lib/apidepth/net_http_instrumentation.rb +117 -0
- data/lib/apidepth/railtie.rb +83 -0
- data/lib/apidepth/rate_limit_headers.rb +133 -0
- data/lib/apidepth/registry_loader.rb +120 -0
- data/lib/apidepth/vendor_registry.rb +188 -0
- data/lib/apidepth/version.rb +5 -0
- data/lib/apidepth.rb +68 -0
- metadata +144 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# lib/apidepth/net_http_instrumentation.rb
|
|
2
|
+
|
|
3
|
+
module Apidepth
  # Mixin prepended onto Net::HTTP. Wraps #request so that every outbound call
  # to a recognised vendor host is timed and handed to the collector as an
  # event. Relies on the host class for +address+ and +started?+.
  module NetHTTPInstrumentation
    # Instrumented replacement for Net::HTTP#request.
    #
    # @param req [Net::HTTPRequest] the request being sent
    # @param body [String, nil] optional request body, forwarded untouched
    # @return [Net::HTTPResponse] whatever the wrapped implementation returns
    # @raise [Net::OpenTimeout, Net::ReadTimeout] re-raised after being recorded
    def request(req, body = nil, &block)
      # Early exits — evaluated in order of cheapness:
      #   1. Recursion guard: we're inside our own collector flush
      #   2. Re-entrancy guard: we're already instrumenting this connection
      #   3. SDK disabled entirely
      #   4. Host is on the customer's ignore list
      #   5. Sample rate: probabilistically skip events (checked below)
      return super if Thread.current[:apidepth_skip]
      # When the connection is not started, Net::HTTP#request opens it and
      # then calls #request again on itself. Without this guard that inner
      # call would be instrumented too: one HTTP call would produce two
      # events (the inner one with cold_start=false and a duration that
      # excludes connection setup) and the sample rate would be rolled twice.
      return super if @apidepth_in_request
      return super unless Apidepth.configuration.enabled
      return super if Apidepth.configuration.ignored_hosts.include?(address)

      @apidepth_in_request = true
      begin
        # The flag is already set here so that an unsampled cold request is
        # not re-sampled by the inner, re-entrant call.
        return super unless sampled?

        # Snapshot connection state BEFORE calling super.
        # started? returns true if a keep-alive connection is already open.
        # cold_start events pay for DNS + SSL — that latency belongs to the
        # customer's infrastructure, not the vendor. Tag it so the collector
        # can exclude cold-start events from latency percentile calculations.
        cold_start = !started?
        start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

        begin
          response = super
          record_event(req, response, elapsed_ms(start), cold_start: cold_start)
          response
        rescue Net::OpenTimeout, Net::ReadTimeout => e
          # Timeouts are the leading indicator of vendor degradation — they
          # appear before the vendor acknowledges an incident. We record them
          # and always re-raise so the customer's error handling is unaffected.
          record_timeout(req, elapsed_ms(start), e.class.name, cold_start: cold_start)
          raise
        end
      ensure
        @apidepth_in_request = false
      end
    end

    private

    # Whole milliseconds elapsed since +start+ (a CLOCK_MONOTONIC reading).
    def elapsed_ms(start)
      ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
    end

    # Environment label attached to every event; "unknown" when the
    # configuration has none set.
    def resolve_env
      Apidepth.configuration.environment || "unknown"
    end

    # Probabilistic sampling. At sample_rate 1.0 and above this always returns
    # true without calling rand; at 0.5 roughly half of the calls are kept; at
    # 0.0 nothing is kept.
    def sampled?
      rate = Apidepth.configuration.sample_rate
      rate >= 1.0 || rand < rate
    end

    # Maps an integer HTTP status onto the outcome symbol stored on the event.
    def outcome_for(status)
      case status
      when 200..299 then :success
      when 400..499 then :client_error
      when 500..599 then :server_error
      else :unknown
      end
    end

    # Builds and records the event for a completed request. Hosts the vendor
    # registry does not recognise are skipped. Any error raised while building
    # or recording is swallowed on purpose: instrumentation must never break
    # the caller's HTTP request.
    def record_event(req, response, duration_ms, cold_start:)
      vendor, normalized_path = Apidepth::VendorRegistry.identify(address, req.path)
      return unless vendor

      status = response.code.to_i
      now_ms = Process.clock_gettime(Process::CLOCK_REALTIME, :millisecond)
      rl = Apidepth::RateLimitHeaders.extract(response, now_ms)

      Apidepth::Collector.instance.record(
        Apidepth::Event.build(
          {
            vendor: vendor,
            endpoint: normalized_path,
            method: req.method,
            status: status,
            outcome: outcome_for(status),
            duration_ms: duration_ms,
            cold_start: cold_start,
            env: resolve_env,
            ts: now_ms
          }.merge(rl || {})
        )
      )
    rescue StandardError
      nil
    end

    # Builds and records the event for a request that ended in a timeout.
    # There is no response, so status is nil and the exception class name is
    # stored instead. Errors are swallowed for the same reason as above.
    def record_timeout(req, duration_ms, error_class, cold_start:)
      vendor, normalized_path = Apidepth::VendorRegistry.identify(address, req.path)
      return unless vendor

      Apidepth::Collector.instance.record(
        Apidepth::Event.build(
          vendor: vendor,
          endpoint: normalized_path,
          method: req.method,
          status: nil,
          outcome: :timeout,
          error_class: error_class,
          duration_ms: duration_ms,
          cold_start: cold_start,
          env: resolve_env,
          ts: Process.clock_gettime(Process::CLOCK_REALTIME, :millisecond)
        )
      )
    rescue StandardError
      nil
    end
  end
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# lib/apidepth/railtie.rb
|
|
2
|
+
|
|
3
|
+
module Apidepth
  # Rails integration. Validates configuration, installs the Net::HTTP
  # instrumentation, loads the vendor registry, and wires up shutdown and
  # fork handling for the collector.
  class Railtie < Rails::Railtie
    # -------------------------------------------------------------------------
    # 1. Validate config early — loud warning beats silent 401s at flush time.
    #    A blank key (for example an environment variable that is set but
    #    empty) is treated the same as a missing one.
    # -------------------------------------------------------------------------
    initializer "apidepth.validate_config", after: :load_config_initializers do
      if Apidepth.configuration.api_key.to_s.strip.empty?
        Rails.logger.warn(
          "[Apidepth] No api_key configured — events will not be delivered. " \
          "Add `config.api_key = ENV['APIDEPTH_API_KEY']` to config/initializers/apidepth.rb"
        )
      end
    end

    # -------------------------------------------------------------------------
    # 2. Instrument Net::HTTP and load the remote vendor registry.
    #    Ordered after :load_config_initializers so the customer's
    #    Apidepth.configure block has been evaluated before we read it.
    # -------------------------------------------------------------------------
    initializer "apidepth.instrument", after: :load_config_initializers do
      Apidepth.logger = Rails.logger

      # Freeze environment once so NetHTTPInstrumentation#resolve_env is a
      # single attr_accessor read rather than a defined?/Rails.env call on
      # every outbound HTTP request. A value set by the customer wins.
      Apidepth.configuration.environment ||= Rails.env.to_s

      Net::HTTP.prepend(Apidepth::NetHTTPInstrumentation)
      Apidepth::VendorRegistry.load_extra_vendors(Apidepth.configuration.extra_vendors)
      Apidepth::RegistryLoader.load_and_start

      if Rails.env.development?
        Rails.logger.debug(
          "[Apidepth] Instrumentation active — " \
          "registry=#{Apidepth::VendorRegistry.version} " \
          "vendors=#{Apidepth::VendorRegistry.vendor_count}"
        )
      end
    end

    # -------------------------------------------------------------------------
    # 3. Flush queue on graceful shutdown.
    #    at_exit fires on SIGTERM → graceful Puma/Unicorn shutdown.
    #    flush! rescues internally so a network error at shutdown is not fatal.
    # -------------------------------------------------------------------------
    config.after_initialize do
      at_exit { Apidepth::Collector.instance.flush! }
    end

    # -------------------------------------------------------------------------
    # 4. Fork safety for Puma cluster mode / Spring.
    #
    #    after_fork: reset the Collector singleton so each worker gets a fresh
    #    instance with its own flush thread. The master's flush thread is not
    #    copied by fork() — without reset!, the worker's first call to
    #    Collector.instance returns the master's stale object with no thread.
    #
    #    before_fork: NOT handled here — no clean Rails API exists for it.
    #    Add this to config/puma.rb to flush the master's queue before forking:
    #
    #      before_fork { Apidepth::Collector.instance.flush! }
    #
    #    ActiveSupport::ForkTracker is available in Rails 7.1+.
    # -------------------------------------------------------------------------
    config.after_initialize do
      if defined?(ActiveSupport::ForkTracker)
        ActiveSupport::ForkTracker.after_fork { Apidepth::Collector.reset! }
      elsif defined?(Puma)
        # ActiveSupport::ForkTracker requires Rails 7.1+. Without it, forked
        # Puma workers inherit the master's stale Collector singleton with no
        # flush thread. Events recorded in workers will never be sent.
        # Upgrade to Rails 7.1+ or add to config/puma.rb:
        #   on_worker_boot { Apidepth::Collector.reset! }
        Rails.logger.warn(
          "[Apidepth] Puma detected but ActiveSupport::ForkTracker is unavailable " \
          "(requires Rails 7.1+). Workers in cluster mode will not flush events. " \
          "Add `on_worker_boot { Apidepth::Collector.reset! }` to config/puma.rb"
        )
      end
    end
  end
end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# lib/apidepth/rate_limit_headers.rb
|
|
2
|
+
#
|
|
3
|
+
# Extracts rate limit quota state from HTTP response headers and normalises
|
|
4
|
+
# them into three canonical fields:
|
|
5
|
+
#
|
|
6
|
+
# rl_remaining — requests left in the current window (integer)
|
|
7
|
+
# rl_limit — total quota for the window (integer)
|
|
8
|
+
# rl_reset_at — when the window resets, as epoch milliseconds (integer)
|
|
9
|
+
#
|
|
10
|
+
# Returns nil when no recognised headers are present so the caller can omit
|
|
11
|
+
# the fields from the event rather than sending nulls for every request.
|
|
12
|
+
#
|
|
13
|
+
# WHY in the SDK rather than the collector?
|
|
14
|
+
# Headers are only visible at the HTTP call site. By the time the event
|
|
15
|
+
# reaches the collector, only the status code and duration are known.
|
|
16
|
+
# Header extraction must happen here, inline with instrumentation.
|
|
17
|
+
#
|
|
18
|
+
# Header coverage (checked in priority order per field):
|
|
19
|
+
#
|
|
20
|
+
# OpenAI / Anthropic:
|
|
21
|
+
# x-ratelimit-remaining-requests, x-ratelimit-limit-requests
|
|
22
|
+
# x-ratelimit-reset-requests (OpenAI duration format: "1s", "20ms", "1m30s")
|
|
23
|
+
#
|
|
24
|
+
# GitHub:
|
|
25
|
+
# x-ratelimit-remaining, x-ratelimit-limit
|
|
26
|
+
# x-ratelimit-reset (Unix timestamp seconds)
|
|
27
|
+
#
|
|
28
|
+
# IETF RateLimit draft / HubSpot / Fastly / others:
|
|
29
|
+
# ratelimit-remaining, ratelimit-limit, ratelimit-reset
|
|
30
|
+
#
|
|
31
|
+
# Stripe / generic 429 fallback:
|
|
32
|
+
# retry-after (seconds from now; only meaningful on 429 responses)
|
|
33
|
+
|
|
34
|
+
module Apidepth
  # Reads rate limit quota headers off a response and normalises them into
  # :rl_remaining, :rl_limit and :rl_reset_at (epoch milliseconds).
  module RateLimitHeaders
    # Ordered header names per field — first usable value wins.
    REMAINING_HEADERS = %w[
      x-ratelimit-remaining-requests
      x-ratelimit-remaining
      ratelimit-remaining
    ].freeze

    LIMIT_HEADERS = %w[
      x-ratelimit-limit-requests
      x-ratelimit-limit
      ratelimit-limit
    ].freeze

    RESET_HEADERS = %w[
      x-ratelimit-reset-requests
      x-ratelimit-reset
      ratelimit-reset
      retry-after
    ].freeze

    # Numeric reset values at or above this are Unix timestamps in seconds.
    EPOCH_SECONDS_FLOOR = 1_000_000_000
    # Numeric reset values at or above this are already epoch milliseconds
    # (as seconds this would be a date tens of thousands of years away).
    EPOCH_MILLIS_FLOOR = 1_000_000_000_000

    # One "<number><unit>" component of a duration string such as "1m30s".
    # "m" must not be followed by "s" so that "20ms" is read as milliseconds.
    DURATION_TOKEN = /(\d+(?:\.\d+)?)(h|m(?!s)|s|ms)/.freeze
    # Milliseconds per duration unit.
    UNIT_MS = { "h" => 3_600_000, "m" => 60_000, "s" => 1_000, "ms" => 1 }.freeze

    private_constant :EPOCH_SECONDS_FLOOR, :EPOCH_MILLIS_FLOOR, :DURATION_TOKEN, :UNIT_MS

    # Extract rate limit fields from a response.
    #
    # @param response [#[]] object returning a header value (or nil) by name,
    #   e.g. a Net::HTTPResponse
    # @param now_ms [Integer] current time in epoch milliseconds; used as the
    #   base for relative reset values
    # @return [Hash, nil] the subset of :rl_remaining, :rl_limit, :rl_reset_at
    #   that could be determined, or nil when none could
    def self.extract(response, now_ms)
      fields = {
        rl_remaining: find_integer(response, REMAINING_HEADERS),
        rl_limit: find_integer(response, LIMIT_HEADERS),
        rl_reset_at: find_reset_ms(response, RESET_HEADERS, now_ms)
      }.compact

      fields.empty? ? nil : fields
    end

    # Returns the first header value that starts with an unsigned integer.
    # Values that do not start with digits (blank, "n/a", negative numbers)
    # are skipped instead of being coerced to 0, which would look like an
    # exhausted quota.
    def self.find_integer(response, headers)
      headers.each do |name|
        digits = response[name].to_s[/\A\s*(\d+)/, 1]
        return digits.to_i if digits
      end
      nil
    end

    # Returns the first reset header that can be normalised to epoch
    # milliseconds, or nil when none can.
    def self.find_reset_ms(response, headers, now_ms)
      headers.each do |name|
        val = response[name]
        next unless val

        ms = normalize_reset_ms(val.strip, now_ms)
        return ms if ms
      end
      nil
    end

    # Normalise a rate limit reset value to epoch milliseconds.
    #
    # Handles four formats:
    #   Epoch milliseconds — number >= 1_000_000_000_000 (e.g. "1716000000000")
    #   Unix timestamp     — number >= 1_000_000_000 (e.g. "1716000000")
    #   Seconds-from-now   — smaller number (e.g. "30" from Retry-After)
    #   Duration string    — "1s", "20ms", "1m30s", "2h"
    #
    # Results are rounded rather than truncated so float error cannot lose a
    # millisecond (1.001 * 1000 evaluates to 1000.9999999999999).
    def self.normalize_reset_ms(str, now_ms)
      if str.match?(/\A\d+(?:\.\d+)?\z/)
        n = str.to_f
        return n.round if n >= EPOCH_MILLIS_FLOOR
        return (n * 1_000).round if n >= EPOCH_SECONDS_FLOOR

        return now_ms + (n * 1_000).round
      end

      duration_ms = parse_duration_ms(str)
      duration_ms ? now_ms + duration_ms : nil
    end

    # Parse a duration string to milliseconds.
    # Handles: "1s" => 1000, "20ms" => 20, "1m30s" => 90000, "2h" => 7200000
    # Returns nil when no component is found or the total is zero.
    def self.parse_duration_ms(str)
      total = str.scan(DURATION_TOKEN).sum do |value, unit|
        (value.to_f * UNIT_MS.fetch(unit)).round
      end
      total.positive? ? total : nil
    end

    private_class_method :find_integer, :find_reset_ms, :normalize_reset_ms, :parse_duration_ms
  end
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# lib/apidepth/registry_loader.rb
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "json"
|
|
5
|
+
require "uri"
|
|
6
|
+
|
|
7
|
+
module Apidepth
  # Fetches the vendor registry from the collector service, caches it on disk,
  # and keeps it fresh from a background thread.
  class RegistryLoader
    REGISTRY_URL = "https://collector.apidepth.io/v1/registry".freeze

    # Ceiling on the response size we are willing to parse — a legitimate
    # registry is ~10KB.
    MAX_REGISTRY_BYTES = 512_000

    # Called from the Railtie's "apidepth.instrument" initializer. Loads the
    # best available registry (remote → disk cache → bundled baseline already
    # loaded by VendorRegistry.initialize_registry) and starts the background
    # refresh thread.
    #
    # @return [Thread] the refresh thread
    def self.load_and_start
      apply_registry(fetch_remote || load_from_disk)
      start_refresh_thread
    end

    # Hands a registry to VendorRegistry. Errors are logged and swallowed so
    # that a malformed payload can neither abort application boot nor kill
    # the refresh thread.
    def self.apply_registry(registry)
      VendorRegistry.replace(registry) if registry
    rescue StandardError => e
      Apidepth.logger&.warn("[Apidepth] Could not apply registry: #{Apidepth.sanitize_log(e.message)}")
    end

    # Starts the thread that re-fetches the registry every
    # registry_refresh_interval seconds.
    def self.start_refresh_thread
      Thread.new do
        loop do
          # sleep stays outside apply_registry's rescue: if the interval is
          # invalid the thread stops instead of spinning in a tight loop.
          sleep Apidepth.configuration.registry_refresh_interval
          apply_registry(fetch_remote)
        end
      end.tap do |t|
        t.abort_on_exception = false
        t.name = "apidepth-registry"
      end
    end

    # Downloads and parses the registry.
    #
    # @return [Hash, nil] the parsed registry, or nil on any failure (non-200
    #   status, oversized or non-object body, network or parse error)
    def self.fetch_remote
      # Mark this thread so the Net::HTTP instrumentation ignores our own
      # request; the previous value is put back afterwards in case a caller
      # had already set it.
      previous_skip = Thread.current[:apidepth_skip]
      Thread.current[:apidepth_skip] = true

      http = nil
      uri = URI(REGISTRY_URL)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
      http.open_timeout = 3
      http.read_timeout = 5

      res = http.get(uri.path, "Authorization" => "Bearer #{Apidepth.configuration.api_key}")
      return nil unless res.code.to_i == 200

      body = res.body.to_s
      # Parsing an unbounded body could consume significant memory if the
      # endpoint is compromised or misconfigured.
      if body.bytesize > MAX_REGISTRY_BYTES
        Apidepth.logger&.warn("[Apidepth] Registry response too large (#{body.bytesize} bytes) — skipping")
        return nil
      end

      registry = JSON.parse(body)
      unless registry.is_a?(Hash)
        Apidepth.logger&.warn("[Apidepth] Registry response is not a JSON object — skipping")
        return nil
      end

      # Warm the disk cache so the next cold-start skips the network fetch.
      write_cache(body)

      registry
    rescue StandardError
      nil
    ensure
      begin
        http.finish if http&.started?
      rescue StandardError
        nil
      end
      Thread.current[:apidepth_skip] = previous_skip
    end

    # Writes the raw registry body to the configured cache path. Failures are
    # logged and otherwise ignored — the cache is only an optimisation.
    def self.write_cache(body)
      path = Apidepth.configuration.registry_cache_path
      validate_cache_path!(path)
      File.write(path, body)
    rescue ArgumentError => e
      Apidepth.logger&.warn("[Apidepth] Invalid registry_cache_path: #{e.message}")
    rescue StandardError => e
      Apidepth.logger&.warn("[Apidepth] Could not write registry cache: #{Apidepth.sanitize_log(e.message)}")
    end

    # Reads the registry from the disk cache.
    #
    # @return [Hash, nil] the cached registry, or nil when the path is
    #   invalid, the file is missing or unreadable, or its content is not a
    #   JSON object
    def self.load_from_disk
      path = Apidepth.configuration.registry_cache_path

      validate_cache_path!(path)
      return nil unless File.exist?(path)

      registry = JSON.parse(File.read(path))
      return registry if registry.is_a?(Hash)

      Apidepth.logger&.warn("[Apidepth] Registry cache is not a JSON object — ignoring")
      nil
    rescue ArgumentError => e
      Apidepth.logger&.warn("[Apidepth] Invalid registry_cache_path: #{e.message}")
      nil
    rescue StandardError => e
      Apidepth.logger&.warn("[Apidepth] Could not read registry cache: #{Apidepth.sanitize_log(e.message)}")
      nil
    end

    # Validates the cache path before any file operation.
    #
    # Requires an absolute path with no traversal segments. Without this, a
    # misconfigured registry_cache_path like "../../etc/cron.d/apidepth" would
    # cause us to write registry JSON into sensitive system directories.
    #
    # @raise [ArgumentError] when the path is not a String starting with "/"
    #   or contains a ".." segment
    def self.validate_cache_path!(path)
      unless path.is_a?(String) && path.start_with?("/")
        raise ArgumentError, "registry_cache_path must be an absolute path (got #{path.inspect})"
      end

      return unless path.split("/").include?("..")

      raise ArgumentError, "registry_cache_path must not contain '..' traversal segments (got #{path.inspect})"
    end

    # Ruby's `private` keyword does not apply to `def self.method`, so the
    # helpers are hidden explicitly with private_class_method.
    private_class_method :apply_registry, :start_refresh_thread, :fetch_remote,
                         :write_cache, :load_from_disk, :validate_cache_path!
  end
end
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# lib/apidepth/vendor_registry.rb
|
|
2
|
+
|
|
3
|
+
module Apidepth
  # In-memory lookup table that maps a request host to a vendor slug and
  # normalises request paths into low-cardinality endpoint names. Starts from
  # BUNDLED_BASELINE and can be swapped at runtime via .replace.
  module VendorRegistry
    BUNDLED_BASELINE = {
      "version" => "bundled",
      "vendors" => {
        "stripe" => {
          "hosts" => ["api.stripe.com"],
          "patterns" => [
            { "match" => '/v1/charges/ch_\w+', "replace" => "/v1/charges/:id" },
            { "match" => '/v1/customers/cus_\w+', "replace" => "/v1/customers/:id" },
            { "match" => '/v1/payment_intents/pi_\w+', "replace" => "/v1/payment_intents/:id" },
            { "match" => '/v1/subscriptions/sub_\w+', "replace" => "/v1/subscriptions/:id" },
            { "match" => '/v1/invoices/in_\w+', "replace" => "/v1/invoices/:id" },
            { "match" => '/v1/refunds/re_\w+', "replace" => "/v1/refunds/:id" }
          ]
        },
        "openai" => {
          "hosts" => ["api.openai.com"],
          "patterns" => [
            { "match" => "/v1/chat/completions", "replace" => "/v1/chat/completions" },
            { "match" => "/v1/embeddings", "replace" => "/v1/embeddings" },
            { "match" => "/v1/images/generations", "replace" => "/v1/images/generations" },
            { "match" => '/v1/files/file-\w+', "replace" => "/v1/files/:id" }
          ]
        },
        "anthropic" => {
          "hosts" => ["api.anthropic.com"],
          "patterns" => [
            { "match" => "/v1/messages", "replace" => "/v1/messages" }
          ]
        },
        "twilio" => {
          "hosts" => ["api.twilio.com"],
          "patterns" => [
            { "match" => '/2010-04-01/Accounts/AC\w+/Messages/SM\w+', "replace" => "/Accounts/:id/Messages/:id" },
            { "match" => '/2010-04-01/Accounts/AC\w+/Messages', "replace" => "/Accounts/:id/Messages" },
            { "match" => '/2010-04-01/Accounts/AC\w+/Calls/CA\w+', "replace" => "/Accounts/:id/Calls/:id" },
            { "match" => '/2010-04-01/Accounts/AC\w+/Calls', "replace" => "/Accounts/:id/Calls" }
          ]
        },
        "resend" => {
          "hosts" => ["api.resend.com"],
          "patterns" => [
            { "match" => "/emails/[0-9a-f-]{36}", "replace" => "/emails/:id" }
          ]
        },
        "github" => {
          "hosts" => ["api.github.com"],
          "patterns" => [
            { "match" => '/repos/[^/]+/[^/]+/pulls/\d+', "replace" => "/repos/:owner/:repo/pulls/:number" },
            { "match" => '/repos/[^/]+/[^/]+/issues/\d+', "replace" => "/repos/:owner/:repo/issues/:number" },
            { "match" => "/repos/[^/]+/[^/]+", "replace" => "/repos/:owner/:repo" },
            { "match" => "/users/[^/]+", "replace" => "/users/:username" }
          ]
        }
      }
    }.freeze

    # Applied, in order, to every path after the vendor-specific rules.
    GENERIC_PATTERNS = [
      [%r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}}, "/:uuid"],
      [%r{/\d{4,}}, "/:id"],
      [%r{/[a-z0-9]{24,}}, "/:token"]
    ].freeze

    # Registry-supplied patterns containing any of these constructs are
    # skipped: "(?{", "(?<", "(?!", "(?=", "(?#", "+?" and "*??". This is a
    # blocklist — it does not prevent catastrophic-backtracking ReDoS
    # (e.g. (a+)+) from a compromised registry, but legitimate path patterns
    # never need these constructs.
    UNSAFE_PATTERN = /\(\?[{<!=]|\(\?#|\+\?|\*\?{2}/.freeze
    private_constant :UNSAFE_PATTERN

    class << self
      # Identify the vendor behind a host and normalise the request path.
      #
      # @param host [String] host name exactly as stored in the registry
      # @param raw_path [String, nil] request path, optionally with a query string
      # @return [Array(String, String), nil] [vendor slug, normalised path],
      #   or nil when the host is not in the registry
      def identify(host, raw_path)
        # Take both references under the lock; the hashes themselves are
        # never mutated after publication, so they can be read lock-free.
        hosts, patterns = @mutex.synchronize { [@hosts, @patterns] }
        vendor = hosts[host]
        return nil unless vendor

        path = strip_query_string(raw_path)
        path = apply_vendor_normalizers(patterns[vendor] || [], path)
        [vendor, apply_generic_normalizers(path)]
      end

      # Merge customer-defined host→vendor mappings from config.extra_vendors.
      # Each value may be a single host or an Array of hosts.
      # Does not touch @patterns — custom vendors use generic path normalization only.
      def load_extra_vendors(extra_vendors)
        return if extra_vendors.nil? || extra_vendors.empty?

        additions = extra_host_map(extra_vendors)
        # Swap in a merged copy rather than mutating the hash that identify
        # may be reading outside the lock.
        @mutex.synchronize { @hosts = @hosts.merge(additions) }
      end

      # Replace the whole registry with a freshly loaded one.
      #
      # @param registry_json [Hash] parsed registry with "version" and
      #   "vendors" keys; anything else is logged and ignored so the current
      #   registry stays in effect
      def replace(registry_json)
        unless well_formed?(registry_json)
          Apidepth.logger&.warn("[Apidepth] Ignoring malformed registry payload")
          return
        end

        new_hosts = build_hosts(registry_json)
        new_patterns = build_patterns(registry_json)

        # Re-apply extra_vendors so a registry refresh never wipes customer-defined
        # host mappings. The config value wins over any registry entry for the same host.
        new_hosts.merge!(extra_host_map(Apidepth.configuration.extra_vendors))

        @mutex.synchronize do
          @hosts = new_hosts
          @patterns = new_patterns
          @version = registry_json["version"]
        end

        Apidepth.logger&.debug(
          "[Apidepth] Registry updated — version=#{Apidepth.sanitize_log(registry_json['version'])} " \
          "vendors=#{new_hosts.values.uniq.count}"
        )
      end

      # Version string of the registry currently in effect.
      def version
        @mutex.synchronize { @version }
      end

      # Number of distinct vendor slugs currently mapped to at least one host.
      def vendor_count
        @mutex.synchronize { @hosts.values.uniq.count }
      end

      private

      # Called at require time — Apidepth.logger is not yet defined so we
      # can't call replace (which logs). Initialize state directly instead.
      def initialize_registry
        @mutex = Mutex.new
        @version = BUNDLED_BASELINE["version"]
        @hosts = build_hosts(BUNDLED_BASELINE)
        @patterns = build_patterns(BUNDLED_BASELINE)
      end

      # True when the payload is a Hash whose "vendors" entry is absent or a Hash.
      def well_formed?(registry)
        registry.is_a?(Hash) && (registry["vendors"].nil? || registry["vendors"].is_a?(Hash))
      end

      # Vendor entries whose configuration is a Hash; anything else is dropped.
      def vendor_configs(registry)
        (registry["vendors"] || {}).select { |_slug, config| config.is_a?(Hash) }
      end

      # Turns a { name => host or [hosts] } mapping into { host => name }.
      def extra_host_map(extra_vendors)
        (extra_vendors || {}).each_with_object({}) do |(name, hosts), map|
          Array(hosts).each { |host| map[host.to_s] = name.to_s }
        end
      end

      # Builds the { host => vendor slug } lookup table.
      def build_hosts(registry)
        vendor_configs(registry).each_with_object({}) do |(slug, config), hosts|
          Array(config["hosts"]).each { |h| hosts[h] = slug }
        end
      end

      # Builds { vendor slug => [[Regexp, replacement], ...] }, skipping rules
      # that cannot be used.
      def build_patterns(registry)
        vendor_configs(registry).each_with_object({}) do |(slug, config), patterns|
          patterns[slug] = Array(config["patterns"]).filter_map { |rule| compile_rule(slug, rule) }
        end
      end

      # Compiles one registry rule into a [Regexp, replacement] pair.
      # Returns nil for rules that are not Hashes, have an empty "match"
      # (an empty regexp matches at every position, so gsub would mangle the
      # whole path), use a blocked construct, or fail to compile.
      def compile_rule(slug, rule)
        return nil unless rule.is_a?(Hash)

        match = rule["match"].to_s
        return nil if match.empty?

        if match.match?(UNSAFE_PATTERN)
          Apidepth.logger&.warn(
            "[Apidepth] Skipping unsafe pattern for #{Apidepth.sanitize_log(slug)}: #{match.inspect}"
          )
          return nil
        end

        [Regexp.new(match), rule["replace"].to_s]
      rescue RegexpError => e
        Apidepth.logger&.warn(
          "[Apidepth] Skipping invalid pattern for #{Apidepth.sanitize_log(slug)} " \
          "#{match.inspect}: #{e.message}"
        )
        nil
      end

      # Everything before the first "?". Always returns a String: nil, "" and
      # "?a=1" all yield "" (String#split returns [] for an empty string, so
      # a bare .first would be nil).
      def strip_query_string(path)
        path.to_s.split("?", 2).first.to_s
      end

      # Applies the first vendor rule that matches; later rules are not tried.
      def apply_vendor_normalizers(rules, path)
        pattern, replacement = rules.find { |rule_pattern, _| path.match?(rule_pattern) }
        pattern ? path.gsub(pattern, replacement) : path
      end

      # Applies every generic pattern in sequence.
      def apply_generic_normalizers(path)
        GENERIC_PATTERNS.reduce(path) do |p, (pattern, replacement)|
          p.gsub(pattern, replacement)
        end
      end
    end

    initialize_registry
  end
end
|