apidepth 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,117 @@
1
+ # lib/apidepth/net_http_instrumentation.rb
2
+
3
module Apidepth
  # Instrumentation mixin meant to be prepended onto Net::HTTP. It wraps
  # #request, times the call on the monotonic clock, and hands one event per
  # logical request to Apidepth::Collector. It relies on the host object
  # providing #address and #started?.
  module NetHTTPInstrumentation
    # Times the wrapped request and records its outcome.
    #
    # The signature mirrors the method being wrapped; `body` and `block` are
    # forwarded implicitly by the bare `super` calls.
    #
    # @param req [#path, #method] the outgoing request object
    # @param body [String, nil] optional request body
    # @return whatever the wrapped #request returns
    # @raise re-raises every exception from the wrapped call unchanged;
    #   Net::OpenTimeout and Net::ReadTimeout are recorded first
    def request(req, body = nil, &block)
      # Re-entrancy guard. Net::HTTP#request on an unstarted connection opens
      # the socket and then calls #request again on the same object. With this
      # module prepended that inner call re-enters this method; without the
      # guard one logical request would be sampled twice and recorded twice
      # (once with cold_start=false, once with cold_start=true).
      return super if @apidepth_in_flight

      # Early exits — evaluated in order of cheapness:
      #   1. Recursion guard: we're inside our own collector flush
      #   2. SDK disabled entirely
      #   3. Host is on the customer's ignore list
      #   4. Sample rate: probabilistically skip events
      return super if Thread.current[:apidepth_skip]
      return super unless Apidepth.configuration.enabled
      return super if Apidepth.configuration.ignored_hosts.include?(address)
      return super unless sampled?

      # Snapshot connection state BEFORE calling super: once super returns (or
      # re-enters), started? no longer tells us whether this request had to
      # open the connection. The flag is attached to the event so cold starts
      # can be told apart downstream.
      cold_start = !started?
      start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      @apidepth_in_flight = true

      begin
        response = super
        record_event(req, response, elapsed_ms(start), cold_start: cold_start)
        response
      rescue Net::OpenTimeout, Net::ReadTimeout => e
        # Record the timeout, then always re-raise so the caller's own error
        # handling is unaffected.
        record_timeout(req, elapsed_ms(start), e.class.name, cold_start: cold_start)
        raise
      ensure
        # Always clear the guard so later requests on this connection are
        # instrumented again, including after an exception.
        @apidepth_in_flight = false
      end
    end

    private

    # Whole milliseconds elapsed since `start` (a CLOCK_MONOTONIC reading in
    # seconds), rounded to the nearest integer.
    def elapsed_ms(start)
      ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round
    end

    # Environment label attached to every event. Reads the configured value
    # and falls back to "unknown" when none is set.
    def resolve_env
      Apidepth.configuration.environment || "unknown"
    end

    # Probabilistic sampling. A rate of 1.0 or more always samples (the `||`
    # short-circuits, so rand is only called for rates below 1.0); a rate of
    # 0.0 never samples because rand is always >= 0.0.
    def sampled?
      rate = Apidepth.configuration.sample_rate
      rate >= 1.0 || rand < rate
    end

    # Builds and records the event for a completed request.
    #
    # Does nothing when the host is not a known vendor. Any StandardError
    # raised while building or recording is swallowed on purpose:
    # instrumentation must never break the caller's HTTP request.
    def record_event(req, response, duration_ms, cold_start:)
      vendor, normalized_path = Apidepth::VendorRegistry.identify(address, req.path)
      return unless vendor

      status = response.code.to_i
      # 1xx and 3xx (and anything outside 100..599) fall through to :unknown.
      outcome = case status
                when 200..299 then :success
                when 400..499 then :client_error
                when 500..599 then :server_error
                else :unknown
                end

      # Wall-clock timestamp in epoch milliseconds; also the reference point
      # for rate limit reset values expressed relative to "now".
      now_ms = Process.clock_gettime(Process::CLOCK_REALTIME, :millisecond)
      rl = Apidepth::RateLimitHeaders.extract(response, now_ms)

      Apidepth::Collector.instance.record(
        Apidepth::Event.build(
          {
            vendor: vendor,
            endpoint: normalized_path,
            method: req.method,
            status: status,
            outcome: outcome,
            duration_ms: duration_ms,
            cold_start: cold_start,
            env: resolve_env,
            ts: now_ms
          }.merge(rl || {})
        )
      )
    rescue StandardError
      nil
    end

    # Builds and records the event for a request that timed out. There is no
    # response, so status is nil and the exception class name is carried in
    # :error_class. Errors are swallowed for the same reason as record_event.
    def record_timeout(req, duration_ms, error_class, cold_start:)
      vendor, normalized_path = Apidepth::VendorRegistry.identify(address, req.path)
      return unless vendor

      Apidepth::Collector.instance.record(
        Apidepth::Event.build(
          vendor: vendor,
          endpoint: normalized_path,
          method: req.method,
          status: nil,
          outcome: :timeout,
          error_class: error_class,
          duration_ms: duration_ms,
          cold_start: cold_start,
          env: resolve_env,
          ts: Process.clock_gettime(Process::CLOCK_REALTIME, :millisecond)
        )
      )
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,83 @@
1
+ # lib/apidepth/railtie.rb
2
+
3
module Apidepth
  # Rails integration. Hooks the SDK into the boot sequence: validates the
  # configuration, prepends the Net::HTTP instrumentation, loads the vendor
  # registry, and registers shutdown / fork hooks for the Collector.
  class Railtie < Rails::Railtie
    # -------------------------------------------------------------------------
    # 1. Validate config early — a loud boot-time warning beats silent delivery
    #    failures at flush time.
    #
    #    A blank key is treated like a missing one: an ENV var that is exported
    #    but empty yields "" rather than nil and would otherwise slip through.
    # -------------------------------------------------------------------------
    initializer "apidepth.validate_config", after: :load_config_initializers do
      if Apidepth.configuration.api_key.to_s.strip.empty?
        Rails.logger.warn(
          "[Apidepth] No api_key configured — events will not be delivered. " \
          "Add `config.api_key = ENV['APIDEPTH_API_KEY']` to config/initializers/apidepth.rb"
        )
      end
    end

    # -------------------------------------------------------------------------
    # 2. Instrument Net::HTTP and load the vendor registry.
    #    Ordered after :load_config_initializers so the application's own
    #    config/initializers (including its Apidepth configure block) have run.
    # -------------------------------------------------------------------------
    initializer "apidepth.instrument", after: :load_config_initializers do
      Apidepth.logger = Rails.logger

      # Resolve the environment once at boot so the per-request path only
      # reads a stored value instead of calling Rails.env on every outbound
      # HTTP request. A value set explicitly by the application wins.
      Apidepth.configuration.environment ||= Rails.env.to_s

      Net::HTTP.prepend(Apidepth::NetHTTPInstrumentation)
      Apidepth::VendorRegistry.load_extra_vendors(Apidepth.configuration.extra_vendors)
      Apidepth::RegistryLoader.load_and_start

      if Rails.env.development?
        Rails.logger.debug(
          "[Apidepth] Instrumentation active — " \
          "registry=#{Apidepth::VendorRegistry.version} " \
          "vendors=#{Apidepth::VendorRegistry.vendor_count}"
        )
      end
    end

    # -------------------------------------------------------------------------
    # 3. Flush the queue when the process exits, so events still buffered in
    #    the Collector get one last delivery attempt.
    #    NOTE(review): this relies on Collector#flush! not raising — confirm.
    # -------------------------------------------------------------------------
    config.after_initialize do
      at_exit { Apidepth::Collector.instance.flush! }
    end

    # -------------------------------------------------------------------------
    # 4. Fork safety for forking servers (Puma cluster mode, Spring, ...).
    #
    #    after_fork: reset the Collector singleton so each forked process
    #    starts from a fresh instance. Threads are not copied by fork(), so a
    #    Collector inherited from the parent would have no flush thread.
    #
    #    before_fork: NOT handled here. To flush the parent's queue before
    #    forking, add this to config/puma.rb:
    #
    #      before_fork { Apidepth::Collector.instance.flush! }
    #
    #    NOTE(review): the warning below says ForkTracker requires Rails 7.1+.
    #    Confirm against the ActiveSupport changelog before relying on it.
    # -------------------------------------------------------------------------
    config.after_initialize do
      if defined?(ActiveSupport::ForkTracker)
        ActiveSupport::ForkTracker.after_fork { Apidepth::Collector.reset! }
      elsif defined?(Puma)
        # No fork hook available: tell the operator how to reset the Collector
        # in each worker by hand.
        Rails.logger.warn(
          "[Apidepth] Puma detected but ActiveSupport::ForkTracker is unavailable " \
          "(requires Rails 7.1+). Workers in cluster mode will not flush events. " \
          "Add `on_worker_boot { Apidepth::Collector.reset! }` to config/puma.rb"
        )
      end
    end
  end
end
@@ -0,0 +1,133 @@
1
+ # lib/apidepth/rate_limit_headers.rb
2
+ #
3
+ # Extracts rate limit quota state from HTTP response headers and normalises
4
+ # them into three canonical fields:
5
+ #
6
+ # rl_remaining — requests left in the current window (integer)
7
+ # rl_limit — total quota for the window (integer)
8
+ # rl_reset_at — when the window resets, as epoch milliseconds (integer)
9
+ #
10
+ # Returns nil when no recognised headers are present so the caller can omit
11
+ # the fields from the event rather than sending nulls for every request.
12
+ #
13
+ # WHY in the SDK rather than the collector?
14
+ # Headers are only visible at the HTTP call site. By the time the event
15
+ # reaches the collector, only the status code and duration are known.
16
+ # Header extraction must happen here, inline with instrumentation.
17
+ #
18
+ # Header coverage (checked in priority order per field):
19
+ #
20
+ # OpenAI / Anthropic:
21
+ # x-ratelimit-remaining-requests, x-ratelimit-limit-requests
22
+ # x-ratelimit-reset-requests (OpenAI duration format: "1s", "20ms", "1m30s")
23
+ #
24
+ # GitHub:
25
+ # x-ratelimit-remaining, x-ratelimit-limit
26
+ # x-ratelimit-reset (Unix timestamp seconds)
27
+ #
28
+ # IETF RateLimit draft / HubSpot / Fastly / others:
29
+ # ratelimit-remaining, ratelimit-limit, ratelimit-reset
30
+ #
31
+ # Stripe / generic 429 fallback:
32
+ # retry-after (seconds from now; only meaningful on 429 responses)
33
+
34
module Apidepth
  # Extracts rate limit quota state from HTTP response headers and normalises
  # it into :rl_remaining, :rl_limit and :rl_reset_at (epoch milliseconds).
  module RateLimitHeaders
    # Ordered header names per field — first usable value wins.
    REMAINING_HEADERS = %w[
      x-ratelimit-remaining-requests
      x-ratelimit-remaining
      ratelimit-remaining
    ].freeze

    LIMIT_HEADERS = %w[
      x-ratelimit-limit-requests
      x-ratelimit-limit
      ratelimit-limit
    ].freeze

    RESET_HEADERS = %w[
      x-ratelimit-reset-requests
      x-ratelimit-reset
      ratelimit-reset
      retry-after
    ].freeze

    # Digits at the very start of a header value. Anything after them is
    # ignored (e.g. "100, 100;w=60" reads as 100).
    LEADING_INTEGER = /\A\d+/.freeze

    # A value made only of digits with an optional fraction.
    PURE_NUMERIC = /\A\d+(?:\.\d+)?\z/.freeze

    # One "<number><unit>" component of a duration string such as "1m30s".
    # `m(?!s)` keeps the minutes unit from swallowing the "m" of "ms".
    DURATION_PART = /(\d+(?:\.\d+)?)(h|m(?!s)|s|ms)/.freeze

    # Milliseconds per duration unit.
    UNIT_MS = { "h" => 3_600_000, "m" => 60_000, "s" => 1_000, "ms" => 1 }.freeze

    # Numeric reset values at or above this are read as Unix timestamps in
    # seconds; smaller values are read as seconds from now.
    EPOCH_SECONDS_THRESHOLD = 1_000_000_000

    # Extract rate limit fields from a response.
    #
    # @param response [#[]] object returning a header value (String or nil)
    #   for a header name, e.g. a Net::HTTP response
    # @param now_ms [Integer] current time in epoch milliseconds; the base
    #   for reset values expressed relative to "now"
    # @return [Hash, nil] the subset of :rl_remaining, :rl_limit and
    #   :rl_reset_at that could be determined, or nil when none could
    def self.extract(response, now_ms)
      remaining = find_integer(response, REMAINING_HEADERS)
      limit     = find_integer(response, LIMIT_HEADERS)
      reset_at  = find_reset_ms(response, RESET_HEADERS, now_ms)

      return nil if remaining.nil? && limit.nil? && reset_at.nil?

      { rl_remaining: remaining, rl_limit: limit, rl_reset_at: reset_at }.compact
    end

    # --- private helpers ---

    # First header in `headers` whose value starts with digits, as an Integer.
    #
    # Values that do not start with a digit ("unlimited", "n/a", "-1") are
    # skipped, not coerced. String#to_i would turn them into 0, which is
    # indistinguishable from a genuinely exhausted quota.
    def self.find_integer(response, headers)
      headers.each do |name|
        digits = response[name].to_s.strip[LEADING_INTEGER]
        return digits.to_i if digits
      end
      nil
    end
    private_class_method :find_integer

    # First header in `headers` whose value can be normalised to an epoch
    # millisecond reset time. The same parsing applies to every header in the
    # list and does not depend on the response status.
    def self.find_reset_ms(response, headers, now_ms)
      headers.each do |name|
        val = response[name]
        next unless val

        ms = normalize_reset_ms(val.strip, now_ms)
        return ms if ms
      end
      nil
    end
    private_class_method :find_reset_ms

    # Normalise a rate limit reset value to epoch milliseconds.
    #
    # Handles three formats:
    #   Unix timestamp    — number >= 1_000_000_000 (e.g. "1716000000")
    #   Seconds-from-now  — smaller number           (e.g. "30")
    #   Duration string   — "1s", "20ms", "1m30s", "2h"
    #
    # Returns nil for anything else.
    def self.normalize_reset_ms(str, now_ms)
      if str.match?(PURE_NUMERIC)
        n = str.to_f
        return n >= EPOCH_SECONDS_THRESHOLD ? (n * 1_000).to_i : now_ms + (n * 1_000).to_i
      end

      duration_ms = parse_duration_ms(str)
      duration_ms ? now_ms + duration_ms : nil
    end
    private_class_method :normalize_reset_ms

    # Parse a duration string to milliseconds.
    # Handles: "1s" => 1000, "20ms" => 20, "1m30s" => 90000, "2h" => 7200000
    #
    # Each component is truncated to whole milliseconds before summing.
    # Returns nil when no component is found or the total is not positive.
    def self.parse_duration_ms(str)
      parts = str.scan(DURATION_PART)
      return nil if parts.empty?

      total = parts.sum { |val, unit| (val.to_f * UNIT_MS.fetch(unit)).to_i }
      total.positive? ? total : nil
    end
    private_class_method :parse_duration_ms
  end
end
@@ -0,0 +1,120 @@
1
+ # lib/apidepth/registry_loader.rb
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
module Apidepth
  # Loads the vendor registry from the collector (falling back to a disk
  # cache) and keeps it fresh from a background thread.
  class RegistryLoader
    REGISTRY_URL = "https://collector.apidepth.io/v1/registry".freeze

    # Upper bound on the registry payload we are willing to parse.
    MAX_REGISTRY_BYTES = 512_000

    # Loads the best available registry (remote, then disk cache), installs
    # it in VendorRegistry, and starts the background refresh thread. When
    # neither source yields a registry, VendorRegistry is left untouched.
    #
    # @return [Thread] the refresh thread
    def self.load_and_start
      apply(fetch_remote || load_from_disk)
      start_refresh_thread
    end

    # Installs `registry` in VendorRegistry. A failure is logged instead of
    # raised: a bad registry payload must not abort application boot or
    # permanently kill the refresh thread.
    def self.apply(registry)
      VendorRegistry.replace(registry) if registry
    rescue StandardError => e
      Apidepth.logger&.warn("[Apidepth] Could not apply registry: #{Apidepth.sanitize_log(e.message)}")
      nil
    end

    # Starts a thread that re-fetches the registry every
    # `registry_refresh_interval` seconds for the life of the process.
    def self.start_refresh_thread
      Thread.new do
        loop do
          sleep Apidepth.configuration.registry_refresh_interval
          apply(fetch_remote)
        end
      end.tap do |t|
        t.abort_on_exception = false
        t.name = "apidepth-registry"
      end
    end

    # Fetches the registry from REGISTRY_URL.
    #
    # @return [Hash, nil] the parsed registry, or nil on any failure
    #   (non-200 status, oversized body, non-object JSON, network error)
    def self.fetch_remote
      # Flag this thread so our own Net::HTTP instrumentation ignores the
      # request. The previous value is restored on exit: this may run on a
      # thread that had already set the flag.
      previous_skip = Thread.current[:apidepth_skip]
      Thread.current[:apidepth_skip] = true

      http = nil
      uri  = URI(REGISTRY_URL)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl      = true
      http.verify_mode  = OpenSSL::SSL::VERIFY_PEER
      http.open_timeout = 3
      http.read_timeout = 5

      res = http.get(uri.path, "Authorization" => "Bearer #{Apidepth.configuration.api_key}")
      return nil unless res.code.to_i == 200

      # Size ceiling before parsing. The body has already been read into
      # memory at this point; the check bounds the JSON parse and what gets
      # written to the disk cache.
      body = res.body.to_s
      if body.bytesize > MAX_REGISTRY_BYTES
        Apidepth.logger&.warn("[Apidepth] Registry response too large (#{body.bytesize} bytes) — skipping")
        return nil
      end

      # Only a JSON object is a usable registry. Reject anything else before
      # it can reach VendorRegistry or poison the disk cache.
      registry = JSON.parse(body)
      unless registry.is_a?(Hash)
        Apidepth.logger&.warn("[Apidepth] Registry response is not a JSON object — skipping")
        return nil
      end

      write_cache(body)
      registry
    rescue StandardError
      nil
    ensure
      begin
        # The connection is only open here if something left it started;
        # finishing an unstarted connection raises.
        http.finish if http&.started?
      rescue StandardError
        nil
      end
      Thread.current[:apidepth_skip] = previous_skip
    end

    # Writes the raw registry JSON to the configured cache path so a later
    # boot can fall back to it. Failures are logged and never raised.
    def self.write_cache(body)
      path = Apidepth.configuration.registry_cache_path
      validate_cache_path!(path)
      File.write(path, body)
    rescue ArgumentError => e
      Apidepth.logger&.warn("[Apidepth] Invalid registry_cache_path: #{e.message}")
    rescue StandardError => e
      Apidepth.logger&.warn("[Apidepth] Could not write registry cache: #{Apidepth.sanitize_log(e.message)}")
    end

    # Reads the registry from the disk cache.
    #
    # @return [Hash, nil] the cached registry, or nil when the path is
    #   invalid, the file is missing or unreadable, or its content is not a
    #   JSON object
    def self.load_from_disk
      path = Apidepth.configuration.registry_cache_path

      validate_cache_path!(path)
      return nil unless File.exist?(path)

      registry = JSON.parse(File.read(path))
      return registry if registry.is_a?(Hash)

      Apidepth.logger&.warn("[Apidepth] Registry cache is not a JSON object — ignoring")
      nil
    rescue ArgumentError => e
      Apidepth.logger&.warn("[Apidepth] Invalid registry_cache_path: #{e.message}")
      nil
    rescue StandardError => e
      Apidepth.logger&.warn("[Apidepth] Could not read registry cache: #{Apidepth.sanitize_log(e.message)}")
      nil
    end

    # Validates the cache path before any file operation.
    #
    # Requires an absolute path with no ".." segments, so a misconfigured
    # registry_cache_path like "../../etc/cron.d/apidepth" cannot make us
    # write registry JSON outside the intended location.
    #
    # @raise [ArgumentError] when the path is not a String, is not absolute,
    #   or contains a ".." segment
    def self.validate_cache_path!(path)
      unless path.is_a?(String) && path.start_with?("/")
        raise ArgumentError, "registry_cache_path must be an absolute path (got #{path.inspect})"
      end

      return unless path.split("/").include?("..")

      raise ArgumentError, "registry_cache_path must not contain '..' traversal segments (got #{path.inspect})"
    end

    # The `private` keyword does not apply to `def self.method` definitions;
    # private_class_method is what actually hides these helpers.
    private_class_method :apply, :start_refresh_thread, :fetch_remote,
                         :write_cache, :load_from_disk, :validate_cache_path!
  end
end
@@ -0,0 +1,188 @@
1
+ # lib/apidepth/vendor_registry.rb
2
+
3
module Apidepth
  # Maps outbound hosts to vendor slugs and collapses request paths into
  # low-cardinality endpoint names. Starts from BUNDLED_BASELINE and can be
  # swapped wholesale at runtime via .replace.
  module VendorRegistry
    BUNDLED_BASELINE = {
      "version" => "bundled",
      "vendors" => {
        "stripe" => {
          "hosts" => ["api.stripe.com"],
          "patterns" => [
            { "match" => '/v1/charges/ch_\w+', "replace" => "/v1/charges/:id" },
            { "match" => '/v1/customers/cus_\w+', "replace" => "/v1/customers/:id" },
            { "match" => '/v1/payment_intents/pi_\w+', "replace" => "/v1/payment_intents/:id" },
            { "match" => '/v1/subscriptions/sub_\w+', "replace" => "/v1/subscriptions/:id" },
            { "match" => '/v1/invoices/in_\w+', "replace" => "/v1/invoices/:id" },
            { "match" => '/v1/refunds/re_\w+', "replace" => "/v1/refunds/:id" }
          ]
        },
        "openai" => {
          "hosts" => ["api.openai.com"],
          "patterns" => [
            { "match" => "/v1/chat/completions", "replace" => "/v1/chat/completions" },
            { "match" => "/v1/embeddings", "replace" => "/v1/embeddings" },
            { "match" => "/v1/images/generations", "replace" => "/v1/images/generations" },
            { "match" => '/v1/files/file-\w+', "replace" => "/v1/files/:id" }
          ]
        },
        "anthropic" => {
          "hosts" => ["api.anthropic.com"],
          "patterns" => [
            { "match" => "/v1/messages", "replace" => "/v1/messages" }
          ]
        },
        "twilio" => {
          "hosts" => ["api.twilio.com"],
          "patterns" => [
            { "match" => '/2010-04-01/Accounts/AC\w+/Messages/SM\w+', "replace" => "/Accounts/:id/Messages/:id" },
            { "match" => '/2010-04-01/Accounts/AC\w+/Messages', "replace" => "/Accounts/:id/Messages" },
            { "match" => '/2010-04-01/Accounts/AC\w+/Calls/CA\w+', "replace" => "/Accounts/:id/Calls/:id" },
            { "match" => '/2010-04-01/Accounts/AC\w+/Calls', "replace" => "/Accounts/:id/Calls" }
          ]
        },
        "resend" => {
          "hosts" => ["api.resend.com"],
          "patterns" => [
            { "match" => "/emails/[0-9a-f-]{36}", "replace" => "/emails/:id" }
          ]
        },
        "github" => {
          "hosts" => ["api.github.com"],
          "patterns" => [
            { "match" => '/repos/[^/]+/[^/]+/pulls/\d+', "replace" => "/repos/:owner/:repo/pulls/:number" },
            { "match" => '/repos/[^/]+/[^/]+/issues/\d+', "replace" => "/repos/:owner/:repo/issues/:number" },
            { "match" => "/repos/[^/]+/[^/]+", "replace" => "/repos/:owner/:repo" },
            { "match" => "/users/[^/]+", "replace" => "/users/:username" }
          ]
        }
      }
    }.freeze

    # Applied to every path after the vendor-specific rules, in this order.
    GENERIC_PATTERNS = [
      [%r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}}, "/:uuid"],
      [%r{/\d{4,}}, "/:id"],
      [%r{/[a-z0-9]{24,}}, "/:token"]
    ].freeze

    # Registry-supplied patterns containing any of these constructs are
    # rejected: `(?{`, `(?<`, `(?!`, `(?=`, `(?#`, `+?` and `*??`.
    # This is a blocklist — it does not prevent catastrophic-backtracking
    # patterns such as (a+)+ — but plain path patterns never need these.
    UNSAFE_PATTERN = /\(\?[{<!=]|\(\?#|\+\?|\*\?{2}/.freeze

    class << self
      # Resolves a request to its vendor and normalised endpoint.
      #
      # @param host [String] request host, matched exactly against known hosts
      # @param raw_path [String] request path, optionally with a query string
      # @return [Array(String, String), nil] [vendor, normalised_path], or nil
      #   when the host is not a known vendor
      def identify(host, raw_path)
        # Grab both tables in one critical section so they always belong to
        # the same registry version; the lookups then run outside the lock.
        hosts, patterns = @mutex.synchronize { [@hosts, @patterns] }
        vendor = hosts[host]
        return nil unless vendor

        path = strip_query_string(raw_path)
        path = apply_vendor_normalizers(patterns[vendor] || [], path)
        [vendor, apply_generic_normalizers(path)]
      end

      # Merges customer-defined vendors from config.extra_vendors into the
      # host table. Does not touch @patterns — custom vendors get generic path
      # normalisation only.
      #
      # @param extra_vendors [Hash, nil] vendor name => host, or vendor name
      #   => list of hosts
      def load_extra_vendors(extra_vendors)
        return if extra_vendors.nil? || extra_vendors.empty?

        @mutex.synchronize do
          # Copy-on-write: identify reads the table outside the lock, so the
          # hash it may currently hold is never mutated in place.
          @hosts = merge_extra_vendors(@hosts.dup, extra_vendors)
        end
      end

      # Swaps in a new registry.
      #
      # A payload that is not a Hash, or has no "vendors" Hash, is ignored
      # and the current registry stays in place: an error document served
      # with a 200 must not wipe the vendor table. Malformed entries inside
      # an otherwise valid registry are skipped one by one.
      #
      # @param registry_json [Hash] parsed registry ("version", "vendors")
      def replace(registry_json)
        unless registry_json.is_a?(Hash) && registry_json["vendors"].is_a?(Hash)
          Apidepth.logger&.warn(
            "[Apidepth] Ignoring malformed registry — keeping version=#{Apidepth.sanitize_log(version)}"
          )
          return
        end

        # Re-apply extra_vendors so a registry refresh never wipes
        # customer-defined host mappings. The config value wins over any
        # registry entry for the same host.
        new_hosts    = merge_extra_vendors(build_hosts(registry_json), Apidepth.configuration.extra_vendors)
        new_patterns = build_patterns(registry_json)

        @mutex.synchronize do
          @hosts    = new_hosts
          @patterns = new_patterns
          @version  = registry_json["version"]
        end

        Apidepth.logger&.debug(
          "[Apidepth] Registry updated — version=#{Apidepth.sanitize_log(registry_json['version'])} " \
          "vendors=#{new_hosts.values.uniq.count}"
        )
      end

      # Version string of the registry currently in effect.
      def version
        @mutex.synchronize { @version }
      end

      # Number of distinct vendors that have at least one host mapped.
      def vendor_count
        @mutex.synchronize { @hosts.values.uniq.count }
      end

      private

      # Called once when the module body is evaluated. State is set directly
      # rather than through replace, which logs and reads the configuration.
      def initialize_registry
        @mutex    = Mutex.new
        @version  = BUNDLED_BASELINE["version"]
        @hosts    = build_hosts(BUNDLED_BASELINE)
        @patterns = build_patterns(BUNDLED_BASELINE)
      end

      # Vendor entries of `registry` whose configuration is a Hash; anything
      # else is dropped so one bad entry cannot break the whole registry.
      def vendor_entries(registry)
        vendors = registry["vendors"]
        return {} unless vendors.is_a?(Hash)

        vendors.select { |_slug, config| config.is_a?(Hash) }
      end

      # Adds name => host(s) pairs to `hosts` (mutating and returning it).
      # Each value may be a single host or a list of hosts.
      def merge_extra_vendors(hosts, extra_vendors)
        (extra_vendors || {}).each do |name, vendor_hosts|
          Array(vendor_hosts).each { |host| hosts[host.to_s] = name.to_s }
        end
        hosts
      end

      # host => vendor slug lookup table for `registry`.
      def build_hosts(registry)
        vendor_entries(registry).each_with_object({}) do |(slug, config), hosts|
          Array(config["hosts"]).each { |h| hosts[h] = slug }
        end
      end

      # vendor slug => ordered list of [Regexp, replacement] rules.
      def build_patterns(registry)
        vendor_entries(registry).each_with_object({}) do |(slug, config), patterns|
          patterns[slug] = Array(config["patterns"]).filter_map { |rule| compile_rule(slug, rule) }
        end
      end

      # Compiles one registry rule into [Regexp, replacement], or returns nil
      # when the rule must be skipped.
      #
      # Rules that are not a Hash or have an empty "match" are dropped: an
      # empty pattern compiles to a regex that matches every path, and since
      # only the first matching rule is applied it would shadow every rule
      # after it.
      def compile_rule(slug, rule)
        return nil unless rule.is_a?(Hash)

        match = rule["match"].to_s
        return nil if match.empty?

        if match.match?(UNSAFE_PATTERN)
          Apidepth.logger&.warn(
            "[Apidepth] Skipping unsafe pattern for #{Apidepth.sanitize_log(slug)}: #{match.inspect}"
          )
          return nil
        end

        [Regexp.new(match), rule["replace"].to_s]
      rescue RegexpError => e
        Apidepth.logger&.warn(
          "[Apidepth] Skipping invalid pattern for #{Apidepth.sanitize_log(slug)} " \
          "#{match.inspect}: #{e.message}"
        )
        nil
      end

      # Everything before the first "?"; "" for a nil or empty path.
      def strip_query_string(path)
        path.to_s.split("?", 2).first.to_s
      end

      # Applies the first rule whose pattern matches, and only that rule.
      def apply_vendor_normalizers(rules, path)
        rules.each do |pattern, replacement|
          return path.gsub(pattern, replacement) if path.match?(pattern)
        end
        path
      end

      # Applies every generic pattern in order, each on the previous result.
      def apply_generic_normalizers(path)
        GENERIC_PATTERNS.reduce(path) do |p, (pattern, replacement)|
          p.gsub(pattern, replacement)
        end
      end
    end

    initialize_registry
  end
end
@@ -0,0 +1,5 @@
1
+ # lib/apidepth/version.rb
2
+
3
module Apidepth
  # Release version of the gem, as a frozen String.
  VERSION = "0.2.0".freeze
end