apidepth 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +40 -0
- data/lib/apidepth/configuration.rb +5 -3
- data/lib/apidepth/model_name_extractor.rb +68 -0
- data/lib/apidepth/net_http_instrumentation.rb +15 -13
- data/lib/apidepth/vendor_registry.rb +29 -15
- data/lib/apidepth/version.rb +1 -1
- data/lib/apidepth.rb +10 -8
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 69006b5a613f60527b41de94536502c71526d9e9dd274625c7f235d03624d194
|
|
4
|
+
data.tar.gz: 76941c29e08ec0c4a40e310a017f57f9c32f1c90cc37820ce794834c90ad8d15
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d5e0babbb150df6d38aa32c83df843da7d7991c8fd17be37010d1454eaef8a936b878c4efdc8eb3373d714c70e75a124a99ada7d3fa43e248c4eb464920f833c
|
|
7
|
+
data.tar.gz: fd034d845f43a8f34af26f6cf99067ec51c690168744d379d19727de2c383ea88a8e9ffd89dbdc8ffc79b6fbd2e73fa29f684b1415c7708c1c88bc78ee01f52b
|
data/README.md
CHANGED
|
@@ -56,6 +56,46 @@ Get your API key at [apidepth.io](https://apidepth.io).
|
|
|
56
56
|
|
|
57
57
|
---
|
|
58
58
|
|
|
59
|
+
## CLI
|
|
60
|
+
|
|
61
|
+
The gem ships two subcommands for setup and connectivity verification.
|
|
62
|
+
|
|
63
|
+
### `bundle exec apidepth setup`
|
|
64
|
+
|
|
65
|
+
Interactive wizard that detects your framework (Rails, Sinatra, or generic), generates the correct initializer snippet, and optionally writes it to disk.
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
bundle exec apidepth setup
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
For CI/CD pipelines, skip all prompts:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
bundle exec apidepth setup --api-key $APIDEPTH_API_KEY --no-prompt
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
| Flag | Description |
|
|
78
|
+
|---|---|
|
|
79
|
+
| `--api-key <key>` | Inject your API key into the generated snippet. |
|
|
80
|
+
| `--no-prompt` | Non-interactive mode — print snippet to stdout and exit. |
|
|
81
|
+
| `--framework <name>` | Override auto-detection (`rails`, `sinatra`, `generic`). |
|
|
82
|
+
| `--ignored-hosts <patterns>` | Comma-separated host patterns to add to `ignored_hosts` (glob wildcards supported). |
|
|
83
|
+
| `--collector-url <url>` | Override the collector URL in the generated snippet. |
|
|
84
|
+
|
|
85
|
+
### `bundle exec apidepth test`
|
|
86
|
+
|
|
87
|
+
Sends a synthetic test event to the collector and confirms the pipeline is working end-to-end. Reads `APIDEPTH_API_KEY` (and optionally `APIDEPTH_COLLECTOR_URL`) from the environment. Prints the round-trip time on success, or a per-failure-mode error message with next steps on failure.
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
bundle exec apidepth test
|
|
91
|
+
# ✓ received in 142ms
|
|
92
|
+
# Visit your dashboard: https://apidepth.io/dashboard
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Exits with code 1 on any error (bad key, unreachable, SSL failure, timeout).
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
59
99
|
## Configuration
|
|
60
100
|
|
|
61
101
|
All options with their defaults:
|
|
@@ -18,9 +18,10 @@ module Apidepth
|
|
|
18
18
|
:registry_refresh_interval,
|
|
19
19
|
:registry_cache_path,
|
|
20
20
|
:on_flush_error,
|
|
21
|
-
:environment,
|
|
22
|
-
:sample_rate,
|
|
23
|
-
:extra_vendors
|
|
21
|
+
:environment, # e.g. "production" — set by Railtie from Rails.env
|
|
22
|
+
:sample_rate, # Float 0.0–1.0, default 1.0 (100% of events captured)
|
|
23
|
+
:extra_vendors, # Hash of vendor_name => host, e.g. { "my-api" => "api.myservice.com" }
|
|
24
|
+
:capture_model_names # Boolean — read model field from AI vendor JSON responses
|
|
24
25
|
|
|
25
26
|
attr_reader :ignored_hosts, :collector_url
|
|
26
27
|
|
|
@@ -35,6 +36,7 @@ module Apidepth
|
|
|
35
36
|
@environment = nil # Railtie sets this to Rails.env at boot
|
|
36
37
|
@sample_rate = 1.0 # capture everything by default
|
|
37
38
|
@extra_vendors = {} # customer-defined host mappings
|
|
39
|
+
@capture_model_names = true # read model field from AI vendor JSON responses
|
|
38
40
|
_rebuild_ignored_hosts
|
|
39
41
|
end
|
|
40
42
|
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# lib/apidepth/model_name_extractor.rb
|
|
2
|
+
require "set"
|
|
3
|
+
#
|
|
4
|
+
# Extracts the model name from AI vendor JSON response bodies.
|
|
5
|
+
#
|
|
6
|
+
# WHY response body rather than headers?
|
|
7
|
+
# AI vendors (OpenAI, Anthropic, Gemini, Mistral, Cohere) return the active
|
|
8
|
+
# model in the response body ({"model":"claude-3-opus-20240229",...}), not in
|
|
9
|
+
# headers. This is the only reliable source.
|
|
10
|
+
#
|
|
11
|
+
# WHY only for known AI vendor hosts?
|
|
12
|
+
# Body reads add a tiny overhead. Scoping to a hard-coded allowlist keeps the
|
|
13
|
+
# hot path for non-AI vendors completely unaffected.
|
|
14
|
+
#
|
|
15
|
+
# Body safety: Net::HTTPResponse#body memoizes after the first read.
|
|
16
|
+
# Calling it here and returning the response to the application is safe — the
|
|
17
|
+
# application receives the same cached body bytes.
|
|
18
|
+
#
|
|
19
|
+
# Streaming safety: streamed responses have Content-Type: text/event-stream, not
|
|
20
|
+
# application/json. The content-type guard exits early before any body read.
|
|
21
|
+
#
|
|
22
|
+
# Extraction strategy (RUBY-018): scan for the JSON "model": "<value>" field
|
|
23
|
+
# with a linear regex rather than JSON.parse-ing a truncated body. Embeddings
|
|
24
|
+
# and batch responses place `model` AFTER a large `data` array, so the old
|
|
25
|
+
# parse-after-8KB-truncate approach produced invalid JSON and silently dropped
|
|
26
|
+
# the model. The regex finds the first structural model field wherever it sits.
|
|
27
|
+
|
|
28
|
+
module Apidepth
  # Extracts the model name from AI vendor JSON response bodies.
  #
  # The only entry point is ModelNameExtractor.extract(host, response). It is
  # best-effort by design: any failure yields nil so the surrounding telemetry
  # event is still recorded without a model name.
  module ModelNameExtractor
    # Hosts whose JSON responses are scanned for a model field. Stored
    # lowercase; lookups downcase the incoming host before testing membership.
    AI_VENDOR_HOSTS = %w[
      api.openai.com
      api.anthropic.com
      generativelanguage.googleapis.com
      api.mistral.ai
      api.cohere.com
    ].to_set.freeze

    # Upper bound on how far into the body we scan for the model field. 256 KB
    # comfortably covers realistic embeddings/batch responses (a few-input OpenAI
    # embeddings body is ~23 KB) while bounding work on pathologically large bodies.
    MODEL_SCAN_MAX_BYTES = 262_144

    # Matches a JSON "model": "<value>" pair. Escaped quotes inside string
    # values appear as \" so this never matches a "model" mentioned inside
    # another JSON string. The first match in the scanned prefix wins.
    MODEL_RE = /"model"\s*:\s*"([^"]+)"/.freeze

    # Returns the model name reported in an AI vendor's JSON response.
    #
    # @param host [#to_s] request host; compared case-insensitively against
    #   AI_VENDOR_HOSTS (RUBY-019)
    # @param response [#[], #body] response object exposing header lookup via
    #   #[] and the buffered body via #body
    # @return [String, nil] the first "model" value found within the first
    #   MODEL_SCAN_MAX_BYTES bytes of the body, or nil when capture is
    #   disabled, the host is not an AI vendor, the response is not JSON, the
    #   body is empty, or anything goes wrong while reading it
    def self.extract(host, response)
      return nil unless Apidepth.configuration.capture_model_names
      return nil unless AI_VENDOR_HOSTS.include?(host.to_s.downcase)

      # Media types are case-insensitive, so normalise before comparing. A
      # missing header becomes "" and fails the guard without reading the body.
      content_type = response["content-type"].to_s.downcase
      return nil unless content_type.include?("application/json")

      body = response.body
      return nil if body.nil? || body.empty?

      # byteslice can cut a multi-byte character in half at the scan limit, and
      # vendor bodies are not guaranteed to be valid UTF-8. Matching a regexp
      # against an invalid string raises, which previously fell through to the
      # rescue below and silently dropped the model name. Scrubbing replaces
      # the invalid bytes so the scan still runs.
      scan = body.byteslice(0, MODEL_SCAN_MAX_BYTES).force_encoding(Encoding::UTF_8).scrub

      # String#[] with a capture index returns the capture or nil on no match.
      scan[MODEL_RE, 1]
    rescue StandardError
      # Covers non-buffered streaming bodies (e.g. Net::ReadAdapter, which has
      # no #empty?) and any other unexpected failure. Returning nil keeps the
      # surrounding telemetry event intact rather than dropping it (RUBY-017).
      nil
    end
  end
end
|
|
@@ -72,21 +72,23 @@ module Apidepth
|
|
|
72
72
|
|
|
73
73
|
now_ms = Process.clock_gettime(Process::CLOCK_REALTIME, :millisecond)
|
|
74
74
|
rl = Apidepth::RateLimitHeaders.extract(response, now_ms)
|
|
75
|
+
model_name = Apidepth::ModelNameExtractor.extract(address, response)
|
|
76
|
+
|
|
77
|
+
event_attrs = {
|
|
78
|
+
vendor: vendor,
|
|
79
|
+
endpoint: normalized_path,
|
|
80
|
+
method: req.method,
|
|
81
|
+
status: status,
|
|
82
|
+
outcome: outcome,
|
|
83
|
+
duration_ms: duration_ms,
|
|
84
|
+
cold_start: cold_start,
|
|
85
|
+
env: resolve_env,
|
|
86
|
+
ts: now_ms
|
|
87
|
+
}.merge(rl || {})
|
|
88
|
+
event_attrs[:model_name] = model_name if model_name
|
|
75
89
|
|
|
76
90
|
Apidepth::Collector.instance.record(
|
|
77
|
-
Apidepth::Event.build(
|
|
78
|
-
{
|
|
79
|
-
vendor: vendor,
|
|
80
|
-
endpoint: normalized_path,
|
|
81
|
-
method: req.method,
|
|
82
|
-
status: status,
|
|
83
|
-
outcome: outcome,
|
|
84
|
-
duration_ms: duration_ms,
|
|
85
|
-
cold_start: cold_start,
|
|
86
|
-
env: resolve_env,
|
|
87
|
-
ts: now_ms
|
|
88
|
-
}.merge(rl || {})
|
|
89
|
-
)
|
|
91
|
+
Apidepth::Event.build(event_attrs)
|
|
90
92
|
)
|
|
91
93
|
rescue StandardError => e
|
|
92
94
|
Apidepth.logger&.debug("[Apidepth] Instrumentation error: #{e.class}: #{e.message}")
|
|
@@ -58,12 +58,22 @@ module Apidepth
|
|
|
58
58
|
}
|
|
59
59
|
}.freeze
|
|
60
60
|
|
|
61
|
+
# Generic fallbacks applied after vendor-specific patterns. Canonical across
|
|
62
|
+
# all SDKs (XSDK-NORM) — see apidepth-collector/tests/fixtures/endpoint_cases.json.
|
|
63
|
+
# The :token rule requires at least one digit (?=[a-z0-9]*\d) so 24+ char
|
|
64
|
+
# readable slugs are left intact while opaque IDs/tokens — which effectively
|
|
65
|
+
# always contain a digit — are collapsed. UUID is case-insensitive.
|
|
61
66
|
GENERIC_PATTERNS = [
|
|
62
|
-
[%r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}}, "/:uuid"],
|
|
63
|
-
[%r{/\d{4,}},
|
|
64
|
-
[%r{/[a-z0-9]{24,}}, "/:token"]
|
|
67
|
+
[%r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}}i, "/:uuid"],
|
|
68
|
+
[%r{/\d{4,}}, "/:id"],
|
|
69
|
+
[%r{/(?=[a-z0-9]*\d)[a-z0-9]{24,}}i, "/:token"]
|
|
65
70
|
].freeze
|
|
66
71
|
|
|
72
|
+
# Upper bound on path length we run the generic normalizers against. Realistic
|
|
73
|
+
# paths are well under 4 KB; above this we skip normalization because the
|
|
74
|
+
# :token lookahead is O(n^2) worst-case on a long digit-free alnum run.
|
|
75
|
+
GENERIC_MAX_PATH = 4096
|
|
76
|
+
|
|
67
77
|
# True when the runtime supports Regexp.timeout (introduced in Ruby 3.2).
|
|
68
78
|
# Used by apply_vendor_normalizers to enable ReDoS protection when available.
|
|
69
79
|
RUBY_GTE_3_2 = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.2")
|
|
@@ -157,7 +167,13 @@ module Apidepth
|
|
|
157
167
|
next
|
|
158
168
|
end
|
|
159
169
|
|
|
160
|
-
|
|
170
|
+
# ReDoS protection: bake a per-pattern timeout into the Regexp at
|
|
171
|
+
# compile time on Ruby >= 3.2 (RUBY-020). This bounds match time for
|
|
172
|
+
# a pathological pattern from a compromised/misconfigured registry
|
|
173
|
+
# without mutating the process-global Regexp.timeout on every request
|
|
174
|
+
# (which would impose the limit on unrelated regexes in other threads).
|
|
175
|
+
compiled = RUBY_GTE_3_2 ? Regexp.new(match, timeout: 0.001) : Regexp.new(match)
|
|
176
|
+
[compiled, rule["replace"].to_s]
|
|
161
177
|
rescue RegexpError => e
|
|
162
178
|
Apidepth.logger&.warn(
|
|
163
179
|
"[Apidepth] Skipping invalid pattern for #{Apidepth.sanitize_log(slug)} " \
|
|
@@ -179,25 +195,23 @@ module Apidepth
|
|
|
179
195
|
# broader catch-alls (e.g. /v1/:resource/:id). A less-specific rule placed
|
|
180
196
|
# earlier will shadow any more-specific rules that follow it.
|
|
181
197
|
#
|
|
182
|
-
# ReDoS protection:
|
|
183
|
-
#
|
|
184
|
-
# cannot stall the request thread
|
|
185
|
-
#
|
|
198
|
+
# ReDoS protection: each compiled pattern carries its own 1ms timeout (set
|
|
199
|
+
# in build_patterns on Ruby >= 3.2), so a pathological pattern from a
|
|
200
|
+
# compromised or misconfigured registry cannot stall the request thread
|
|
201
|
+
# indefinitely — without touching the process-global Regexp.timeout
|
|
202
|
+
# (RUBY-020). On older Ruby the timeout is absent; use a trusted,
|
|
203
|
+
# internally-reviewed registry source. A Regexp::TimeoutError that trips
|
|
204
|
+
# here propagates to identify's caller, which already rescues StandardError.
|
|
186
205
|
def apply_vendor_normalizers(rules, path)
|
|
187
|
-
if RUBY_GTE_3_2
|
|
188
|
-
saved_timeout = Regexp.timeout
|
|
189
|
-
Regexp.timeout = 0.001
|
|
190
|
-
end
|
|
191
|
-
|
|
192
206
|
rules.each do |pattern, replacement|
|
|
193
207
|
return path.gsub(pattern, replacement) if path.match?(pattern)
|
|
194
208
|
end
|
|
195
209
|
path
|
|
196
|
-
ensure
|
|
197
|
-
Regexp.timeout = saved_timeout if RUBY_GTE_3_2
|
|
198
210
|
end
|
|
199
211
|
|
|
200
212
|
def apply_generic_normalizers(path)
|
|
213
|
+
return path if path.length > GENERIC_MAX_PATH
|
|
214
|
+
|
|
201
215
|
GENERIC_PATTERNS.reduce(path) do |p, (pattern, replacement)|
|
|
202
216
|
p.gsub(pattern, replacement)
|
|
203
217
|
end
|
data/lib/apidepth/version.rb
CHANGED
data/lib/apidepth.rb
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
# lib/apidepth.rb
|
|
2
2
|
#
|
|
3
3
|
# Main entry point. Require order matters:
|
|
4
|
-
# 1. version
|
|
5
|
-
# 2. configuration
|
|
6
|
-
# 3. vendor_registry
|
|
7
|
-
# 4. rate_limit_headers
|
|
8
|
-
# 5.
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
4
|
+
# 1. version — no dependencies
|
|
5
|
+
# 2. configuration — no dependencies
|
|
6
|
+
# 3. vendor_registry — no dependencies, boots from BUNDLED_BASELINE immediately
|
|
7
|
+
# 4. rate_limit_headers — no dependencies; used by net_http_instrumentation
|
|
8
|
+
# 5. model_name_extractor — no dependencies; used by net_http_instrumentation
|
|
9
|
+
# 6. net_http_instrumentation — depends on vendor_registry + collector (via lazy reference)
|
|
10
|
+
# 7. collector — depends on configuration
|
|
11
|
+
# 8. registry_loader — depends on collector + vendor_registry
|
|
12
|
+
# 9. railtie — depends on all of the above; only loaded in a Rails context
|
|
12
13
|
|
|
13
14
|
require "logger"
|
|
14
15
|
require "apidepth/version"
|
|
@@ -16,6 +17,7 @@ require "apidepth/configuration"
|
|
|
16
17
|
require "apidepth/event"
|
|
17
18
|
require "apidepth/vendor_registry"
|
|
18
19
|
require "apidepth/rate_limit_headers"
|
|
20
|
+
require "apidepth/model_name_extractor"
|
|
19
21
|
require "apidepth/net_http_instrumentation"
|
|
20
22
|
require "apidepth/collector"
|
|
21
23
|
require "apidepth/registry_loader"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: apidepth
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.5.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Apidepth
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-06-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: json
|
|
@@ -129,6 +129,7 @@ files:
|
|
|
129
129
|
- lib/apidepth/collector.rb
|
|
130
130
|
- lib/apidepth/configuration.rb
|
|
131
131
|
- lib/apidepth/event.rb
|
|
132
|
+
- lib/apidepth/model_name_extractor.rb
|
|
132
133
|
- lib/apidepth/net_http_instrumentation.rb
|
|
133
134
|
- lib/apidepth/railtie.rb
|
|
134
135
|
- lib/apidepth/rate_limit_headers.rb
|