apidepth 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/apidepth/model_name_extractor.rb +25 -8
- data/lib/apidepth/vendor_registry.rb +29 -15
- data/lib/apidepth/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 69006b5a613f60527b41de94536502c71526d9e9dd274625c7f235d03624d194
|
|
4
|
+
data.tar.gz: 76941c29e08ec0c4a40e310a017f57f9c32f1c90cc37820ce794834c90ad8d15
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d5e0babbb150df6d38aa32c83df843da7d7991c8fd17be37010d1454eaef8a936b878c4efdc8eb3373d714c70e75a124a99ada7d3fa43e248c4eb464920f833c
|
|
7
|
+
data.tar.gz: fd034d845f43a8f34af26f6cf99067ec51c690168744d379d19727de2c383ea88a8e9ffd89dbdc8ffc79b6fbd2e73fa29f684b1415c7708c1c88bc78ee01f52b
|
|
data/lib/apidepth/model_name_extractor.rb
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
# lib/apidepth/model_name_extractor.rb
|
|
2
|
-
require "json"
|
|
3
2
|
require "set"
|
|
4
3
|
#
|
|
5
4
|
# Extracts the model name from AI vendor JSON response bodies.
|
|
@@ -19,7 +18,12 @@ require "set"
|
|
|
19
18
|
#
|
|
20
19
|
# Streaming safety: streamed responses have Content-Type: text/event-stream, not
|
|
21
20
|
# application/json. The content-type guard exits early before any body read.
|
|
22
|
-
#
|
|
21
|
+
#
|
|
22
|
+
# Extraction strategy (RUBY-018): scan for the JSON "model": "<value>" field
|
|
23
|
+
# with a linear regex rather than JSON.parse-ing a truncated body. Embeddings
|
|
24
|
+
# and batch responses place `model` AFTER a large `data` array, so the old
|
|
25
|
+
# parse-after-8KB-truncate approach produced invalid JSON and silently dropped
|
|
26
|
+
# the model. The regex finds the first structural model field wherever it sits.
|
|
23
27
|
|
|
24
28
|
module Apidepth
|
|
25
29
|
module ModelNameExtractor
|
|
@@ -31,20 +35,33 @@ module Apidepth
|
|
|
31
35
|
api.cohere.com
|
|
32
36
|
].to_set.freeze
|
|
33
37
|
|
|
34
|
-
|
|
38
|
+
# Upper bound on how far into the body we scan for the model field. 256 KB
|
|
39
|
+
# comfortably covers realistic embeddings/batch responses (a few-input OpenAI
|
|
40
|
+
# embeddings body is ~23 KB) while bounding work on pathologically large bodies.
|
|
41
|
+
MODEL_SCAN_MAX_BYTES = 262_144
|
|
42
|
+
|
|
43
|
+
# Matches a structural JSON "model": "<value>" pair. Escaped quotes inside
|
|
44
|
+
# string values appear as \" so this never matches a "model" mentioned inside
|
|
45
|
+
# another JSON string. First match wins (the top-level model field).
|
|
46
|
+
MODEL_RE = /"model"\s*:\s*"([^"]+)"/.freeze
|
|
35
47
|
|
|
36
48
|
def self.extract(host, response)
|
|
37
49
|
return nil unless Apidepth.configuration.capture_model_names
|
|
38
|
-
|
|
50
|
+
# Case-insensitive host match (RUBY-019): DNS hostnames are case-insensitive,
|
|
51
|
+
# so a vendor declared with mixed case (e.g. via extra_vendors) still matches.
|
|
52
|
+
return nil unless AI_VENDOR_HOSTS.include?(host.to_s.downcase)
|
|
39
53
|
return nil unless response["content-type"]&.include?("application/json")
|
|
40
54
|
|
|
41
55
|
body = response.body
|
|
42
56
|
return nil if body.nil? || body.empty?
|
|
43
57
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
rescue
|
|
58
|
+
scan = body.byteslice(0, MODEL_SCAN_MAX_BYTES).to_s.dup.force_encoding("UTF-8")
|
|
59
|
+
match = MODEL_RE.match(scan)
|
|
60
|
+
match && !match[1].empty? ? match[1] : nil
|
|
61
|
+
rescue StandardError
|
|
62
|
+
# Covers malformed/invalid-encoding bodies and non-buffered streaming
|
|
63
|
+
# bodies (e.g. Net::ReadAdapter, which has no #empty?). Returning nil keeps
|
|
64
|
+
# the surrounding telemetry event intact rather than dropping it (RUBY-017).
|
|
48
65
|
nil
|
|
49
66
|
end
|
|
50
67
|
end
|
|
data/lib/apidepth/vendor_registry.rb
CHANGED
|
@@ -58,12 +58,22 @@ module Apidepth
|
|
|
58
58
|
}
|
|
59
59
|
}.freeze
|
|
60
60
|
|
|
61
|
+
# Generic fallbacks applied after vendor-specific patterns. Canonical across
|
|
62
|
+
# all SDKs (XSDK-NORM) — see apidepth-collector/tests/fixtures/endpoint_cases.json.
|
|
63
|
+
# The :token rule requires at least one digit (?=[a-z0-9]*\d) so 24+ char
|
|
64
|
+
# readable slugs are left intact while opaque IDs/tokens — which effectively
|
|
65
|
+
# always contain a digit — are collapsed. UUID is case-insensitive.
|
|
61
66
|
GENERIC_PATTERNS = [
|
|
62
|
-
[%r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}}, "/:uuid"],
|
|
63
|
-
[%r{/\d{4,}},
|
|
64
|
-
[%r{/[a-z0-9]{24,}}, "/:token"]
|
|
67
|
+
[%r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}}i, "/:uuid"],
|
|
68
|
+
[%r{/\d{4,}}, "/:id"],
|
|
69
|
+
[%r{/(?=[a-z0-9]*\d)[a-z0-9]{24,}}i, "/:token"]
|
|
65
70
|
].freeze
|
|
66
71
|
|
|
72
|
+
# Upper bound on path length we run the generic normalizers against. Realistic
|
|
73
|
+
# paths are well under 4 KB; above this we skip normalization because the
|
|
74
|
+
# :token lookahead is O(n^2) worst-case on a long digit-free alnum run.
|
|
75
|
+
GENERIC_MAX_PATH = 4096
|
|
76
|
+
|
|
67
77
|
# True when the runtime supports Regexp.timeout (introduced in Ruby 3.2).
|
|
68
78
|
# Used by apply_vendor_normalizers to enable ReDoS protection when available.
|
|
69
79
|
RUBY_GTE_3_2 = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.2")
|
|
@@ -157,7 +167,13 @@ module Apidepth
|
|
|
157
167
|
next
|
|
158
168
|
end
|
|
159
169
|
|
|
160
|
-
|
|
170
|
+
# ReDoS protection: bake a per-pattern timeout into the Regexp at
|
|
171
|
+
# compile time on Ruby >= 3.2 (RUBY-020). This bounds match time for
|
|
172
|
+
# a pathological pattern from a compromised/misconfigured registry
|
|
173
|
+
# without mutating the process-global Regexp.timeout on every request
|
|
174
|
+
# (which would impose the limit on unrelated regexes in other threads).
|
|
175
|
+
compiled = RUBY_GTE_3_2 ? Regexp.new(match, timeout: 0.001) : Regexp.new(match)
|
|
176
|
+
[compiled, rule["replace"].to_s]
|
|
161
177
|
rescue RegexpError => e
|
|
162
178
|
Apidepth.logger&.warn(
|
|
163
179
|
"[Apidepth] Skipping invalid pattern for #{Apidepth.sanitize_log(slug)} " \
|
|
@@ -179,25 +195,23 @@ module Apidepth
|
|
|
179
195
|
# broader catch-alls (e.g. /v1/:resource/:id). A less-specific rule placed
|
|
180
196
|
# earlier will shadow any more-specific rules that follow it.
|
|
181
197
|
#
|
|
182
|
-
# ReDoS protection:
|
|
183
|
-
#
|
|
184
|
-
# cannot stall the request thread
|
|
185
|
-
#
|
|
198
|
+
# ReDoS protection: each compiled pattern carries its own 1ms timeout (set
|
|
199
|
+
# in build_patterns on Ruby >= 3.2), so a pathological pattern from a
|
|
200
|
+
# compromised or misconfigured registry cannot stall the request thread
|
|
201
|
+
# indefinitely — without touching the process-global Regexp.timeout
|
|
202
|
+
# (RUBY-020). On older Ruby the timeout is absent; use a trusted,
|
|
203
|
+
# internally-reviewed registry source. A Regexp::TimeoutError that trips
|
|
204
|
+
# here propagates to identify's caller, which already rescues StandardError.
|
|
186
205
|
def apply_vendor_normalizers(rules, path)
|
|
187
|
-
if RUBY_GTE_3_2
|
|
188
|
-
saved_timeout = Regexp.timeout
|
|
189
|
-
Regexp.timeout = 0.001
|
|
190
|
-
end
|
|
191
|
-
|
|
192
206
|
rules.each do |pattern, replacement|
|
|
193
207
|
return path.gsub(pattern, replacement) if path.match?(pattern)
|
|
194
208
|
end
|
|
195
209
|
path
|
|
196
|
-
ensure
|
|
197
|
-
Regexp.timeout = saved_timeout if RUBY_GTE_3_2
|
|
198
210
|
end
|
|
199
211
|
|
|
200
212
|
def apply_generic_normalizers(path)
|
|
213
|
+
return path if path.length > GENERIC_MAX_PATH
|
|
214
|
+
|
|
201
215
|
GENERIC_PATTERNS.reduce(path) do |p, (pattern, replacement)|
|
|
202
216
|
p.gsub(pattern, replacement)
|
|
203
217
|
end
|
data/lib/apidepth/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: apidepth
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.0
|
|
4
|
+
version: 0.5.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Apidepth
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: json
|