mcp 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ipaddr"
4
+ require "uri"
5
+
6
module MCP
  class Client
    module OAuth
      # Stateless helpers that map MCP-authorization spec URLs and headers into something
      # the `Flow` orchestrator and `MCP::Client::HTTP` transport can act on.
      # The module bundles five concerns that share no state but are closely related to
      # the spec's "Discovery" and "Communication Security" sections:
      #
      # - **`WWW-Authenticate` parsing** (`parse_www_authenticate`): pulls
      #   the Bearer challenge parameters (`resource_metadata`, `scope`, `error`,
      #   ...) out of a header that may carry multiple challenges per RFC 7235
      #   and may use `quoted-pair` escapes per RFC 7230 Section 3.2.6.
      # - **Discovery URL builders** (`protected_resource_metadata_urls`,
      #   `authorization_server_metadata_urls`): list the candidate well-known
      #   URLs to probe when no explicit metadata URL is supplied,
      #   in the priority order required by RFC 9728 and RFC 8414.
      # - **Communication Security check** (`secure_url?`): enforces "HTTPS only"
      #   for every OAuth-facing URL, with the loopback carve-out described in
      #   `secure_url?`'s comment.
      # - **URL canonicalization** (`canonicalize_url`): normalizes scheme,
      #   host, port, path, percent-encoded dot segments, and fragments
      #   so two URLs that *refer to the same resource* compare as equal,
      #   and drops userinfo so credentials never reach the RFC 8707 `resource` claim
      #   or any error message.
      # - **Resource coverage** (`resource_covers?`): decides whether a PRM `resource` URI
      #   is allowed to govern a given MCP server URL, i.e. whether the MCP endpoint sits
      #   "under" the resource per RFC 8707 audience semantics.
      #
      # Every entry point is a class method that keeps no state between calls.
      module Discovery
        # Matches a single `key=value` pair inside an HTTP auth-scheme challenge.
        # `value` is either a quoted string (which can contain commas and spaces)
        # or a bare token, per RFC 7235.
        WWW_AUTH_PARAM_PATTERN = /\A([A-Za-z0-9_-]+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^\s,]+))/

        # Matches one complete `quoted-string`, including `quoted-pair` escapes.
        # Used to blank out parameter values before searching for an auth-scheme name.
        QUOTED_STRING_PATTERN = /"(?:[^"\\]|\\.)*"/

        # Matches the `Bearer` auth-scheme name at the start of the header or after a comma.
        BEARER_SCHEME_PATTERN = /(?:\A|,)\s*Bearer(?:\s+|\z)/i

        private_constant :QUOTED_STRING_PATTERN, :BEARER_SCHEME_PATTERN

        class << self
          # Parses a `WWW-Authenticate` header and returns the parameters of
          # the `Bearer` challenge as a hash with lower-cased keys (e.g. `resource_metadata`,
          # `scope`, `error`). Returns `{}` when `header` is nil or no Bearer challenge is present.
          # Handles multiple challenges (e.g. `Basic ..., Bearer ...` or `Bearer ..., DPoP ...`)
          # by extracting only the Bearer parameters.
          #
          # The scheme name is located on a copy of the header whose quoted-string
          # contents are blanked out, so text such as `realm="a, Bearer b"` inside
          # another challenge's parameter value is never mistaken for the Bearer challenge.
          #
          # - https://www.rfc-editor.org/rfc/rfc9728.html#section-5.1
          # - https://www.rfc-editor.org/rfc/rfc7235.html#section-4.1
          def parse_www_authenticate(header)
            return {} unless header

            # The masked copy has exactly the same length as `header`, so offsets
            # found in it can be applied to the original string.
            bearer = mask_quoted_strings(header).match(BEARER_SCHEME_PATTERN)
            return {} unless bearer

            # Walk key=value pairs starting where Bearer's parameters begin.
            # The loop stops at the first token that is not a key=value pair,
            # which marks the next challenge (e.g. `, DPoP algs="..."`) or the end of input.
            params = {}
            remaining = header[bearer.end(0)..]
            loop do
              remaining = remaining.sub(/\A\s*,?\s*/, "")
              match = remaining.match(WWW_AUTH_PARAM_PATTERN)
              break unless match

              # match[2] is the quoted form (needs unescaping), match[3] the bare token.
              params[match[1].downcase] = match[2] ? unescape_quoted_pair(match[2]) : match[3]
              remaining = match.post_match
            end
            params
          end

          # Returns the candidate Protected Resource Metadata URLs to probe, in priority order:
          # the explicit `resource_metadata_url` (when given), the path-inserted well-known URL,
          # then the root well-known URL. Duplicates are removed. Only a bare root path (`/`)
          # is treated as "no path"; any other path is inserted verbatim.
          # https://modelcontextprotocol.io/specification/2025-11-25/basic/authorization#protected-resource-metadata-discovery-requirements
          def protected_resource_metadata_urls(server_url:, resource_metadata_url: nil)
            urls = []
            urls << resource_metadata_url if resource_metadata_url

            uri = URI.parse(server_url)
            path = uri.path == "/" ? "" : uri.path.to_s
            base = base_url(uri)

            urls << "#{base}/.well-known/oauth-protected-resource#{path}"
            urls << "#{base}/.well-known/oauth-protected-resource"
            urls.uniq
          end

          # Returns the candidate Authorization Server metadata URLs to probe, in priority order.
          #
          # A terminating `/` on the issuer path is removed before the well-known
          # segments are inserted or appended, as RFC 8414 Section 3.1 and OpenID Connect
          # Discovery Section 4.1 require. Without this, an issuer such as
          # `https://as.example.com/tenant/` would produce
          # `https://as.example.com/tenant//.well-known/openid-configuration`.
          # https://modelcontextprotocol.io/specification/2025-11-25/basic/authorization#authorization-server-metadata-discovery
          def authorization_server_metadata_urls(issuer_url)
            uri = URI.parse(issuer_url)
            # `chomp("/")` also turns a bare root path (`/`) into the empty string.
            path = uri.path.to_s.chomp("/")
            base = base_url(uri)

            if path.empty?
              ["#{base}/.well-known/oauth-authorization-server", "#{base}/.well-known/openid-configuration"]
            else
              [
                "#{base}/.well-known/oauth-authorization-server#{path}",
                "#{base}/.well-known/openid-configuration#{path}",
                "#{base}#{path}/.well-known/openid-configuration",
              ]
            end
          end

          # Returns a canonical form of `url` suitable for comparing two URIs
          # that are meant to identify the same protected resource: lowercased scheme/host,
          # default port stripped, fragment removed, percent-encoded dot octets normalized
          # to `.` per RFC 3986 Section 6.2.2.2, dot-segments in the path resolved per
          # RFC 3986 Section 5.2.4, a single trailing `/` on the root path normalized away,
          # and the query string normalized by `normalize_query`.
          #
          # Userinfo is *dropped*. The MCP authorization spec sends the canonicalized URL
          # on the wire as the RFC 8707 `resource` claim and surfaces it in error messages;
          # both paths would leak `user:pass@` credentials to the authorization server and
          # to log destinations if we preserved them. The MCP server URI does not legitimately
          # carry userinfo, so dropping it is also a no-op for normal traffic.
          #
          # Decoding `%2e`/`%2E` *before* dot-segment resolution is what prevents
          # an attacker-supplied URL like `https://srv.example.com/api/%2e%2e/mcp` from sneaking
          # past the PRM `resource` check in `resource_covers?`.
          #
          # Raises `URI::InvalidURIError` when `url` cannot be parsed.
          def canonicalize_url(url)
            uri = canonical_uri(url)
            uri.query = normalize_query(uri.query)
            uri.to_s
          end

          # Returns true when `url` is safe to use for OAuth communication per
          # the MCP authorization spec's "Communication Security" requirement:
          # `https` is always allowed, `http` is permitted only when the host is
          # a loopback address (`localhost`, `127.0.0.0/8`, or `::1`).
          #
          # The loopback exception applies uniformly to every OAuth-related URL
          # the SDK consumes (PRM URL, AS metadata URL, `authorization_servers`
          # entries, `authorization_endpoint`, `token_endpoint`, `registration_endpoint`,
          # the `redirect_uri`, and the MCP transport URL when `oauth:` is set).
          # A strict reading of OAuth 2.1 reserves the loopback carve-out for
          # `redirect_uri` only (per RFC 8252), but neither the Python nor
          # the TypeScript MCP SDK enforces HTTPS on those endpoints either -
          # and the official MCP conformance test suite drives its fixtures
          # over `http://localhost` auth servers, so enforcing HTTPS for everything
          # except `redirect_uri` would break local development out of the box and
          # regress 16 conformance scenarios. Operators who run in production are
          # expected to deploy real HTTPS endpoints; this helper does not enforce
          # that at the SDK boundary.
          #
          # Rejects URLs that fail to parse, lack a host, or whose `http://` host is
          # something like `127.attacker.com` or `foo.localhost`,
          # which would otherwise pass a naive `start_with?("127.")` check.
          # https://modelcontextprotocol.io/specification/2025-11-25/basic/authorization#communication-security
          def secure_url?(url)
            return false if url.nil? || url.to_s.empty?

            uri = URI.parse(url.to_s)
            return false if uri.host.nil? || uri.host.empty?

            scheme = uri.scheme&.downcase
            return true if scheme == "https"
            return loopback_host?(uri.host) if scheme == "http"

            false
          rescue URI::InvalidURIError
            false
          end

          # Like `canonicalize_url` but also strips the query string (fragment and
          # userinfo are stripped by both). This variant is used for identity comparison against
          # the request URL Faraday actually sends, which differs from the value
          # the caller passed in two ways: `Faraday::Connection#url_prefix`
          # drops query parameters, and Faraday hoists `user:pass@` out of
          # the URL into an `Authorization: Basic` header before the request goes
          # out. Including userinfo here would (a) raise a false-positive
          # `InsecureURLError` on any legitimate URL with credentials in
          # the authority, and (b) leak `user:pass` through the resulting error
          # message - both of which would defeat the bearer-token-protection
          # purpose of the identity check.
          #
          # Raises `URI::InvalidURIError` when `url` cannot be parsed.
          def canonicalize_origin_and_path(url)
            uri = canonical_uri(url)
            uri.query = nil
            uri.to_s
          end

          # Returns true when `prm` (a PRM `resource` URL) covers `server`
          # (the MCP endpoint URL): same scheme/host/port, with PRM's path being
          # a prefix of the server's path. When PRM also advertises a query
          # string, the server's query MUST be identical to it
          # (otherwise a hijacked PRM that advertises `?tenant=evil` would cover
          # an MCP server at `?tenant=victim` and let the attacker mint
          # a different tenant's token for the same origin + path).
          # PRM with *no* query (URI#query returns `nil`) acts as a generic identifier
          # over the origin + path prefix and covers any server query.
          #
          # An empty query (`prm_url?` -- URI#query returns `""`) is NOT
          # treated as wildcard: it represents the URI literally `<...>?`,
          # which is distinct from "no query at all" and from any non-empty query,
          # so it must match exactly.
          #
          # Both arguments must already be canonicalized.
          def resource_covers?(prm:, server:)
            prm_uri = URI.parse(prm)
            server_uri = URI.parse(server)
            return false unless prm_uri.scheme == server_uri.scheme &&
              prm_uri.host == server_uri.host &&
              prm_uri.port == server_uri.port

            prm_path = prm_uri.path.to_s
            server_path = server_uri.path.to_s
            prm_path = "" if prm_path == "/"
            server_path = "" if server_path == "/"
            # Prefix match on whole segments only: `/api` covers `/api/mcp` but not `/apiv2`.
            path_covers = server_path == prm_path || server_path.start_with?("#{prm_path}/")
            return false unless path_covers

            prm_query = prm_uri.query
            return true if prm_query.nil?

            prm_query == server_uri.query
          end

          private

          # Returns a copy of `header` in which the contents of every complete
          # quoted-string are replaced by `_` characters. The surrounding quotes and
          # the overall length are preserved, so character offsets found in the masked
          # copy are valid in the original header.
          def mask_quoted_strings(header)
            header.gsub(QUOTED_STRING_PATTERN) { |quoted| '"' + ("_" * (quoted.length - 2)) + '"' }
          end

          # Unescapes a `quoted-string` value's `quoted-pair` octets per
          # RFC 7230 Section 3.2.6 (referenced from RFC 7235): `\<char>` becomes `<char>`.
          # https://www.rfc-editor.org/rfc/rfc7230#section-3.2.6
          def unescape_quoted_pair(value)
            value.gsub(/\\(.)/, '\1')
          end

          # Parses `url` and applies the normalization shared by `canonicalize_url`
          # and `canonicalize_origin_and_path`: fragment and userinfo removed,
          # scheme/host lowercased, default port stripped, `%2e`/`%2E` decoded,
          # dot-segments resolved, and a bare root path (`/`) collapsed to empty.
          # The query is left untouched for the caller to normalize or drop.
          # Returns the mutated `URI` object.
          def canonical_uri(url)
            uri = URI.parse(url.to_s)

            uri.fragment = nil
            # `URI::Generic#userinfo=` is a no-op on Ruby 2.7 (the project's minimum supported version),
            # so clear the components individually.
            if uri.respond_to?(:user) && (uri.user || uri.password)
              uri.user = nil
              uri.password = nil
            end
            uri.scheme = uri.scheme.downcase if uri.scheme
            uri.host = uri.host.downcase if uri.host
            uri.port = nil if uri.port == uri.default_port

            # Decode encoded dots first so `%2e%2e` is resolved as a `..` segment.
            path = uri.path.to_s.gsub(/%2[eE]/, ".")
            uri.path = remove_dot_segments(path)
            uri.path = "" if uri.path == "/"

            uri
          end

          # Recognizes the IPv4 loopback range (`127.0.0.0/8`), IPv6 loopback
          # (`::1`, optionally bracketed by `URI.parse`), and the `localhost`
          # hostname (matched exactly so that hostnames like `foo.localhost` or
          # `127.attacker.com` are not treated as loopback).
          IPV4_LOOPBACK_RANGE = IPAddr.new("127.0.0.0/8")
          IPV6_LOOPBACK = IPAddr.new("::1")
          private_constant :IPV4_LOOPBACK_RANGE, :IPV6_LOOPBACK

          def loopback_host?(host)
            return false if host.nil? || host.empty?

            normalized = host.downcase
            return true if normalized == "localhost"

            # `URI#host` returns IPv6 literals wrapped in brackets; IPAddr wants them bare.
            ip_candidate = normalized.delete_prefix("[").delete_suffix("]")
            address = parse_ip_address(ip_candidate)
            return false unless address

            return IPV4_LOOPBACK_RANGE.include?(address) if address.ipv4?
            return address == IPV6_LOOPBACK if address.ipv6?

            false
          end

          # Returns an `IPAddr` for `candidate`, or nil when it is not an IP literal.
          def parse_ip_address(candidate)
            IPAddr.new(candidate)
          rescue IPAddr::Error
            nil
          end

          # Returns `scheme://host[:port]` for `uri`, omitting the port when it is
          # absent or equal to the scheme's default.
          def base_url(uri)
            port_part = uri.port && uri.port != uri.default_port ? ":#{uri.port}" : ""
            "#{uri.scheme}://#{uri.host}#{port_part}"
          end

          # Normalizes a URL query string so two URLs that are equivalent in
          # OAuth identity terms compare as equal. This is required because
          # Faraday transparently rewrites `env.url` before sending a request:
          #
          # - Parameters get sorted by name (`?b=2&a=1` -> `?a=1&b=2`).
          # - Percent-encoded hex is uppercased (`?x=%2f` -> `?x=%2F`).
          # - A trailing `?` with no parameters is dropped (`?` -> no query).
          # - Same-name keys are collapsed so only the last value survives
          #   (`?a=1&a=2` -> `?a=2`).
          # - Empty-name pairs and blank `&&` separators are dropped
          #   (`?=v` -> no query, `?&&a=1&&` -> `?a=1`).
          # - Value-less keys (`?tenant`) and empty-value keys (`?tenant=`)
          #   are kept distinct -- Faraday preserves the `=` exactly as
          #   the caller passed it. `URI.decode_www_form` / `encode_www_form`
          #   would collapse both to `?tenant=`, so this function does
          #   the parsing by hand on `&`-separated segments.
          #
          # Without applying the same transformation to our snapshotted URL,
          # the request-time URL guard would false-positive on every URL that
          # falls under one of the rules above.
          #
          # Returns `nil` when the resulting query is empty
          # (matching Faraday's drop-empty-query behavior).
          def normalize_query(query)
            return if query.nil? || query.empty?

            # Each segment becomes `[decoded_name, has_equals?, decoded_value_or_nil]`.
            # `has_equals?` is what lets us preserve the `?key` vs `?key=`
            # distinction that Faraday respects.
            parsed = query.split("&").filter_map do |segment|
              next if segment.empty?

              name_raw, separator, value_raw = segment.partition("=")
              name = URI.decode_www_form_component(name_raw)
              next if name.empty?

              has_equals = !separator.empty?
              value = has_equals ? URI.decode_www_form_component(value_raw) : nil
              [name, has_equals, value]
            end

            # Keys ending in `[]` (`tenant[]`, `roles[]`) are Faraday's array
            # notation: the encoder preserves every occurrence in input order
            # instead of collapsing them. All other keys (`tenant`, `a[b]`,
            # plain scalars) are collapsed with last-write-wins semantics.
            # Separating the two avoids a false negative where a hijacked
            # middleware drops an entry from a `?tenant[]=victim&tenant[]=...`
            # URL and slips past the guard, and a false positive on
            # a legitimate `?roles[]=a&roles[]=b` URL.
            array_segments = []
            scalar_segments = {}
            parsed.each do |name, has_equals, value|
              if name.end_with?("[]")
                array_segments << [name, has_equals, value]
              else
                scalar_segments[name] = [has_equals, value]
              end
            end

            scalar_entries = scalar_segments.map { |name, (has_equals, value)| [name, has_equals, value] }
            combined = scalar_entries + array_segments
            return if combined.empty?

            # Stable sort by name: array entries that share a name keep their
            # original order, while scalar names are alphabetized to match
            # Faraday's deterministic encoding order.
            combined.each_with_index
              .sort_by { |(name, _, _), index| [name, index] }
              .map do |(name, has_equals, value), _index|
                encoded_name = URI.encode_www_form_component(name)
                if has_equals
                  "#{encoded_name}=#{URI.encode_www_form_component(value)}"
                else
                  encoded_name
                end
              end.join("&")
          end

          # Implements RFC 3986 Section 5.2.4 `remove_dot_segments`. Walks the input
          # buffer one segment at a time, popping the previous output segment
          # whenever a `..` is encountered, so that `/api/../mcp` collapses to
          # `/mcp` and `/foo/./bar` collapses to `/foo/bar`.
          # Returns `path` unchanged when it is nil or empty.
          # https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
          def remove_dot_segments(path)
            return path if path.nil? || path.empty?

            input = path.dup
            output = +""
            until input.empty?
              if input.start_with?("../")
                input = input[3..]
              elsif input.start_with?("./")
                input = input[2..]
              elsif input.start_with?("/./")
                input = "/#{input[3..]}"
              elsif input == "/."
                input = "/"
              elsif input.start_with?("/../")
                input = "/#{input[4..]}"
                output = remove_last_segment(output)
              elsif input == "/.."
                input = "/"
                output = remove_last_segment(output)
              elsif input == "." || input == ".."
                input = ""
              else
                # Move the next segment (optional leading `/` plus everything up to
                # the following `/`) from the input buffer to the output buffer.
                segment = input.match(%r{\A/?[^/]*})[0]
                output << segment
                input = input[segment.length..]
              end
            end
            output
          end

          # Drops the last `/segment` from `output`; returns an empty string when
          # `output` contains no `/`.
          def remove_last_segment(output)
            idx = output.rindex("/")
            return +"" if idx.nil?

            output[0...idx]
          end
        end
      end
    end
  end
end