rubygems_mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +33 -0
- data/LICENSE.md +22 -0
- data/README.md +369 -0
- data/bin/rubygems_mcp +18 -0
- data/lib/rubygems_mcp/client.rb +912 -0
- data/lib/rubygems_mcp/server.rb +444 -0
- data/lib/rubygems_mcp/version.rb +3 -0
- data/lib/rubygems_mcp.rb +31 -0
- data/sig/rubygems_mcp.rbs +199 -0
- metadata +266 -0
|
@@ -0,0 +1,912 @@
|
|
|
1
|
+
require "uri"
|
|
2
|
+
require "net/http"
|
|
3
|
+
require "openssl"
|
|
4
|
+
require "json"
|
|
5
|
+
require "date"
|
|
6
|
+
require "nokogiri"
|
|
7
|
+
|
|
8
|
+
module RubygemsMcp
|
|
9
|
+
# RubyGems and Ruby version API client
|
|
10
|
+
#
|
|
11
|
+
# @example
|
|
12
|
+
# client = RubygemsMcp::Client.new
|
|
13
|
+
# versions = client.get_latest_versions(["rails", "nokogiri"])
|
|
14
|
+
# all_versions = client.get_gem_versions("rails")
|
|
15
|
+
# ruby_version = client.get_latest_ruby_version
|
|
16
|
+
class Client
|
|
17
|
+
# Maximum response size (1MB) to protect against crawler protection pages
|
|
18
|
+
MAX_RESPONSE_SIZE = 1024 * 1024 # 1MB
|
|
19
|
+
|
|
20
|
+
# Custom exception for corrupted data
|
|
21
|
+
class CorruptedDataError < StandardError
  # original_error: the underlying exception, when one caused this error (nil otherwise).
  # response_size:  byte size of the offending response, when known (nil otherwise).
  attr_reader :original_error, :response_size

  # @param message [String] Human-readable description of the corruption
  # @param original_error [Exception, nil] Exception that triggered this error
  # @param response_size [Integer, nil] Size of the response in bytes
  def initialize(message, original_error: nil, response_size: nil)
    @original_error, @response_size = original_error, response_size
    super(message)
  end
end
|
|
30
|
+
|
|
31
|
+
# Custom exception for response size exceeded
|
|
32
|
+
class ResponseSizeExceededError < StandardError
  # size:     observed response size in bytes.
  # max_size: the limit that was exceeded, in bytes.
  attr_reader :size, :max_size

  # @param size [Integer] Observed response size in bytes
  # @param max_size [Integer] Maximum allowed size in bytes
  def initialize(size, max_size)
    description = "Response size (#{size} bytes) exceeds maximum allowed size (#{max_size} bytes). This may indicate crawler protection."
    super(description)
    @size, @max_size = size, max_size
  end
end
|
|
41
|
+
RUBYGEMS_API_BASE = "https://rubygems.org/api/v1"
|
|
42
|
+
RUBY_RELEASES_URL = "https://www.ruby-lang.org/en/downloads/releases/"
|
|
43
|
+
RUBY_BRANCHES_URL = "https://www.ruby-lang.org/en/downloads/branches/"
|
|
44
|
+
|
|
45
|
+
# Simple in-memory cache with TTL
|
|
46
|
+
class Cache
  def initialize
    @mutex = Mutex.new
    @cache = {}
  end

  # Look up a key, evicting it if its TTL has passed.
  #
  # @param key [Object] Cache key
  # @return [Object, nil] Stored value, or nil when missing or expired
  def get(key)
    @mutex.synchronize do
      stored = @cache[key]
      next nil unless stored

      if stored[:expires_at] < Time.now
        # Expired: drop the entry so it does not linger in memory.
        @cache.delete(key)
        next nil
      end

      stored[:value]
    end
  end

  # Store a value that expires ttl_seconds from now.
  #
  # @param key [Object] Cache key
  # @param value [Object] Value to store
  # @param ttl_seconds [Numeric] Lifetime in seconds
  def set(key, value, ttl_seconds)
    @mutex.synchronize do
      deadline = Time.now + ttl_seconds
      @cache[key] = {value: value, expires_at: deadline}
    end
  end

  # Remove every entry.
  def clear
    @mutex.synchronize do
      @cache.clear
    end
  end
end
|
|
79
|
+
|
|
80
|
+
# Shared cache instance
|
|
81
|
+
@cache = Cache.new
|
|
82
|
+
|
|
83
|
+
class << self
|
|
84
|
+
attr_reader :cache
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def initialize(cache_enabled: true)
  # When false, the public query methods skip the shared class-level cache
  # for both lookups and writes.
  @cache_enabled = cache_enabled
end
|
|
90
|
+
|
|
91
|
+
# Get latest versions for a list of gems with release dates
|
|
92
|
+
#
|
|
93
|
+
# @param gem_names [Array<String>] Array of gem names
|
|
94
|
+
# @param fields [Array<String>, nil] GraphQL-like field selection (nil = all fields)
|
|
95
|
+
# Available fields: name, version, release_date, license, built_at, prerelease, platform,
|
|
96
|
+
# ruby_version, rubygems_version, downloads_count, sha, spec_sha, requirements, metadata
|
|
97
|
+
# @return [Array<Hash>] Array of hashes with selected fields
|
|
98
|
+
def get_latest_versions(gem_names, fields: nil)
  gem_names.map do |name|
    # limit: 1 with the default :version_desc sort yields the newest release.
    newest = get_gem_versions(name, limit: 1, fields: fields).first

    unless newest
      # No usable version: return a placeholder, filtered like a real result.
      placeholder = {name: name, version: nil, release_date: nil, license: nil}
      next select_fields([placeholder], fields).first || placeholder
    end

    # Copy before tagging so the hash handed back by get_gem_versions is untouched.
    # The name is attached after field selection, so it is always present.
    newest.dup.tap { |entry| entry[:name] = name }
  end
end
|
|
112
|
+
|
|
113
|
+
# Get all versions for a single gem
|
|
114
|
+
#
|
|
115
|
+
# @param gem_name [String] Gem name
|
|
116
|
+
# @param limit [Integer, nil] Maximum number of versions to return (nil = all)
|
|
117
|
+
# @param offset [Integer] Number of versions to skip (for pagination)
|
|
118
|
+
# @param sort [Symbol] Sort order: :version_desc (default), :version_asc, :date_desc, :date_asc
|
|
119
|
+
# @param fields [Array<String>, nil] GraphQL-like field selection (nil = all fields)
|
|
120
|
+
# Available fields: version, release_date, license, built_at, prerelease, platform,
|
|
121
|
+
# ruby_version, rubygems_version, downloads_count, sha, spec_sha, requirements, metadata
|
|
122
|
+
# @return [Array<Hash>] Array of hashes with selected fields
|
|
123
|
+
def get_gem_versions(gem_name, limit: nil, offset: 0, sort: :version_desc, fields: nil)
  # The cache holds the complete normalized list per gem; pagination, sorting
  # and field selection are applied per call on top of it.
  cache_key = "gem_versions:#{gem_name}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    if cached
      page = apply_pagination_and_sort(cached, limit: limit, offset: offset, sort: sort)
      return select_fields(page, fields)
    end
  end

  response = make_request(URI("#{RUBYGEMS_API_BASE}/versions/#{gem_name}.json"))

  # Validate response is an Array (anything else means the payload is not a version list)
  unless response.is_a?(Array)
    raise CorruptedDataError.new(
      "Invalid JSON structure: expected Array, got #{response.class}",
      response_size: response.to_s.bytesize
    )
  end

  return [] if response.empty?

  versions = response.filter_map do |version_data|
    # Skip malformed entries instead of crashing on them.
    next unless version_data.is_a?(Hash)

    number = version_data["number"]
    # Only plain MAJOR.MINOR.PATCH releases are kept. \A/\z anchor the whole
    # string; ^/$ would accept a multi-line value that Gem::Version rejects.
    next unless number.is_a?(String) && number.match?(/\A\d+\.\d+\.\d+\z/)

    created_at = version_data["created_at"]
    built_at = version_data["built_at"]

    {
      version: Gem::Version.new(number).to_s,
      # Timestamps are reduced to ISO 8601 calendar dates.
      release_date: created_at ? Date.parse(created_at).iso8601 : nil,
      built_at: built_at ? Date.parse(built_at).iso8601 : nil,
      license: version_data["licenses"]&.first,
      prerelease: version_data["prerelease"] || false,
      platform: version_data["platform"] || "ruby",
      ruby_version: version_data["ruby_version"],
      rubygems_version: version_data["rubygems_version"],
      downloads_count: version_data["downloads_count"],
      sha: version_data["sha"],
      spec_sha: version_data["spec_sha"],
      requirements: version_data["requirements"] || [],
      metadata: version_data["metadata"] || {}
    }
  end

  # Cache for 1 hour
  self.class.cache.set(cache_key, versions, 3600) if @cache_enabled

  page = apply_pagination_and_sort(versions, limit: limit, offset: offset, sort: sort)
  select_fields(page, fields)
end
|
|
184
|
+
|
|
185
|
+
# Get latest Ruby version with release date
|
|
186
|
+
#
|
|
187
|
+
# @return [Hash] Hash with :version and :release_date (as ISO 8601 string)
|
|
188
|
+
def get_latest_ruby_version
  # get_ruby_versions is called with its defaults (sort: :version_desc),
  # so the first entry is taken as the newest release.
  newest = get_ruby_versions.first
  return newest if newest

  {version: nil, release_date: nil}
end
|
|
192
|
+
|
|
193
|
+
# Get Ruby maintenance status for all versions
|
|
194
|
+
#
|
|
195
|
+
# @return [Array<Hash>] Array of hashes with maintenance information:
|
|
196
|
+
# - :version (String) - Ruby version (e.g., "3.4", "3.3")
|
|
197
|
+
# - :status (String) - Maintenance status: "normal maintenance", "security maintenance", "eol", or "preview"
|
|
198
|
+
# - :release_date (String, nil) - Release date as ISO 8601 string
|
|
199
|
+
# - :normal_maintenance_until (String, nil) - End of normal maintenance as ISO 8601 string or "TBD"
|
|
200
|
+
# - :eol (String, nil) - End of life date as ISO 8601 string or "TBD"
|
|
201
|
+
def get_ruby_maintenance_status
  cache_key = "ruby_maintenance_status"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  response = make_request(URI(RUBY_BRANCHES_URL), parse_html: true)
  return [] unless response

  # "TBD" passes through untouched; anything Date cannot parse is returned
  # verbatim so the caller still sees what the page said.
  normalize_date = lambda do |raw|
    next "TBD" if raw == "TBD"

    begin
      Date.parse(raw).iso8601
    rescue StandardError
      raw
    end
  end

  # Each branch is an <h3> ("Ruby 3.4") followed by a <p> holding its details.
  maintenance_data = response.css("h3").filter_map do |h3|
    version = h3.text[/Ruby ([\d.]+)/, 1]
    next unless version&.match?(/\A\d+\.\d+\z/) # major.minor headings only

    p_tag = h3.next_element
    next unless p_tag&.name == "p"

    status_text = p_tag.text

    # Classify from the "status: ..." line only, so words in other lines
    # (such as "EOL:") cannot influence the result.
    status_value = status_text[/status:\s*([^\n<]+)/i, 1].to_s.strip.downcase

    # Checked in order of specificity.
    status = if status_value.include?("preview")
      "preview"
    elsif status_value.include?("eol") || status_value.include?("end-of-life")
      "eol"
    elsif status_value.include?("security")
      "security maintenance"
    elsif status_value.include?("normal")
      "normal maintenance"
    else
      "unknown"
    end

    release_date = status_text[/release date:\s*(\d{4}-\d{2}-\d{2})/i, 1]

    normal_until_raw = status_text[/normal maintenance until:\s*([^<\n]+)/i, 1]
    normal_maintenance_until = normal_until_raw && normalize_date.call(normal_until_raw.strip)

    eol_raw = status_text[/EOL:\s*([^<\n]+)/i, 1]
    eol = if eol_raw
      date_str = eol_raw.strip
      # Handle "2027-03-31 (expected)"; to_s guards against a bare "(".
      date_str = date_str.split("(").first.to_s.strip if date_str.include?("(")
      normalize_date.call(date_str)
    end

    {
      version: version,
      status: status,
      release_date: release_date,
      normal_maintenance_until: normal_maintenance_until,
      eol: eol
    }
  end

  # Sort by version descending. sort_by returns a new array, so the result
  # must be assigned or the sort has no effect.
  maintenance_data = maintenance_data.sort_by { |entry| Gem::Version.new(entry[:version]) }.reverse

  # Cache for 24 hours
  self.class.cache.set(cache_key, maintenance_data, 86400) if @cache_enabled

  maintenance_data
end
|
|
291
|
+
|
|
292
|
+
# Get all Ruby versions with release dates
|
|
293
|
+
#
|
|
294
|
+
# @param limit [Integer, nil] Maximum number of versions to return (nil = all)
|
|
295
|
+
# @param offset [Integer] Number of versions to skip (for pagination)
|
|
296
|
+
# @param sort [Symbol] Sort order: :version_desc (default), :version_asc, :date_desc, :date_asc
|
|
297
|
+
# @return [Array<Hash>] Array of hashes with :version and :release_date
|
|
298
|
+
def get_ruby_versions(limit: nil, offset: 0, sort: :version_desc)
  # The cache stores the full release list; pagination and sorting are
  # applied per call, for fresh and cached data alike.
  cache_key = "ruby_versions"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return apply_pagination_and_sort(cached, limit: limit, offset: offset, sort: sort) if cached
  end

  response = make_request(URI(RUBY_RELEASES_URL), parse_html: true)
  return [] unless response

  versions = response.css("table.release-list tr").filter_map do |row|
    version_string = row.css("td:nth-child(1)").text[/Ruby (.+)/, 1]&.strip
    next unless version_string&.match?(/\A\d+\.\d+\.\d+/)
    # Skip strings Gem::Version cannot represent rather than raising mid-scrape.
    next unless Gem::Version.correct?(version_string)

    # Dates are stored as ISO 8601 strings so the result serializes to JSON.
    release_date = begin
      Date.parse(row.css("td:nth-child(2)").text.strip).iso8601
    rescue Date::Error
      nil
    end

    download_link = row.css("td:nth-child(3) a").first
    download_url = download_link && download_link["href"]

    # Release notes links may be site-relative; make them absolute.
    notes_link = row.css("td:nth-child(4) a").first
    notes_href = notes_link && notes_link["href"]
    release_notes_url = if notes_href
      notes_href.start_with?("http") ? notes_href : "https://www.ruby-lang.org#{notes_href}"
    end

    {
      version: Gem::Version.new(version_string).to_s,
      release_date: release_date,
      download_url: download_url,
      release_notes_url: release_notes_url
    }
  end

  # Cache for 1 hour. Without this write the lookup above could never hit.
  self.class.cache.set(cache_key, versions, 3600) if @cache_enabled

  apply_pagination_and_sort(versions, limit: limit, offset: offset, sort: sort)
end
|
|
351
|
+
|
|
352
|
+
# Get changelog summary for a Ruby version from release notes
|
|
353
|
+
#
|
|
354
|
+
# @param version [String] Ruby version (e.g., "3.4.7")
|
|
355
|
+
# @return [Hash] Hash with :version, :release_notes_url, and :summary
|
|
356
|
+
def get_ruby_version_changelog(version)
  cache_key = "ruby_changelog:#{version}"

  # Check the cache first so a cached changelog costs no lookups at all.
  # Only successful results are ever stored under this key.
  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  # Find the release notes URL for this version.
  version_data = get_ruby_versions.find { |v| v[:version] == version }
  return {version: version, release_notes_url: nil, summary: nil, error: "Version not found"} unless version_data

  release_notes_url = version_data[:release_notes_url]
  return {version: version, release_notes_url: nil, summary: nil, error: "No release notes available"} unless release_notes_url

  response = make_request(URI(release_notes_url), parse_html: true)
  return {version: version, release_notes_url: release_notes_url, summary: nil, error: "Failed to fetch release notes"} unless response

  # Try the usual main-content containers, falling back to the whole body.
  content = response.css("div.content, div.entry-content, article, main").first || response.css("body").first

  summary = if content
    # Keep the first five paragraphs of at least 50 characters.
    paragraphs = content.text.strip.split(/\n\n+/).reject { |para| para.strip.length < 50 }
    text = paragraphs.first(5).join("\n\n").strip

    # Cap at exactly 2000 characters plus an ellipsis marker.
    (text.length > 2000) ? "#{text[0, 2000]}..." : text
  end

  result = {
    version: version,
    release_notes_url: release_notes_url,
    summary: summary
  }

  # Cache for 24 hours
  self.class.cache.set(cache_key, result, 86400) if @cache_enabled

  result
end
|
|
404
|
+
|
|
405
|
+
# Get reverse dependencies (gems that depend on this gem)
|
|
406
|
+
#
|
|
407
|
+
# @param gem_name [String] Gem name
|
|
408
|
+
# @return [Array<String>] Array of gem names that depend on this gem
|
|
409
|
+
def get_gem_reverse_dependencies(gem_name)
  cache_key = "gem_reverse_deps:#{gem_name}"

  if @cache_enabled
    hit = self.class.cache.get(cache_key)
    return hit if hit
  end

  endpoint = URI("#{RUBYGEMS_API_BASE}/gems/#{gem_name}/reverse_dependencies.json")
  dependents = make_request(endpoint)

  # Anything other than an Array is treated as "no reverse dependencies"
  # and is deliberately not cached.
  return [] unless dependents.is_a?(Array)

  # Cache for 1 hour
  self.class.cache.set(cache_key, dependents, 3600) if @cache_enabled
  dependents
end
|
|
427
|
+
|
|
428
|
+
# Get download statistics for a specific gem version
|
|
429
|
+
#
|
|
430
|
+
# @param gem_name [String] Gem name
|
|
431
|
+
# @param version [String] Gem version (e.g., "1.0.0")
|
|
432
|
+
# @return [Hash] Hash with :version_downloads and :total_downloads
|
|
433
|
+
def get_gem_version_downloads(gem_name, version)
  cache_key = "gem_downloads:#{gem_name}:#{version}"

  if @cache_enabled
    hit = self.class.cache.get(cache_key)
    return hit if hit
  end

  payload = make_request(URI("#{RUBYGEMS_API_BASE}/downloads/#{gem_name}-#{version}.json"))

  # A non-Hash payload yields empty counters and is not cached.
  return {version_downloads: nil, total_downloads: nil} unless payload.is_a?(Hash)

  stats = {gem_name: gem_name, version: version}
  %w[version_downloads total_downloads].each do |counter|
    stats[counter.to_sym] = payload[counter]
  end

  # Cache for 1 hour
  self.class.cache.set(cache_key, stats, 3600) if @cache_enabled
  stats
end
|
|
458
|
+
|
|
459
|
+
# Get latest gems (most recently added)
|
|
460
|
+
#
|
|
461
|
+
# @param limit [Integer, nil] Maximum number of gems to return (default: 30, max: 50)
|
|
462
|
+
# @return [Array<Hash>] Array of gem information
|
|
463
|
+
def get_latest_gems(limit: 30)
  limit = [limit || 30, 50].min # API returns max 50
  cache_key = "latest_gems:#{limit}"

  if @cache_enabled
    hit = self.class.cache.get(cache_key)
    return hit if hit
  end

  response = make_request(URI("#{RUBYGEMS_API_BASE}/activity/latest.json"))
  return [] unless response.is_a?(Array)

  # Fields copied through under the same name; the *_uri fields are renamed below.
  passthrough = %w[name version downloads info authors]

  gems = response.first(limit).map do |gem_data|
    entry = passthrough.to_h { |field| [field.to_sym, gem_data[field]] }
    entry.merge!(
      homepage: gem_data["homepage_uri"],
      source_code: gem_data["source_code_uri"],
      documentation: gem_data["documentation_uri"],
      licenses: gem_data["licenses"] || []
    )
  end

  # Cache for 15 minutes (activity changes frequently)
  self.class.cache.set(cache_key, gems, 900) if @cache_enabled

  gems
end
|
|
496
|
+
|
|
497
|
+
# Get recently updated gems
|
|
498
|
+
#
|
|
499
|
+
# @param limit [Integer, nil] Maximum number of gems to return (default: 30, max: 50)
|
|
500
|
+
# @return [Array<Hash>] Array of gem version information
|
|
501
|
+
def get_recently_updated_gems(limit: 30)
  limit = [limit || 30, 50].min # API returns max 50
  cache_key = "recently_updated_gems:#{limit}"

  if @cache_enabled
    hit = self.class.cache.get(cache_key)
    return hit if hit
  end

  response = make_request(URI("#{RUBYGEMS_API_BASE}/activity/just_updated.json"))
  return [] unless response.is_a?(Array)

  # Fields copied through under the same name; the *_uri fields are renamed below.
  passthrough = %w[name version downloads version_downloads info authors]

  gems = response.first(limit).map do |gem_data|
    entry = passthrough.to_h { |field| [field.to_sym, gem_data[field]] }
    entry.merge!(
      homepage: gem_data["homepage_uri"],
      source_code: gem_data["source_code_uri"],
      documentation: gem_data["documentation_uri"],
      licenses: gem_data["licenses"] || [],
      created_at: gem_data["created_at"]
    )
  end

  # Cache for 15 minutes (activity changes frequently)
  self.class.cache.set(cache_key, gems, 900) if @cache_enabled

  gems
end
|
|
536
|
+
|
|
537
|
+
# Get changelog summary for a gem from its changelog_uri
|
|
538
|
+
#
|
|
539
|
+
# @param gem_name [String] Gem name
|
|
540
|
+
# @param version [String, nil] Gem version (optional, uses latest if not provided)
|
|
541
|
+
# @return [Hash] Hash with :gem_name, :version, :changelog_uri, and :summary
|
|
542
|
+
def get_gem_changelog(gem_name, version: nil)
  # Get gem info to find changelog_uri
  gem_info = get_gem_info(gem_name)
  if gem_info.empty?
    return {gem_name: gem_name, version: nil, changelog_uri: nil, summary: nil, error: "Gem not found"}
  end

  version ||= gem_info[:version]
  changelog_uri = gem_info[:changelog_uri]
  unless changelog_uri
    return {gem_name: gem_name, version: version, changelog_uri: nil, summary: nil, error: "No changelog URI available"}
  end

  # changelog_uri is free-form metadata supplied by the gem author, so only
  # well-formed http(s) URLs are fetched. URI::HTTPS is a subclass of URI::HTTP.
  uri = begin
    URI(changelog_uri)
  rescue URI::InvalidURIError, ArgumentError
    nil
  end
  unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
    return {gem_name: gem_name, version: version, changelog_uri: changelog_uri, summary: nil, error: "Invalid changelog URI"}
  end

  cache_key = "gem_changelog:#{gem_name}:#{version}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  response = make_request(uri, parse_html: true)
  return {gem_name: gem_name, version: version, changelog_uri: changelog_uri, summary: nil, error: "Failed to fetch changelog"} unless response

  # GitHub release pages get release-specific selectors; everything else
  # uses generic main-content selectors.
  github_release = changelog_uri.include?("github.com") && changelog_uri.include?("/releases/")
  content = if github_release
    response.css(".markdown-body, .release-body, [data-testid='release-body']").first ||
      response.css("div.repository-content, article").first
  else
    response.css("div.content, div.entry-content, article, main, .markdown-body").first
  end

  # Last resort: the whole page body.
  content ||= response.css("body").first

  summary = if content
    text = content.text.strip

    # Remove common navigation/header text patterns
    text = text.gsub(/Notifications.*?signed in.*?reload/im, "")
    text = text.gsub(/You must be signed in.*?reload/im, "")
    text = text.gsub(/There was an error.*?reload/im, "")

    # Prefer blank-line separated paragraphs; fall back to single lines.
    paragraphs = text.include?("\n\n") ? text.split(/\n\n+/) : text.split(/\n+/)

    # Drop short fragments and leftover navigation lines.
    paragraphs = paragraphs.reject do |para|
      para.strip.length < 30 ||
        para.match?(/^(rails|Notifications|You must|There was)/i) ||
        para.match?(/^\/\s*$/)
    end

    summary_text = paragraphs.first(10).join("\n\n").strip

    # Cap at exactly 3000 characters plus an ellipsis marker.
    summary_text = "#{summary_text[0, 3000]}..." if summary_text.length > 3000
    summary_text.empty? ? nil : summary_text
  end

  result = {
    gem_name: gem_name,
    version: version,
    changelog_uri: changelog_uri,
    summary: summary
  }

  # Cache for 24 hours
  self.class.cache.set(cache_key, result, 86400) if @cache_enabled

  result
end
|
|
614
|
+
|
|
615
|
+
# Get gem information (summary, homepage, etc.)
|
|
616
|
+
#
|
|
617
|
+
# @param gem_name [String] Gem name
|
|
618
|
+
# @param fields [Array<String>, nil] GraphQL-like field selection (nil = all fields)
|
|
619
|
+
# Available fields: name, version, summary, description, homepage, source_code,
|
|
620
|
+
# documentation, licenses, authors, info, downloads, version_downloads, yanked,
|
|
621
|
+
# dependencies, changelog_uri, funding_uri, platform, sha, spec_sha, metadata
|
|
622
|
+
# @return [Hash] Hash with selected gem information
|
|
623
|
+
def get_gem_info(gem_name, fields: nil)
  cache_key = "gem_info:#{gem_name}"

  # The cache stores the full record; field selection happens per call.
  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    if cached
      return fields ? select_fields([cached], fields).first : cached
    end
  end

  payload = make_request(URI("#{RUBYGEMS_API_BASE}/gems/#{gem_name}.json"))
  return {} unless payload.is_a?(Hash)

  gem_info = {
    name: payload["name"],
    version: payload["version"],
    # Fall back to "info" when no summary is present.
    summary: payload["summary"] || payload["info"],
    description: payload["description"],
    homepage: payload["homepage_uri"],
    source_code: payload["source_code_uri"],
    documentation: payload["documentation_uri"],
    licenses: payload["licenses"] || [],
    authors: payload["authors"],
    info: payload["info"],
    downloads: payload["downloads"],
    version_downloads: payload["version_downloads"],
    yanked: payload["yanked"] || false,
    dependencies: payload["dependencies"] || {runtime: [], development: []},
    # These URIs are read top-level first, then from the metadata hash.
    changelog_uri: payload["changelog_uri"] || payload.dig("metadata", "changelog_uri"),
    funding_uri: payload["funding_uri"] || payload.dig("metadata", "funding_uri"),
    platform: payload["platform"] || "ruby",
    sha: payload["sha"],
    spec_sha: payload["spec_sha"],
    metadata: payload["metadata"] || {}
  }

  # Cache for 1 hour
  self.class.cache.set(cache_key, gem_info, 3600) if @cache_enabled

  selected = select_fields([gem_info], fields).first
  selected || gem_info
end
|
|
667
|
+
|
|
668
|
+
# Search for gems by name
|
|
669
|
+
#
|
|
670
|
+
# @param query [String] Search query
|
|
671
|
+
# @param limit [Integer, nil] Maximum number of results to return (nil = all)
|
|
672
|
+
# @param offset [Integer] Number of results to skip (for pagination)
|
|
673
|
+
# @return [Array<Hash>] Array of hashes with gem information
|
|
674
|
+
def search_gems(query, limit: nil, offset: 0)
  # Don't cache search results as they can change frequently
  uri = URI("#{RUBYGEMS_API_BASE}/search.json")
  uri.query = URI.encode_www_form(query: query)

  response = make_request(uri)
  return [] unless response.is_a?(Array)

  results = response.map do |gem_data|
    {
      name: gem_data["name"],
      version: gem_data["version"],
      info: gem_data["info"],
      homepage: gem_data["homepage_uri"],
      source_code: gem_data["source_code_uri"],
      documentation: gem_data["documentation_uri"]
    }
  end

  # Apply pagination. drop returns [] when the offset is past the end, where
  # results[offset..] would return nil. to_i also tolerates a nil offset.
  skip = offset.to_i
  results = results.drop(skip) if skip > 0
  limit ? results.first(limit) : results
end
|
|
698
|
+
|
|
699
|
+
private
|
|
700
|
+
|
|
701
|
+
# Apply pagination and sorting to a version array
|
|
702
|
+
#
|
|
703
|
+
# @param versions [Array<Hash>] Array of version hashes
|
|
704
|
+
# @param limit [Integer, nil] Maximum number of versions to return
|
|
705
|
+
# @param offset [Integer] Number of versions to skip
|
|
706
|
+
# @param sort [Symbol] Sort order: :version_desc, :version_asc, :date_desc, :date_asc
|
|
707
|
+
# @return [Array<Hash>] Paginated and sorted array
|
|
708
|
+
def apply_pagination_and_sort(versions, limit: nil, offset: 0, sort: :version_desc)
  # Split the sort mode into "which key" and "which direction". Any
  # unrecognized mode behaves like :version_desc.
  by_date = %i[date_desc date_asc].include?(sort)
  ascending = %i[version_asc date_asc].include?(sort)

  ordered = versions.sort_by do |entry|
    # Missing dates sort as the empty string, i.e. before every real date.
    by_date ? (entry[:release_date] || "") : Gem::Version.new(entry[:version])
  end
  ordered = ordered.reverse unless ascending

  # An offset past the end yields nil from Array#[]; normalize to an empty page.
  page = ordered[offset..] || []
  limit ? page.first(limit) : page
end
|
|
728
|
+
|
|
729
|
+
# GraphQL-like field selection
|
|
730
|
+
#
|
|
731
|
+
# @param data [Array<Hash>] Array of hashes to filter
|
|
732
|
+
# @param fields [Array<String>, nil] Fields to include (nil = all fields)
|
|
733
|
+
# @return [Array<Hash>] Filtered array with only selected fields
|
|
734
|
+
def select_fields(data, fields)
  # nil or an empty selection means "no filtering": hand the input back as-is.
  return data if fields.nil? || fields.empty?

  # A key is kept when the caller named it either as a String or as a Symbol.
  wanted = ->(key) { fields.include?(key.to_s) || fields.include?(key.to_sym) }

  data.map do |item|
    item.select { |key, _value| wanted.call(key) }
  end
end
|
|
741
|
+
|
|
742
|
+
# Create the Net::HTTP object used for a request.
#
# Open and read timeouts are both set to 10 seconds. For https URIs, TLS is
# switched on with peer verification and a CA bundle is chosen explicitly:
# the file named by SSL_CERT_FILE when it exists as a regular file, otherwise
# OpenSSL's default certificate file when that path exists. If neither is
# present, ca_file is left unset.
#
# @param uri [URI] URI object for the request
# @return [Net::HTTP] Configured HTTP client
def build_http_client(uri)
  client = Net::HTTP.new(uri.host, uri.port)
  client.open_timeout = 10
  client.read_timeout = 10
  return client unless uri.scheme == "https"

  client.use_ssl = true
  client.verify_mode = OpenSSL::SSL::VERIFY_PEER

  # Prefer the bundle the environment points at, then OpenSSL's default file.
  env_bundle = ENV["SSL_CERT_FILE"]
  default_bundle = OpenSSL::X509::DEFAULT_CERT_FILE
  if env_bundle && File.file?(env_bundle)
    client.ca_file = env_bundle
  elsif File.exist?(default_bundle)
    client.ca_file = default_bundle
  end

  client
end
|
|
768
|
+
|
|
769
|
+
# Perform a GET request and return the validated, parsed response body.
#
# Note that the RuntimeErrors raised for non-success statuses are themselves
# caught by the final rescue and re-raised with a "Request failed: " prefix;
# only ResponseSizeExceededError and CorruptedDataError pass through unchanged.
#
# @param uri [URI] Request URI
# @param parse_html [Boolean] Validate/parse the body as HTML when true, as JSON
#   otherwise; also selects the Accept header that is sent
# @return [Hash, Array, Nokogiri::HTML::Document] Parsed JSON data or HTML document
# @raise [ResponseSizeExceededError] If the body is larger than MAX_RESPONSE_SIZE
# @raise [CorruptedDataError] If the body fails validation or parsing
# @raise [RuntimeError] For non-success statuses, SSL failures and any other error
def make_request(uri, parse_html: false)
  http = build_http_client(uri)

  request = Net::HTTP::Get.new(uri)
  request["Accept"] = parse_html ? "text/html" : "application/json"
  request["User-Agent"] = "rubygems_mcp/#{RubygemsMcp::VERSION}"

  response = http.request(request)

  # response.body may be nil for some responses. Normalise it once so that the
  # size check AND the error excerpts below never call a method on nil, which
  # would replace the real HTTP status with a NoMethodError.
  response_body = response.body.to_s

  case response
  when Net::HTTPSuccess
    # Check response size before processing (the body has already been read,
    # so this limits what gets parsed rather than what gets downloaded)
    response_size = response_body.bytesize
    if response_size > MAX_RESPONSE_SIZE
      raise ResponseSizeExceededError.new(response_size, MAX_RESPONSE_SIZE)
    end

    # Validate and parse response
    if parse_html
      validate_and_parse_html(response_body, uri)
    else
      validate_and_parse_json(response_body, uri)
    end
  when Net::HTTPNotFound
    raise "Resource not found. Response: #{response_body[0..500]}"
  else
    raise "API request failed: #{response.code} #{response.message}\n#{response_body[0..500]}"
  end
rescue ResponseSizeExceededError, CorruptedDataError
  # Re-raise our custom errors as-is (don't cache corrupted data)
  raise
rescue OpenSSL::SSL::SSLError => e
  raise "SSL verification failed: #{e.message}. This may be due to system certificate configuration issues."
rescue => e
  raise "Request failed: #{e.class} - #{e.message}"
end
|
|
807
|
+
|
|
808
|
+
# Validate and parse JSON response
#
# Rejects bodies that look like a crawler-protection page, parses the rest as
# JSON, and requires the top-level value to be a Hash or an Array.
#
# @param body [String] Response body
# @param uri [URI] Request URI for error context (only interpolated into messages)
# @return [Hash, Array] Parsed JSON data
# @raise [CorruptedDataError] If JSON is invalid or corrupted
def validate_and_parse_json(body, uri)
  stripped = body.strip

  # Check for common crawler protection patterns.
  # Only check if body looks like markup (starts with <) to avoid false positives
  # on JSON payloads that merely mention one of these words.
  if stripped.start_with?("<") && body.match?(/cloudflare|ddos protection|access denied|blocked|captcha/i)
    raise CorruptedDataError.new(
      "Response appears to be a crawler protection page from #{uri}",
      response_size: body.bytesize
    )
  end

  begin
    parsed = JSON.parse(body)
  rescue JSON::ParserError => e
    # An HTML document instead of JSON usually means an error page. The doctype
    # keyword and tag names are case-insensitive, so `<!doctype html>` must be
    # recognised just like `<!DOCTYPE html>`.
    message =
      if stripped.match?(/\A<(?:!doctype|html)/i)
        "Received HTML instead of JSON from #{uri}. This may indicate an error page or crawler protection."
      else
        "Failed to parse JSON response from #{uri}: #{e.message}"
      end

    raise CorruptedDataError.new(message, original_error: e, response_size: body.bytesize)
  end

  # Additional validation: a bare scalar (number, string, null) is syntactically
  # valid JSON but not a usable API payload.
  unless parsed.is_a?(Hash) || parsed.is_a?(Array)
    raise CorruptedDataError.new(
      "Invalid JSON structure: expected Hash or Array, got #{parsed.class}",
      response_size: body.bytesize
    )
  end

  parsed
end
|
|
852
|
+
|
|
853
|
+
# Validate and parse HTML response
#
# The body is rejected when it matches a crawler-protection phrase, does not
# look like HTML, has fewer than 50 characters of text, or its text contains a
# typical error-page phrase.
#
# @param body [String] Response body
# @param uri [URI] Request URI for error context (only interpolated into messages)
# @return [Nokogiri::HTML::Document] Parsed HTML document
# @raise [CorruptedDataError] If HTML is invalid or appears to be an error page
def validate_and_parse_html(body, uri)
  # Check for common crawler protection patterns
  if body.match?(/cloudflare|ddos protection|access denied|blocked|captcha|rate limit/i)
    raise CorruptedDataError.new(
      "Response appears to be a crawler protection page from #{uri}",
      response_size: body.bytesize
    )
  end

  # Check if response is actually HTML. The doctype keyword and tag names are
  # case-insensitive, so `<!doctype html>` and `<HTML>` count as well.
  unless body.strip.match?(/\A<!doctype/i) || body.match?(/<html/i)
    raise CorruptedDataError.new(
      "Response from #{uri} does not appear to be HTML",
      response_size: body.bytesize
    )
  end

  doc = Nokogiri::HTML(body)
  # Extract the text once; both checks below read it.
  page_text = doc.text

  # Check if HTML is empty or appears to be an error page
  if page_text.strip.length < 50
    raise CorruptedDataError.new(
      "HTML response from #{uri} appears to be empty or too short",
      response_size: body.bytesize
    )
  end

  # Check for common error page indicators
  error_indicators = /error 404|page not found|access denied|forbidden|internal server error/i
  if page_text.match?(error_indicators)
    raise CorruptedDataError.new(
      "HTML response from #{uri} appears to be an error page",
      response_size: body.bytesize
    )
  end

  doc
rescue Nokogiri::XML::SyntaxError => e
  raise CorruptedDataError.new(
    "Failed to parse HTML from #{uri}: #{e.message}",
    original_error: e,
    response_size: body.bytesize
  )
end
|
|
911
|
+
end
|
|
912
|
+
end
|