rubygems_mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,912 @@
1
+ require "uri"
2
+ require "net/http"
3
+ require "openssl"
4
+ require "json"
5
+ require "date"
6
+ require "nokogiri"
7
+
8
+ module RubygemsMcp
9
+ # RubyGems and Ruby version API client
10
+ #
11
+ # @example
12
+ # client = RubygemsMcp::Client.new
13
+ # versions = client.get_latest_versions(["rails", "nokogiri"])
14
+ # all_versions = client.get_gem_versions("rails")
15
+ # ruby_version = client.get_latest_ruby_version
16
+ class Client
17
# Largest response body, in bytes, that the client is willing to parse (1 MiB).
# Oversized bodies are treated as a sign of a crawler-protection page.
MAX_RESPONSE_SIZE = 1_048_576
19
+
20
# Raised when a response body cannot be used: it failed to parse, had an
# unexpected structure, or looked like an error / crawler-protection page.
class CorruptedDataError < StandardError
  # @return [Exception, nil] the lower-level error that triggered this one, if any
  attr_reader :original_error

  # @return [Integer, nil] size in bytes of the offending response, if known
  attr_reader :response_size

  # @param message [String] human-readable description of the problem
  # @param original_error [Exception, nil] underlying parser error
  # @param response_size [Integer, nil] size of the response body in bytes
  def initialize(message, original_error: nil, response_size: nil)
    @original_error, @response_size = original_error, response_size
    super(message)
  end
end
30
+
31
# Raised when a response body is larger than the configured maximum.
class ResponseSizeExceededError < StandardError
  # @return [Integer] actual size of the response body in bytes
  attr_reader :size

  # @return [Integer] the limit that was exceeded, in bytes
  attr_reader :max_size

  # @param size [Integer] actual size of the response body in bytes
  # @param max_size [Integer] maximum size that would have been accepted
  def initialize(size, max_size)
    super("Response size (#{size} bytes) exceeds maximum allowed size (#{max_size} bytes). This may indicate crawler protection.")
    @size = size
    @max_size = max_size
  end
end
41
# Root of the RubyGems.org JSON API (v1); endpoint paths are appended to it.
RUBYGEMS_API_BASE = "https://rubygems.org/api/v1"

# ruby-lang.org page listing every Ruby release (scraped as HTML).
RUBY_RELEASES_URL = "https://www.ruby-lang.org/en/downloads/releases/"

# ruby-lang.org page describing the maintenance state of each Ruby branch (scraped as HTML).
RUBY_BRANCHES_URL = "https://www.ruby-lang.org/en/downloads/branches/"
44
+
45
# Small in-memory key/value store with a per-entry time-to-live.
#
# Every operation runs under a Mutex. Expiry is measured with the monotonic
# clock, so adjusting the system time (NTP step, manual change) can neither
# expire entries early nor keep them alive longer than requested.
class Cache
  def initialize
    @cache = {}
    @mutex = Mutex.new
  end

  # Look up a value, evicting it if its TTL has elapsed.
  #
  # @param key [Object] cache key
  # @return [Object, nil] the stored value, or nil when missing or expired
  def get(key)
    @mutex.synchronize do
      entry = @cache[key]
      next nil unless entry

      # "<=" so that a TTL of zero is always treated as already expired
      if entry[:expires_at] <= monotonic_now
        @cache.delete(key)
        next nil
      end

      entry[:value]
    end
  end

  # Store a value for a limited time, replacing any previous entry.
  #
  # @param key [Object] cache key
  # @param value [Object] value to store
  # @param ttl_seconds [Numeric] lifetime of the entry in seconds
  # @return [Hash] the stored entry
  def set(key, value, ttl_seconds)
    @mutex.synchronize do
      @cache[key] = {
        value: value,
        expires_at: monotonic_now + ttl_seconds
      }
    end
  end

  # Remove every entry.
  #
  # @return [Hash] the (now empty) backing hash
  def clear
    @mutex.synchronize { @cache.clear }
  end

  private

  # @return [Float] seconds on the monotonic clock
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
79
+
80
# One cache for the whole class, held in a class-level instance variable so
# that every Client instance reads and writes the same entries.
@cache = Cache.new

# @return [Cache] the cache shared by all Client instances
def self.cache
  @cache
end
86
+
87
# @param cache_enabled [Boolean] when truthy (the default) lookups consult and
#   populate the shared class-level cache; when falsy every call hits the network
def initialize(cache_enabled: true)
  @cache_enabled = cache_enabled
end
90
+
91
# Get the latest version of each gem in a list, with release dates.
#
# Every result carries :name so callers can tell the entries apart, whether
# or not a version was found and whatever field selection is in effect.
#
# @param gem_names [Array<String>] Array of gem names
# @param fields [Array<String>, nil] GraphQL-like field selection (nil = all fields)
#   Available fields: name, version, release_date, license, built_at, prerelease, platform,
#   ruby_version, rubygems_version, downloads_count, sha, spec_sha, requirements, metadata
# @return [Array<Hash>] one hash per requested gem, in the order given
def get_latest_versions(gem_names, fields: nil)
  gem_names.map do |name|
    # Default sort is version-descending, so the first entry is the newest
    latest = get_gem_versions(name, limit: 1, fields: fields).first

    if latest
      latest.dup.tap { |result| result[:name] = name }
    else
      # No usable version: return a placeholder that honours the field
      # selection but, like the branch above, always identifies the gem.
      placeholder = {version: nil, release_date: nil, license: nil}
      selected = select_fields([placeholder], fields).first || placeholder
      {name: name}.merge(selected)
    end
  end
end
112
+
113
# Get the published versions of a single gem.
#
# Only plain three-segment release numbers ("1.2.3") are kept; anything else
# (prereleases, four-segment versions, missing numbers) is skipped.
#
# @param gem_name [String] Gem name
# @param limit [Integer, nil] Maximum number of versions to return (nil = all)
# @param offset [Integer] Number of versions to skip (for pagination)
# @param sort [Symbol] Sort order: :version_desc (default), :version_asc, :date_desc, :date_asc
# @param fields [Array<String>, nil] GraphQL-like field selection (nil = all fields)
#   Available fields: version, release_date, license, built_at, prerelease, platform,
#   ruby_version, rubygems_version, downloads_count, sha, spec_sha, requirements, metadata
# @return [Array<Hash>] Array of hashes with selected fields
# @raise [CorruptedDataError] if the API response is not a JSON array
def get_gem_versions(gem_name, limit: nil, offset: 0, sort: :version_desc, fields: nil)
  cache_key = "gem_versions:#{gem_name}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    if cached
      page = apply_pagination_and_sort(cached, limit: limit, offset: offset, sort: sort)
      return select_fields(page, fields)
    end
  end

  # Escape the name so it can never add path segments or a query string
  escaped_name = URI.encode_www_form_component(gem_name)
  response = make_request(URI("#{RUBYGEMS_API_BASE}/versions/#{escaped_name}.json"))

  unless response.is_a?(Array)
    raise CorruptedDataError.new(
      "Invalid JSON structure: expected Array, got #{response.class}",
      response_size: response.to_s.bytesize
    )
  end

  return [] if response.empty?

  versions = response.filter_map do |version_data|
    number = version_data["number"]
    # \A..\z anchors the whole string; ^..$ would accept "1.0.0\njunk",
    # which Gem::Version.new then rejects with an ArgumentError.
    next unless number.is_a?(String) && number.match?(/\A\d+\.\d+\.\d+\z/)

    created_at = version_data["created_at"]
    built_at = version_data["built_at"]

    {
      version: Gem::Version.new(number).to_s,
      release_date: created_at ? Date.parse(created_at).iso8601 : nil,
      built_at: built_at ? Date.parse(built_at).iso8601 : nil,
      license: version_data["licenses"]&.first,
      prerelease: version_data["prerelease"] || false,
      platform: version_data["platform"] || "ruby",
      ruby_version: version_data["ruby_version"],
      rubygems_version: version_data["rubygems_version"],
      downloads_count: version_data["downloads_count"],
      sha: version_data["sha"],
      spec_sha: version_data["spec_sha"],
      requirements: version_data["requirements"] || [],
      metadata: version_data["metadata"] || {}
    }
  end

  # Cache the full, unpaginated list for 1 hour; paging and field selection
  # are applied per call on top of it.
  self.class.cache.set(cache_key, versions, 3600) if @cache_enabled

  page = apply_pagination_and_sort(versions, limit: limit, offset: offset, sort: sort)
  select_fields(page, fields)
end
184
+
185
# Get the newest Ruby release known to #get_ruby_versions.
#
# @return [Hash] the first entry returned by #get_ruby_versions, or a hash
#   with nil :version and :release_date when that list is empty
def get_latest_ruby_version
  newest = get_ruby_versions.first
  return newest if newest

  {version: nil, release_date: nil}
end
192
+
193
# Get the maintenance status of every Ruby branch listed on the branches page.
#
# The page is scraped: each <h3> naming a "Ruby X.Y" branch is paired with the
# <p> element that immediately follows it, and the fields are pulled out of
# that paragraph's text.
#
# @return [Array<Hash>] entries sorted by version, newest first, each with:
#   - :version (String) - Ruby branch (e.g., "3.4", "3.3")
#   - :status (String) - "normal maintenance", "security maintenance", "eol",
#     "preview", or "unknown" when the status line is not recognised
#   - :release_date (String, nil) - Release date as ISO 8601 string
#   - :normal_maintenance_until (String, nil) - ISO 8601 date, "TBD", or the raw page text
#   - :eol (String, nil) - ISO 8601 date, "TBD", or the raw page text
def get_ruby_maintenance_status
  cache_key = "ruby_maintenance_status"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  response = make_request(URI(RUBY_BRANCHES_URL), parse_html: true)
  return [] unless response

  # "TBD" passes through, parseable dates become ISO 8601, and anything else
  # is returned verbatim so no information from the page is lost.
  normalize_date = lambda do |raw|
    text = raw.strip
    next "TBD" if text == "TBD"

    begin
      Date.parse(text).iso8601
    rescue StandardError
      text
    end
  end

  maintenance_data = response.css("h3").filter_map do |h3|
    version = h3.text[/Ruby ([\d.]+)/, 1]
    next unless version&.match?(/\A\d+\.\d+\z/) # major.minor branches only

    # The maintenance details live in the paragraph right after the heading
    p_tag = h3.next_element
    next unless p_tag&.name == "p"

    status_text = p_tag.text
    status_value = status_text[/status:\s*([^\n<]+)/i, 1].to_s.strip.downcase

    # Checked in order of specificity
    status =
      if status_value.include?("preview")
        "preview"
      elsif status_value.include?("eol") || status_value.include?("end-of-life")
        "eol"
      elsif status_value.include?("security")
        "security maintenance"
      elsif status_value.include?("normal")
        "normal maintenance"
      else
        "unknown"
      end

    normal_until_raw = status_text[/normal maintenance until:\s*([^<\n]+)/i, 1]
    eol_raw = status_text[/EOL:\s*([^<\n]+)/i, 1]
    # Drop a trailing annotation such as "2027-03-31 (expected)"
    eol_raw = eol_raw.split("(").first.to_s if eol_raw

    {
      version: version,
      status: status,
      release_date: status_text[/release date:\s*(\d{4}-\d{2}-\d{2})/i, 1],
      normal_maintenance_until: normal_until_raw && normalize_date.call(normal_until_raw),
      eol: eol_raw && normalize_date.call(eol_raw)
    }
  end

  # Newest branch first. The sorted array itself must be cached and returned:
  # sort_by/reverse build new arrays rather than sorting in place.
  sorted = maintenance_data.sort_by { |entry| Gem::Version.new(entry[:version]) }.reverse

  # Cache for 24 hours (maintenance status changes infrequently)
  self.class.cache.set(cache_key, sorted, 86400) if @cache_enabled

  sorted
end
291
+
292
# Get all Ruby releases listed on the ruby-lang.org releases page.
#
# The full list is cached; pagination and sorting are applied per call, both
# on a cache hit and on a fresh fetch.
#
# @param limit [Integer, nil] Maximum number of versions to return (nil = all)
# @param offset [Integer] Number of versions to skip (for pagination)
# @param sort [Symbol] Sort order: :version_desc (default), :version_asc, :date_desc, :date_asc
# @return [Array<Hash>] hashes with :version, :release_date (ISO 8601 string or nil),
#   :download_url and :release_notes_url
def get_ruby_versions(limit: nil, offset: 0, sort: :version_desc)
  cache_key = "ruby_versions"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return apply_pagination_and_sort(cached, limit: limit, offset: offset, sort: sort) if cached
  end

  response = make_request(URI(RUBY_RELEASES_URL), parse_html: true)
  return [] unless response

  versions = response.css("table.release-list tr").filter_map do |row|
    version_string = row.css("td:nth-child(1)").text[/Ruby (.+)/, 1]&.strip
    next unless version_string&.match?(/\A\d+\.\d+\.\d+/)
    # Skip rows whose text Gem::Version cannot parse instead of raising
    next unless Gem::Version.correct?(version_string)

    release_date =
      begin
        Date.parse(row.css("td:nth-child(2)").text.strip).iso8601
      rescue Date::Error
        nil
      end

    download_link = row.css("td:nth-child(3) a").first
    notes_href = row.css("td:nth-child(4) a").first&.[]("href")
    # Release-notes links may be site-relative; make them absolute
    release_notes_url =
      if notes_href
        notes_href.start_with?("http") ? notes_href : "https://www.ruby-lang.org#{notes_href}"
      end

    {
      version: Gem::Version.new(version_string).to_s,
      release_date: release_date,
      download_url: download_link && download_link["href"],
      release_notes_url: release_notes_url
    }
  end

  # Cache the complete list for 1 hour
  self.class.cache.set(cache_key, versions, 3600) if @cache_enabled

  apply_pagination_and_sort(versions, limit: limit, offset: offset, sort: sort)
end
351
+
352
# Get a changelog summary for a Ruby version from its release notes page.
#
# The summary is the first few substantial paragraphs of the page's main
# content, capped at 2000 characters (plus a trailing "..." when truncated).
#
# @param version [String] Ruby version (e.g., "3.4.7")
# @return [Hash] Hash with :version, :release_notes_url and :summary; an
#   :error key is added when the version or its release notes cannot be found
def get_ruby_version_changelog(version)
  # The release notes URL comes from the scraped release list
  version_data = get_ruby_versions.find { |entry| entry[:version] == version }
  unless version_data
    return {version: version, release_notes_url: nil, summary: nil, error: "Version not found"}
  end

  release_notes_url = version_data[:release_notes_url]
  unless release_notes_url
    return {version: version, release_notes_url: nil, summary: nil, error: "No release notes available"}
  end

  cache_key = "ruby_changelog:#{version}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  response = make_request(URI(release_notes_url), parse_html: true)
  unless response
    return {version: version, release_notes_url: release_notes_url, summary: nil, error: "Failed to fetch release notes"}
  end

  # Try the usual main-content containers first, then fall back to <body>
  content = response.css("div.content, div.entry-content, article, main").first || response.css("body").first

  summary =
    if content
      # Keep the first five paragraphs that are long enough to be prose
      paragraphs = content.text.strip.split(/\n\n+/).reject { |paragraph| paragraph.strip.length < 50 }
      text = paragraphs.first(5).join("\n\n").strip

      # [0, 2000] keeps exactly 2000 characters; [0..2000] would keep 2001
      text.length > 2000 ? "#{text[0, 2000]}..." : text
    end

  result = {
    version: version,
    release_notes_url: release_notes_url,
    summary: summary
  }

  # Cache for 24 hours
  self.class.cache.set(cache_key, result, 86400) if @cache_enabled

  result
end
404
+
405
# Get reverse dependencies (gems that depend on this gem).
#
# @param gem_name [String] Gem name
# @return [Array<String>] the array returned by the API, or [] when the
#   response is not an array
def get_gem_reverse_dependencies(gem_name)
  cache_key = "gem_reverse_deps:#{gem_name}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  # Escape the name so it can never add path segments or a query string
  escaped_name = URI.encode_www_form_component(gem_name)
  response = make_request(URI("#{RUBYGEMS_API_BASE}/gems/#{escaped_name}/reverse_dependencies.json"))
  return [] unless response.is_a?(Array)

  # Cache for 1 hour
  self.class.cache.set(cache_key, response, 3600) if @cache_enabled

  response
end
427
+
428
# Get download statistics for a specific gem version.
#
# @param gem_name [String] Gem name
# @param version [String] Gem version (e.g., "1.0.0")
# @return [Hash] Hash with :gem_name, :version, :version_downloads and
#   :total_downloads; both counts are nil when the response is not a JSON object
def get_gem_version_downloads(gem_name, version)
  cache_key = "gem_downloads:#{gem_name}:#{version}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  # Escape both parts so neither can add path segments or a query string
  slug = "#{URI.encode_www_form_component(gem_name)}-#{URI.encode_www_form_component(version)}"
  response = make_request(URI("#{RUBYGEMS_API_BASE}/downloads/#{slug}.json"))

  # Same keys as the success case so callers see one shape
  unless response.is_a?(Hash)
    return {gem_name: gem_name, version: version, version_downloads: nil, total_downloads: nil}
  end

  result = {
    gem_name: gem_name,
    version: version,
    version_downloads: response["version_downloads"],
    total_downloads: response["total_downloads"]
  }

  # Cache for 1 hour
  self.class.cache.set(cache_key, result, 3600) if @cache_enabled

  result
end
458
+
459
# Get latest gems (most recently added).
#
# @param limit [Integer, nil] Maximum number of gems to return (default: 30,
#   max: 50); nil means the default and negative values are treated as 0
# @return [Array<Hash>] Array of gem information
def get_latest_gems(limit: 30)
  # The API returns at most 50 entries; a negative count would make
  # Array#first raise, so clamp at both ends.
  limit = (limit || 30).clamp(0, 50)
  cache_key = "latest_gems:#{limit}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  response = make_request(URI("#{RUBYGEMS_API_BASE}/activity/latest.json"))
  return [] unless response.is_a?(Array)

  gems = response.first(limit).map do |gem_data|
    {
      name: gem_data["name"],
      version: gem_data["version"],
      downloads: gem_data["downloads"],
      info: gem_data["info"],
      authors: gem_data["authors"],
      homepage: gem_data["homepage_uri"],
      source_code: gem_data["source_code_uri"],
      documentation: gem_data["documentation_uri"],
      licenses: gem_data["licenses"] || []
    }
  end

  # Cache for 15 minutes (activity changes frequently)
  self.class.cache.set(cache_key, gems, 900) if @cache_enabled

  gems
end
496
+
497
# Get recently updated gems.
#
# @param limit [Integer, nil] Maximum number of gems to return (default: 30,
#   max: 50); nil means the default and negative values are treated as 0
# @return [Array<Hash>] Array of gem version information
def get_recently_updated_gems(limit: 30)
  # The API returns at most 50 entries; a negative count would make
  # Array#first raise, so clamp at both ends.
  limit = (limit || 30).clamp(0, 50)
  cache_key = "recently_updated_gems:#{limit}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  response = make_request(URI("#{RUBYGEMS_API_BASE}/activity/just_updated.json"))
  return [] unless response.is_a?(Array)

  gems = response.first(limit).map do |gem_data|
    {
      name: gem_data["name"],
      version: gem_data["version"],
      downloads: gem_data["downloads"],
      version_downloads: gem_data["version_downloads"],
      info: gem_data["info"],
      authors: gem_data["authors"],
      homepage: gem_data["homepage_uri"],
      source_code: gem_data["source_code_uri"],
      documentation: gem_data["documentation_uri"],
      licenses: gem_data["licenses"] || [],
      created_at: gem_data["created_at"]
    }
  end

  # Cache for 15 minutes (activity changes frequently)
  self.class.cache.set(cache_key, gems, 900) if @cache_enabled

  gems
end
536
+
537
# Get a changelog summary for a gem by fetching the page at its changelog URI.
#
# The summary is built from the first paragraphs of the page's main content,
# with some GitHub page chrome filtered out, and is capped at 3000 characters
# (plus a trailing "..." when truncated).
#
# @param gem_name [String] Gem name
# @param version [String, nil] Gem version to report (defaults to the gem's
#   current version); the page fetched is always the gem's changelog URI
# @return [Hash] Hash with :gem_name, :version, :changelog_uri and :summary;
#   an :error key is added when the gem or its changelog cannot be found
def get_gem_changelog(gem_name, version: nil)
  # The changelog location comes from the gem's own metadata
  gem_info = get_gem_info(gem_name)
  if gem_info.empty?
    return {gem_name: gem_name, version: nil, changelog_uri: nil, summary: nil, error: "Gem not found"}
  end

  version ||= gem_info[:version]
  changelog_uri = gem_info[:changelog_uri]

  unless changelog_uri
    return {gem_name: gem_name, version: version, changelog_uri: nil, summary: nil, error: "No changelog URI available"}
  end

  cache_key = "gem_changelog:#{gem_name}:#{version}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    return cached if cached
  end

  response = make_request(URI(changelog_uri), parse_html: true)
  unless response
    return {gem_name: gem_name, version: version, changelog_uri: changelog_uri, summary: nil, error: "Failed to fetch changelog"}
  end

  content =
    if changelog_uri.include?("github.com") && changelog_uri.include?("/releases/")
      # GitHub release page: prefer the rendered release notes
      response.css(".markdown-body, .release-body, [data-testid='release-body']").first ||
        response.css("div.repository-content, article").first
    else
      # Generic changelog page
      response.css("div.content, div.entry-content, article, main, .markdown-body").first
    end

  content ||= response.css("body").first

  summary =
    if content
      text = content.text.strip
      # Remove common navigation/header text patterns
      text = text.gsub(/Notifications.*?signed in.*?reload/im, "")
      text = text.gsub(/You must be signed in.*?reload/im, "")
      text = text.gsub(/There was an error.*?reload/im, "")

      # Prefer blank-line separated paragraphs; fall back to single lines
      paragraphs = text.include?("\n\n") ? text.split(/\n\n+/) : text.split(/\n+/)

      paragraphs = paragraphs.reject do |paragraph|
        paragraph.strip.length < 30 ||
          paragraph.match?(/^(rails|Notifications|You must|There was)/i) ||
          paragraph.match?(/^\/\s*$/)
      end
      summary_text = paragraphs.first(10).join("\n\n").strip

      # [0, 3000] keeps exactly 3000 characters; [0..3000] would keep 3001
      summary_text = "#{summary_text[0, 3000]}..." if summary_text.length > 3000
      summary_text.empty? ? nil : summary_text
    end

  result = {
    gem_name: gem_name,
    version: version,
    changelog_uri: changelog_uri,
    summary: summary
  }

  # Cache for 24 hours
  self.class.cache.set(cache_key, result, 86400) if @cache_enabled

  result
end
614
+
615
# Get gem information (summary, homepage, etc.).
#
# @param gem_name [String] Gem name
# @param fields [Array<String>, nil] GraphQL-like field selection (nil = all fields)
#   Available fields: name, version, summary, description, homepage, source_code,
#   documentation, licenses, authors, info, downloads, version_downloads, yanked,
#   dependencies, changelog_uri, funding_uri, platform, sha, spec_sha, metadata
# @return [Hash] Hash with selected gem information; {} when the response is
#   not a JSON object
def get_gem_info(gem_name, fields: nil)
  cache_key = "gem_info:#{gem_name}"

  if @cache_enabled
    cached = self.class.cache.get(cache_key)
    if cached
      return select_fields([cached], fields).first if fields
      return cached
    end
  end

  # Escape the name so it can never add path segments or a query string
  escaped_name = URI.encode_www_form_component(gem_name)
  response = make_request(URI("#{RUBYGEMS_API_BASE}/gems/#{escaped_name}.json"))
  return {} unless response.is_a?(Hash)

  gem_info = {
    name: response["name"],
    version: response["version"],
    summary: response["summary"] || response["info"],
    description: response["description"],
    homepage: response["homepage_uri"],
    source_code: response["source_code_uri"],
    documentation: response["documentation_uri"],
    licenses: response["licenses"] || [],
    authors: response["authors"],
    info: response["info"],
    downloads: response["downloads"],
    version_downloads: response["version_downloads"],
    yanked: response["yanked"] || false,
    dependencies: response["dependencies"] || {runtime: [], development: []},
    # Some URIs are only published inside the metadata hash
    changelog_uri: response["changelog_uri"] || response.dig("metadata", "changelog_uri"),
    funding_uri: response["funding_uri"] || response.dig("metadata", "funding_uri"),
    platform: response["platform"] || "ruby",
    sha: response["sha"],
    spec_sha: response["spec_sha"],
    metadata: response["metadata"] || {}
  }

  # Cache the full record for 1 hour; field selection is applied per call
  self.class.cache.set(cache_key, gem_info, 3600) if @cache_enabled

  select_fields([gem_info], fields).first || gem_info
end
667
+
668
# Search for gems by name.
#
# Results are not cached because they can change frequently.
#
# @param query [String] Search query
# @param limit [Integer, nil] Maximum number of results to return (nil = all)
# @param offset [Integer] Number of results to skip (for pagination)
# @return [Array<Hash>] Array of hashes with gem information; always an
#   Array, also when offset points past the last result
def search_gems(query, limit: nil, offset: 0)
  uri = URI("#{RUBYGEMS_API_BASE}/search.json")
  uri.query = URI.encode_www_form(query: query)

  response = make_request(uri)
  return [] unless response.is_a?(Array)

  results = response.map do |gem_data|
    {
      name: gem_data["name"],
      version: gem_data["version"],
      info: gem_data["info"],
      homepage: gem_data["homepage_uri"],
      source_code: gem_data["source_code_uri"],
      documentation: gem_data["documentation_uri"]
    }
  end

  # Array#[] with a range starting past the end returns nil, not []
  page = offset > 0 ? (results[offset..] || []) : results
  limit ? page.first(limit) : page
end
698
+
699
+ private
700
+
701
# Sort a list of version hashes, then cut out the requested page.
#
# @param versions [Array<Hash>] hashes with :version and, for date sorts, :release_date
# @param limit [Integer, nil] Maximum number of entries to return (nil = no limit)
# @param offset [Integer] Number of entries to skip after sorting
# @param sort [Symbol] :version_desc, :version_asc, :date_desc or :date_asc;
#   any other value is treated as :version_desc
# @return [Array<Hash>] the selected page (empty when offset is past the end)
def apply_pagination_and_sort(versions, limit: nil, offset: 0, sort: :version_desc)
  ascending =
    if %i[date_desc date_asc].include?(sort)
      # Entries without a date sort as the empty string, i.e. before any date
      versions.sort_by { |entry| entry[:release_date] || "" }
    else
      versions.sort_by { |entry| Gem::Version.new(entry[:version]) }
    end

  ordered = %i[version_asc date_asc].include?(sort) ? ascending : ascending.reverse

  window = ordered[offset..] || []
  limit ? window.first(limit) : window
end
728
+
729
# Reduce each hash to the requested keys (GraphQL-like field selection).
#
# A key is kept when either its String or its Symbol form appears in +fields+.
#
# @param data [Array<Hash>] Array of hashes to filter
# @param fields [Array<String>, nil] Fields to include (nil or empty = all fields)
# @return [Array<Hash>] +data+ itself when no selection is given, otherwise
#   new hashes containing only the selected keys
def select_fields(data, fields)
  return data if fields.nil? || fields.empty?

  wanted = ->(key) { fields.include?(key.to_s) || fields.include?(key.to_sym) }

  data.map do |record|
    record.select { |key, _value| wanted.call(key) }
  end
end
741
+
742
# Build an HTTP client for the host and port of the given URI.
#
# Both timeouts are set to 10 seconds. For https URIs, TLS is enabled with
# peer verification, and a CA bundle is configured when one can be located:
# the file named by SSL_CERT_FILE if it exists, else OpenSSL's default file.
#
# @param uri [URI] URI object for the request
# @return [Net::HTTP] Configured (not yet started) HTTP client
def build_http_client(uri)
  Net::HTTP.new(uri.host, uri.port).tap do |client|
    client.read_timeout = 10
    client.open_timeout = 10
    next unless uri.scheme == "https"

    client.use_ssl = true
    client.verify_mode = OpenSSL::SSL::VERIFY_PEER

    env_bundle = ENV["SSL_CERT_FILE"]
    bundle =
      if env_bundle && File.file?(env_bundle)
        env_bundle
      elsif File.exist?(OpenSSL::X509::DEFAULT_CERT_FILE)
        OpenSSL::X509::DEFAULT_CERT_FILE
      end

    client.ca_file = bundle if bundle
  end
end
768
+
769
# Perform a GET request and return the validated, parsed response body.
#
# @param uri [URI] address to fetch
# @param parse_html [Boolean] parse the body as HTML instead of JSON
# @return [Hash, Array, Object] parsed JSON, or the parsed HTML document when
#   +parse_html+ is true
# @raise [ResponseSizeExceededError] if the body is larger than MAX_RESPONSE_SIZE
# @raise [CorruptedDataError] if the body fails validation or parsing
# @raise [RuntimeError] for 404s ("Resource not found..."), other non-success
#   statuses ("API request failed..."), TLS failures ("SSL verification
#   failed...") and any other error while requesting ("Request failed...")
def make_request(uri, parse_html: false)
  # HTTP status failures are recorded here and raised after the rescue
  # clauses, so the catch-all below cannot re-wrap them as "Request failed".
  failure = nil

  parsed = begin
    http = build_http_client(uri)

    request = Net::HTTP::Get.new(uri)
    request["Accept"] = parse_html ? "text/html" : "application/json"
    request["User-Agent"] = "rubygems_mcp/#{RubygemsMcp::VERSION}"

    response = http.request(request)

    case response
    when Net::HTTPSuccess
      # The body may be nil for some responses
      response_body = response.body || ""
      response_size = response_body.bytesize
      raise ResponseSizeExceededError.new(response_size, MAX_RESPONSE_SIZE) if response_size > MAX_RESPONSE_SIZE

      if parse_html
        validate_and_parse_html(response_body, uri)
      else
        validate_and_parse_json(response_body, uri)
      end
    when Net::HTTPNotFound
      # to_s: error responses may have no body either
      failure = "Resource not found. Response: #{response.body.to_s[0..500]}"
      nil
    else
      failure = "API request failed: #{response.code} #{response.message}\n#{response.body.to_s[0..500]}"
      nil
    end
  rescue ResponseSizeExceededError, CorruptedDataError
    # Our own errors carry structured details; pass them through untouched
    raise
  rescue OpenSSL::SSL::SSLError => e
    raise "SSL verification failed: #{e.message}. This may be due to system certificate configuration issues."
  rescue => e
    raise "Request failed: #{e.class} - #{e.message}"
  end

  raise failure if failure

  parsed
end
807
+
808
# Parse a response body as JSON, rejecting anything that is not a JSON
# object or array.
#
# @param body [String] Response body
# @param uri [URI] Request URI, used only in error messages
# @return [Hash, Array] Parsed JSON data
# @raise [CorruptedDataError] if the body looks like a crawler-protection
#   page, is not valid JSON, or parses to something other than a Hash or Array
def validate_and_parse_json(body, uri)
  # Protection keywords only count when the body starts like markup, so that
  # ordinary JSON mentioning them is not rejected.
  looks_like_markup = body.strip.start_with?("<")
  if looks_like_markup && body.match?(/cloudflare|ddos protection|access denied|blocked|captcha/i)
    raise CorruptedDataError.new(
      "Response appears to be a crawler protection page from #{uri}",
      response_size: body.bytesize
    )
  end

  parsed =
    begin
      JSON.parse(body)
    rescue JSON::ParserError => e
      # An HTML document where JSON was expected usually means an error page
      message =
        if body.strip.start_with?("<!DOCTYPE", "<html", "<HTML")
          "Received HTML instead of JSON from #{uri}. This may indicate an error page or crawler protection."
        else
          "Failed to parse JSON response from #{uri}: #{e.message}"
        end

      raise CorruptedDataError.new(message, original_error: e, response_size: body.bytesize)
    end

  return parsed if parsed.is_a?(Hash) || parsed.is_a?(Array)

  raise CorruptedDataError.new(
    "Invalid JSON structure: expected Hash or Array, got #{parsed.class}",
    response_size: body.bytesize
  )
end
852
+
853
# Parse a response body as HTML after screening it for signs that it is not
# the page that was asked for.
#
# Checks run in this order: crawler-protection keywords anywhere in the raw
# body, presence of an <html> marker, a minimum amount of visible text, and
# common error-page phrases in the visible text.
#
# @param body [String] Response body
# @param uri [URI] Request URI, used only in error messages
# @return [Nokogiri::HTML::Document] Parsed HTML document
# @raise [CorruptedDataError] if any check fails or the HTML cannot be parsed
def validate_and_parse_html(body, uri)
  # Every failure reports the body size; only parser failures carry a cause
  corrupted = lambda do |message, cause = nil|
    CorruptedDataError.new(message, original_error: cause, response_size: body.bytesize)
  end

  if body.match?(/cloudflare|ddos protection|access denied|blocked|captcha|rate limit/i)
    raise corrupted.call("Response appears to be a crawler protection page from #{uri}")
  end

  html_like = body.strip.start_with?("<!DOCTYPE", "<html", "<HTML") || body.include?("<html")
  raise corrupted.call("Response from #{uri} does not appear to be HTML") unless html_like

  begin
    doc = Nokogiri::HTML(body)

    if doc.text.strip.length < 50
      raise corrupted.call("HTML response from #{uri} appears to be empty or too short")
    end

    error_indicators = [
      /error 404/i,
      /page not found/i,
      /access denied/i,
      /forbidden/i,
      /internal server error/i
    ]
    looks_like_error_page = error_indicators.any? { |pattern| doc.text.match?(pattern) }
    raise corrupted.call("HTML response from #{uri} appears to be an error page") if looks_like_error_page

    doc
  rescue Nokogiri::XML::SyntaxError => e
    raise corrupted.call("Failed to parse HTML from #{uri}: #{e.message}", e)
  end
end
911
+ end
912
+ end