RubyGems - librariesio-url-parser - Versions diffs - 1.0.2 → 1.0.5 - Mend

librariesio-url-parser 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

checksums.yaml +4 -4
data/Gemfile +1 -1
data/Gemfile.lock +7 -1
data/bin/console +14 -0
data/lib/android_googlesource_url_parser.rb +40 -0
data/lib/apache_git_wip_url_parser.rb +59 -0
data/lib/apache_gitbox_url_parser.rb +59 -0
data/lib/apache_svn_url_parser.rb +3 -2
data/lib/drupal_url_parser.rb +29 -0
data/lib/eclipse_git_url_parser.rb +25 -0
data/lib/librariesio-url-parser.rb +7 -1
data/lib/sourceforge_url_parser.rb +36 -0
data/lib/url_parser.rb +35 -13
data/librariesio-url-parser.gemspec +1 -0
metadata +28 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 39533f7a7c92f1b4bcccd5a9a46b3c2ae5d14f04da87b0d643339be188184dfc
-  data.tar.gz: bcf4fd5128de3343a11f46b25fadf2cf0f0daf479a9f37824f2d0ac93534753b
+  metadata.gz: b7678a02447f9ecb298aca33105ed4a0a686258b39e1dbbf6069cadcc8bb5ee9
+  data.tar.gz: f5c7d4fa5dca4858e46f7740b33f4e54f67230e2801a2e2cfb00bfae7f1aad6a
 SHA512:
-  metadata.gz: 27401dad229d48fd32ec999b91ddf171a708b847fdbfbbb21b283cf6b72e1f8280731ccd2f51a012bdfcf7d73f9087a7c00ab8b8db3109b55913a434be70dba1
-  data.tar.gz: ed74167bc7f4b712209ecf9e0fddbafe7c5c9f77dcda9fcfb883b658d8e99ee8283d89a70f4ddecd2fd3a62912c5cb04c72358804dbedb44932e0adafae75594
+  metadata.gz: 940d8eccc878fe614347f7e18f381825f531cf2f14a0f22834bbd342e8ebf6be13e4daeb5c07e2627446ba5d5df76af8b634285731479ce6fe4d38807fb29326
+  data.tar.gz: 928daf9d94eeed789e059e099dd6346e7c6d1b4cbaee1f788d8d36dc2420fdd232c74a7ed68f051aa91efa26d1d6636107d89deee06e082d0fd7c2f6398b90ca

data/Gemfile CHANGED Viewed

@@ -2,4 +2,4 @@ source "https://rubygems.org"
 ruby "2.6.5"
 # Specify your gem's dependencies in librariesio-url-parser.gemspec
-gemspec
+gemspec

data/Gemfile.lock CHANGED Viewed

@@ -1,12 +1,17 @@
 PATH
   remote: .
   specs:
-    librariesio-url-parser (1.0.2)
+    librariesio-url-parser (1.0.5)
 GEM
   remote: https://rubygems.org/
   specs:
+    coderay (1.1.3)
     diff-lcs (1.5.0)
+    method_source (1.0.0)
+    pry (0.14.1)
+      coderay (~> 1.1)
+      method_source (~> 1.0)
     rake (12.3.3)
     rspec (3.11.0)
       rspec-core (~> 3.11.0)
@@ -29,6 +34,7 @@ PLATFORMS
 DEPENDENCIES
   librariesio-url-parser!
+  pry (~> 0.14.1)
   rake (~> 12.0)
   rspec (~> 3.0)
   rspec_junit_formatter (~> 0.5)

data/bin/console ADDED Viewed

@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "librariesio-url-parser"
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+# (If you use this, don't forget to add pry to your Gemfile!)
+# require "pry"
+# Pry.start
+require "pry"
+Pry.start

data/lib/android_googlesource_url_parser.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+class AndroidGooglesourceUrlParser < URLParser
+  private
+  def full_domain
+    'https://android.googlesource.com'
+  end
+  def tlds
+    %w(com)
+  end
+  def domain
+    'android.googlesource'
+  end
+  def remove_domain
+    url.sub!(/(android\.googlesource\.com)+?(:|\/)?/i, '')
+  end
+  def remove_extra_segments
+    self.url = url.split('/').reject{ |s| s.strip.empty? }
+  end
+  def format_url
+    # if this is an Array then the url has gone through all the clean up steps
+    #
+    # if this is just a string then the url was not cleaned up and I have no idea how to format it
+    return nil unless url.is_a?(Array) && url.length.positive?
+    # the links that go into specific branches of the repository contain a + segment in the path
+    # for example https://android.googlesource.com/device/amlogic/yukawa/ is the top level repository
+    # but the url for looking at the master branch is
+    # https://android.googlesource.com/device/amlogic/yukawa/+/refs/heads/master
+    # and the same applies for tags
+    # https://android.googlesource.com/device/amlogic/yukawa/+/refs/tags/android-12.1.0_r16
+    self.url = url.join("/").split("+").first.chomp("/")
+  end
+end

data/lib/apache_git_wip_url_parser.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+class ApacheGitWipUrlParser < URLParser
+  private
+  def full_domain
+    'https://git-wip-us.apache.org/repos/asf'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'git-wip-us.apache'
+  end
+  def remove_querystring
+    # it is common for the name to be passed in as a query parameter so we need to keep them in
+    # the url string for now and process them in later steps to pull the name out of the parameter
+    url
+  end
+  def remove_equals_sign
+    # we need to preserve the p=<some_name> query parameter
+    splits = url.split('=')
+    p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
+    if p_index
+      new_url = splits[0..p_index+1].join("=") if p_index
+      # remove separator characters present at the end of this string
+      # before the next parameter in the query parameter list
+      # ";"
+      new_url.gsub!(/[;,&].*/, '')
+      self.url = new_url
+    end
+  end
+  def domain_regex
+    # match only the repos/asf endpoint at the domain
+    "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
+  end
+  def remove_domain
+    url.sub!(/(git-wip-us\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
+  end
+  def remove_extra_segments
+    # by the time the URL gets here it should have been mostly pared down to the correct name
+    # however if the name was passed as a query parameter the ?p= is still at the front of the name
+    if url.is_a?(String) && url.start_with?("?p=")
+      self.url = url.split("=").last
+    end
+  end
+  def format_url
+    # ignore something if it comes in as an Array at this point
+    url.is_a?(String) ? url : nil
+  end
+end

data/lib/apache_gitbox_url_parser.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+class ApacheGitboxUrlParser < URLParser
+  private
+  def full_domain
+    'https://gitbox.apache.org/repos/asf'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'gitbox.apache'
+  end
+  def remove_querystring
+    # it is common for the name to be passed in as a query parameter so we need to keep them in
+    # the url string for now and process them in later steps to pull the name out of the parameter
+    url
+  end
+  def remove_equals_sign
+    # we need to preserve the p=<some_name> query parameter
+    splits = url.split('=')
+    p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
+    if p_index
+      new_url = splits[0..p_index+1].join("=") if p_index
+      # remove separator characters present at the end of this string
+      # before the next parameter in the query parameter list
+      # ";"
+      new_url.gsub!(/[;,&].*/, '')
+      self.url = new_url
+    end
+  end
+  def domain_regex
+    # match only the repos/asf endpoint at the domain
+    "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
+  end
+  def remove_domain
+    url.sub!(/(gitbox\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
+  end
+  def remove_extra_segments
+    # by the time the URL gets here it should have been mostly pared down to the correct name
+    # however if the name was passed as a query parameter the ?p= is still at the front of the name
+    if url.is_a?(String) && url.start_with?("?p=")
+      self.url = url.split("=").last
+    end
+  end
+  def format_url
+    # ignore something if it comes in as an Array at this point
+    url.is_a?(String) ? url : nil
+  end
+end

data/lib/apache_svn_url_parser.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 class ApacheSvnUrlParser < URLParser
   SUBDIR_NAMES = %w[trunk tags branches].freeze
+  VALID_PATHS = %w[viewvc viewcvs\.cgi repos\/asf].freeze
   private
   def full_domain
@@ -17,13 +18,13 @@ class ApacheSvnUrlParser < URLParser
   def domain_regex
     # match only the viewvc endpoint at the domain
-    "#{domain.split("/").first}\.(#{tlds.join('|')})\/viewvc"
+    "#{domain}\.(#{tlds.join('|')})\/(#{VALID_PATHS.join("|")})"
   end
   def remove_domain
     # find the matches for the apache.org domain and endpoint path in the url string
     # and replace only the first match in case we find a repo with something like apache.org as the name
-    url.sub!(/(apache\.org\/(viewvc|repos))+?(:|\/)?/i, '')
+    url.sub!(/(apache\.org\/(viewvc|repos\/asf|viewcvs\.cgi))+?(:|\/)?/i, '')
   end
   def extractable_early?

data/lib/drupal_url_parser.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+class DrupalUrlParser < URLParser
+  private
+  def full_domain
+    'https://git.drupalcode.org/project'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'git.drupalcode'
+  end
+  def remove_domain
+    # find the matches for the drupalcode.org/project domain and path in the url string
+    # and replace only the first match in case we find a repo with something like drupalcode.org as the name
+    url.sub!(/(drupalcode\.org\/project)+?(:|\/)?/i, '')
+  end
+  def format_url
+    # if this is an Array then the url has gone through all the clean up steps
+    return nil unless url.is_a?(Array) && url.length.positive?
+    url.join("/")
+  end
+end

data/lib/eclipse_git_url_parser.rb ADDED Viewed

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+class EclipseGitUrlParser < URLParser
+  private
+  def full_domain
+    'https://git.eclipse.org/c'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'git.eclipse'
+  end
+  def remove_git_extension
+    # the repository names all end in .git on the website, so don't remove it here
+    nil
+  end
+  def remove_domain
+    url.sub!(/(eclipse\.org\/c)+?(:|\/)?/i, '')
+  end
+end

data/lib/librariesio-url-parser.rb CHANGED Viewed

@@ -5,7 +5,13 @@ require_relative "bitbucket_url_parser"
 require_relative "github_url_parser"
 require_relative "gitlab_url_parser"
 require_relative "apache_svn_url_parser"
+require_relative "apache_git_wip_url_parser"
+require_relative "apache_gitbox_url_parser"
+require_relative "drupal_url_parser"
+require_relative "eclipse_git_url_parser"
+require_relative "android_googlesource_url_parser"
+require_relative "sourceforge_url_parser"
 module LibrariesioURLParser
-  VERSION = "1.0.2"
+  VERSION = "1.0.5"
 end

data/lib/sourceforge_url_parser.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+class SourceforgeUrlParser < URLParser
+  PROJECT_PATHS = %w[projects p].freeze
+  private
+  def full_domain
+    'https://sourceforge.net/projects'
+  end
+  def tlds
+    %w(net)
+  end
+  def domain
+    'sourceforge'
+  end
+  def remove_domain
+    url.sub!(/(sourceforge\.net\/(#{PROJECT_PATHS.join("|")}))+?(:|\/)?/i, '')
+  end
+  def extractable_early?
+    false
+  end
+  def remove_extra_segments
+    self.url = url.split('/').reject{ |s| s.strip.empty? }.first
+  end
+  def format_url
+    # the URL at this point should have been reduced down to a single string for the project name
+    return nil unless url.is_a?(String)
+    url
+  end
+end

data/lib/url_parser.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 class URLParser
   def self.parse(url)
     new(url).parse
@@ -28,22 +29,27 @@ class URLParser
   end
   def self.try_all(url)
-    GithubURLParser.parse_to_full_url(url) ||
-    GitlabURLParser.parse_to_full_url(url) ||
-    BitbucketURLParser.parse_to_full_url(url) ||
-    ApacheSvnUrlParser.parse_to_full_url(url)
+    # run through all the subclasses and try their parse method
+    # exit the reduce at the first non nil value and return that
+    descendants.reduce(nil) do |_, n|
+      r = n.parse_to_full_url(url)
+      break r if r
+    end
   end
   def parse_to_full_url
     path = parse
     return nil if path.nil? || path.empty?
     [full_domain, path].join('/')
   end
   def parse_to_full_user_url
     return nil unless parseable?
     path = clean_url
     return nil unless path.length == 1
     [full_domain, path].join('/')
   end
@@ -59,7 +65,12 @@ class URLParser
     remove_auth_user
     remove_equals_sign
     remove_scheme
-    return nil unless includes_domain?
+    unless includes_domain?
+      self.url = nil
+      return nil
+    end
     remove_subdomain
     remove_domain
     remove_git_extension
@@ -68,7 +79,9 @@ class URLParser
   end
   def format_url
+    return nil if url.nil?
     return nil unless url.length == 2
     url.join('/')
   end
@@ -84,14 +97,6 @@ class URLParser
     raise NotImplementedError
   end
-  def includes_domain?
-    raise NotImplementedError
-  end
-  def extractable_early?
-    raise NotImplementedError
-  end
   def domain_regex
     "#{domain}\.(#{tlds.join('|')})"
   end
@@ -162,4 +167,21 @@ class URLParser
   def remove_whitespace
     url.gsub!(/\s/, '')
   end
+  # This computation is memoized because it is expensive. Memoizing it supports use cases which require using
+  # .try_all in a tight loop. However, if this class is required directly (without requiring any subparsers),
+  # this method will memoize an empty array. It is recommended to simply require librariesio-url-parser.rb directly.
+  # This is the default behavior when installing this gem.
+  private_class_method def self.descendants
+    @descendants ||=
+      begin
+        descendants = []
+        ObjectSpace.each_object(singleton_class) do |k|
+          next if k.singleton_class?
+          descendants.unshift k unless k == self
+        end
+        descendants
+      end
+  end
 end

data/librariesio-url-parser.gemspec CHANGED Viewed

@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rake", "~> 12.0"
   spec.add_development_dependency "rspec", "~> 3.0"
   spec.add_development_dependency "rspec_junit_formatter", "~> 0.5"
+  spec.add_development_dependency "pry", "~> 0.14.1"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: librariesio-url-parser
 version: !ruby/object:Gem::Version
-  version: 1.0.2
+  version: 1.0.5
 platform: ruby
 authors:
 - Matt Pace
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-07-27 00:00:00.000000000 Z
+date: 2022-08-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -52,7 +52,21 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.5'
-description:
+- !ruby/object:Gem::Dependency
+  name: pry
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.14.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.14.1
+description:
 email:
 - matt.pace@tidelift.com
 executables: []
@@ -65,11 +79,18 @@ files:
 - Gemfile.lock
 - README.md
 - Rakefile
+- bin/console
+- lib/android_googlesource_url_parser.rb
+- lib/apache_git_wip_url_parser.rb
+- lib/apache_gitbox_url_parser.rb
 - lib/apache_svn_url_parser.rb
 - lib/bitbucket_url_parser.rb
+- lib/drupal_url_parser.rb
+- lib/eclipse_git_url_parser.rb
 - lib/github_url_parser.rb
 - lib/gitlab_url_parser.rb
 - lib/librariesio-url-parser.rb
+- lib/sourceforge_url_parser.rb
 - lib/url_parser.rb
 - librariesio-url-parser.gemspec
 homepage: https://github.com/librariesio/librariesio-url-parser
@@ -77,7 +98,7 @@ licenses:
 - AGPL-3.0
 metadata:
   rubygems_mfa_required: 'true'
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -92,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.3
-signing_key:
+rubygems_version: 3.0.9
+signing_key:
 specification_version: 4
 summary: Parse the URL for various repositories tracked by libraries.io
 test_files: []