RubyGems - librariesio-url-parser - Versions diffs - 1.0.1 → 1.0.4 - Mend

librariesio-url-parser 1.0.1 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

checksums.yaml +4 -4
data/Gemfile +1 -3
data/Gemfile.lock +1 -1
data/README.md +1 -0
data/bin/console +14 -0
data/lib/android_googlesource_url_parser.rb +40 -0
data/lib/apache_git_wip_url_parser.rb +59 -0
data/lib/apache_gitbox_url_parser.rb +59 -0
data/lib/apache_svn_url_parser.rb +67 -0
data/lib/drupal_url_parser.rb +29 -0
data/lib/eclipse_git_url_parser.rb +25 -0
data/lib/librariesio-url-parser.rb +8 -1
data/lib/sourceforge_url_parser.rb +36 -0
data/lib/url_parser.rb +31 -14
data/librariesio-url-parser.gemspec +1 -0
metadata +29 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: df16d2958656fa866177551cb6c6275e6b8303c40d41d3baeb639aff567aad07
-  data.tar.gz: ec4443d5e407716d312746c7157d698a21b9c10eaeea208f42101b42688f4b47
+  metadata.gz: 26ec3df36120630b6729b1d5a9b7d854acfeadede419ab34e895d0e00619fb7c
+  data.tar.gz: 7bf32628de41c3a243b08ee3b8aa8489e5dfbfef5fe673e7f68390a6f942dbd0
 SHA512:
-  metadata.gz: 8ca1db0935515b6fe9d0f2dc5c27e8af36bb8cd4b652dc70d4f5194a27edd1cd4257144bd83b4b9c7fb4ae6dc333a37789021e852a1bc34cdf4000a61a27b86f
-  data.tar.gz: f253083f04c93d8eff970002d9bcc02ce0ffb362167149371a6a5576dd4ed1ee9dbe2c760841695873487749cab40d121447fdf48f74d429b46be930322db683
+  metadata.gz: bbe57df347aafc5b82046f13a5e20d7c6deaf66cb67590540d122701d79bd516bb0b015b0b41f72d144e7c963f45e1ef7f67da61328aa819b8712c58d029b9af
+  data.tar.gz: c942042bb95db767107b91aa136c61cdd8408c11cfef763038abab07e0bdc515ee032cf8979e27040988aca8ce0b879214fae367511b1cf36a48a0f5e4415071

data/Gemfile CHANGED Viewed

@@ -2,6 +2,4 @@ source "https://rubygems.org"
 ruby "2.6.5"
 # Specify your gem's dependencies in librariesio-url-parser.gemspec
-gemspec
-gem "pry", "~> 0.14.1", :group => :development
+gemspec

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    librariesio-url-parser (1.0.1)
+    librariesio-url-parser (1.0.4)
 GEM
   remote: https://rubygems.org/

data/README.md CHANGED Viewed

@@ -45,6 +45,7 @@ URLParser.try_all("git@bitbucket.org:tildeslash/monit.git") #=> "https://bitbuck
 - GitHub
 - GitLab
 - Bitbucket
+- Apache SVN
 ## Development

data/bin/console ADDED Viewed

@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "librariesio-url-parser"
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+# (If you use this, don't forget to add pry to your Gemfile!)
+# require "pry"
+# Pry.start
+require "pry"
+Pry.start

data/lib/android_googlesource_url_parser.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+class AndroidGooglesourceUrlParser < URLParser
+  private
+  def full_domain
+    'https://android.googlesource.com'
+  end
+  def tlds
+    %w(com)
+  end
+  def domain
+    'android.googlesource'
+  end
+  def remove_domain
+    url.sub!(/(android\.googlesource\.com)+?(:|\/)?/i, '')
+  end
+  def remove_extra_segments
+    self.url = url.split('/').reject{ |s| s.strip.empty? }
+  end
+  def format_url
+    # if this is an Array then the url has gone through all the clean up steps
+    #
+    # if this is just a string then the url was not cleaned up and I have no idea how to format it
+    return nil unless url.is_a?(Array) && url.length.positive?
+    # the links that go into specific branches of the repository start with + in the path
+    # for example https://android.googlesource.com/device/amlogic/yukawa/ is the top level repository
+    # but looking at the master branch is the url
+    # https://android.googlesource.com/device/amlogic/yukawa/+/refs/heads/master
+    # and the same applies for tags
+    # https://android.googlesource.com/device/amlogic/yukawa/+/refs/tags/android-12.1.0_r16
+    self.url = url.join("/").split("+").first.chomp("/")
+  end
+end

data/lib/apache_git_wip_url_parser.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+class ApacheGitWipUrlParser < URLParser
+  private
+  def full_domain
+    'https://git-wip-us.apache.org/repos/asf'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'git-wip-us.apache'
+  end
+  def remove_querystring
+    # it is common for the name to be passed in as a query parameter so we need to keep them in
+    # the url string for now and process them in later steps to pull the name out of the parameter
+    url
+  end
+  def remove_equals_sign
+    # we need to preserve the p=<some_name> query parameter
+    splits = url.split('=')
+    p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
+    if p_index
+      new_url = splits[0..p_index+1].join("=") if p_index
+      # drop everything from the first separator character (";", "," or "&") onward
+      # so that any parameters following p=<some_name> in the query parameter list
+      # are removed
+      new_url.gsub!(/[;,&].*/, '')
+      self.url = new_url
+    end
+  end
+  def domain_regex
+    # match only the repos/asf endpoint at the domain
+    "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
+  end
+  def remove_domain
+    url.sub!(/(git-wip-us\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
+  end
+  def remove_extra_segments
+    # by the time the URL gets here it should have been mostly pared down to the correct name
+    # however if the name was passed as a query parameter the ?p= is still at the front of the name
+    if url.is_a?(String) && url.start_with?("?p=")
+      self.url = url.split("=").last
+    end
+  end
+  def format_url
+    # ignore the url if it comes in as an Array at this point
+    url.is_a?(String) ? url : nil
+  end
+end

data/lib/apache_gitbox_url_parser.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+class ApacheGitboxUrlParser < URLParser
+  private
+  def full_domain
+    'https://gitbox.apache.org/repos/asf'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'gitbox.apache'
+  end
+  def remove_querystring
+    # it is common for the name to be passed in as a query parameter so we need to keep them in
+    # the url string for now and process them in later steps to pull the name out of the parameter
+    url
+  end
+  def remove_equals_sign
+    # we need to preserve the p=<some_name> query parameter
+    splits = url.split('=')
+    p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
+    if p_index
+      new_url = splits[0..p_index+1].join("=") if p_index
+      # drop everything from the first separator character (";", "," or "&") onward
+      # so that any parameters following p=<some_name> in the query parameter list
+      # are removed
+      new_url.gsub!(/[;,&].*/, '')
+      self.url = new_url
+    end
+  end
+  def domain_regex
+    # match only the repos/asf endpoint at the domain
+    "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
+  end
+  def remove_domain
+    url.sub!(/(gitbox\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
+  end
+  def remove_extra_segments
+    # by the time the URL gets here it should have been mostly pared down to the correct name
+    # however if the name was passed as a query parameter the ?p= is still at the front of the name
+    if url.is_a?(String) && url.start_with?("?p=")
+      self.url = url.split("=").last
+    end
+  end
+  def format_url
+    # ignore the url if it comes in as an Array at this point
+    url.is_a?(String) ? url : nil
+  end
+end

data/lib/apache_svn_url_parser.rb ADDED Viewed

@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+class ApacheSvnUrlParser < URLParser
+  SUBDIR_NAMES = %w[trunk tags branches].freeze
+  VALID_PATHS = %w[viewvc viewcvs\.cgi repos\/asf].freeze
+  private
+  def full_domain
+    'https://svn.apache.org/viewvc'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'svn.apache'
+  end
+  def domain_regex
+    # match only the supported endpoints (VALID_PATHS) at the domain
+    "#{domain}\.(#{tlds.join('|')})\/(#{VALID_PATHS.join("|")})"
+  end
+  def remove_domain
+    # find the matches for the apache.org domain and endpoint path in the url string
+    # and replace only the first match in case we find a repo with something like the domain as the name
+    url.sub!(/(apache\.org\/(viewvc|repos\/asf|viewcvs\.cgi))+?(:|\/)?/i, '')
+  end
+  def extractable_early?
+    false
+  end
+  def remove_extra_segments
+    # split the url by / and remove any empty sections
+    self.url = url.split('/').reject{ |s| s.strip.empty? }
+    # check to see if any repository subdirectories are included in the segments
+    # this parser is parsing SVN projects, so any common folders used for branching should trip this
+    # truncate the array of segments to stop once we hit a top level sub directory typically seen in SVN repos
+    # and return everything up to that point
+    #
+    # for example apache.org/viewvc/myproject/subproject/tags/my-1.0.0-release should stop at myproject/subproject
+    # since the tags are just part of that repository
+    subdir_index = url.index{ |s| SUBDIR_NAMES.include?(s) }
+    # it looks like the maven/pom directory on the Apache SVN server has a bunch of repositories stored under tags
+    # in this special case, grab the directory name under the subdirectory
+    # it looks like this is most likely to be the first directory under tags/
+    in_maven_pom_dir = url[0..1].join("/").downcase == "maven/pom"
+    if in_maven_pom_dir
+      self.url = url[0..subdir_index+1] if subdir_index
+    else
+      self.url = url[0..subdir_index-1] if subdir_index
+    end
+  end
+  def format_url
+    # if this is an Array then the url has gone through all the clean up steps
+    #
+    # if this is just a string then the url was not cleaned up and I have no idea how to format it
+    return nil unless url.is_a?(Array) && url.length.positive?
+    url.join("/")
+  end
+end

data/lib/drupal_url_parser.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+class DrupalUrlParser < URLParser
+  private
+  def full_domain
+    'https://git.drupalcode.org/project'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'git.drupalcode'
+  end
+  def remove_domain
+    # find the matches for the drupalcode.org/project domain and path in the url string
+    # and replace only the first match in case we find a repo with something like the domain as the name
+    url.sub!(/(drupalcode\.org\/project)+?(:|\/)?/i, '')
+  end
+  def format_url
+    # if this is an Array then the url has gone through all the clean up steps
+    return nil unless url.is_a?(Array) && url.length.positive?
+    url.join("/")
+  end
+end

data/lib/eclipse_git_url_parser.rb ADDED Viewed

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+class EclipseGitUrlParser < URLParser
+  private
+  def full_domain
+    'https://git.eclipse.org/c'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'git.eclipse'
+  end
+  def remove_git_extension
+    # the repository names all end in .git on the website, so don't remove it here
+    nil
+  end
+  def remove_domain
+    url.sub!(/(eclipse\.org\/c)+?(:|\/)?/i, '')
+  end
+end

data/lib/librariesio-url-parser.rb CHANGED Viewed

@@ -4,7 +4,14 @@ require_relative "url_parser"
 require_relative "bitbucket_url_parser"
 require_relative "github_url_parser"
 require_relative "gitlab_url_parser"
+require_relative "apache_svn_url_parser"
+require_relative "apache_git_wip_url_parser"
+require_relative "apache_gitbox_url_parser"
+require_relative "drupal_url_parser"
+require_relative "eclipse_git_url_parser"
+require_relative "android_googlesource_url_parser"
+require_relative "sourceforge_url_parser"
 module LibrariesioURLParser
-  VERSION = "1.0.1"
+  VERSION = "1.0.4"
 end

data/lib/sourceforge_url_parser.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+class SourceforgeUrlParser < URLParser
+  PROJECT_PATHS = %w[projects p].freeze
+  private
+  def full_domain
+    'https://sourceforge.net/projects'
+  end
+  def tlds
+    %w(net)
+  end
+  def domain
+    'sourceforge'
+  end
+  def remove_domain
+    url.sub!(/(sourceforge\.net\/(#{PROJECT_PATHS.join("|")}))+?(:|\/)?/i, '')
+  end
+  def extractable_early?
+    false
+  end
+  def remove_extra_segments
+    self.url = url.split('/').reject{ |s| s.strip.empty? }.first
+  end
+  def format_url
+    # the URL at this point should have been reduced down to a single string for the project name
+    return nil unless url.is_a?(String)
+    url
+  end
+end

data/lib/url_parser.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 class URLParser
   def self.parse(url)
     new(url).parse
@@ -28,21 +29,27 @@ class URLParser
   end
   def self.try_all(url)
-    GithubURLParser.parse_to_full_url(url) ||
-    GitlabURLParser.parse_to_full_url(url) ||
-    BitbucketURLParser.parse_to_full_url(url)
+    # run through all the subclasses and try their parse method
+    # exit the reduce at the first non nil value and return that
+    descendants.reduce(nil) do |_, n|
+      r = n.parse_to_full_url(url)
+      break r if r
+    end
   end
   def parse_to_full_url
     path = parse
-    return nil unless path.present?
+    return nil if path.nil? || path.empty?
     [full_domain, path].join('/')
   end
   def parse_to_full_user_url
     return nil unless parseable?
     path = clean_url
     return nil unless path.length == 1
     [full_domain, path].join('/')
   end
@@ -68,6 +75,7 @@ class URLParser
   def format_url
     return nil unless url.length == 2
     url.join('/')
   end
@@ -83,14 +91,6 @@ class URLParser
     raise NotImplementedError
   end
-  def includes_domain?
-    raise NotImplementedError
-  end
-  def extractable_early?
-    raise NotImplementedError
-  end
   def domain_regex
     "#{domain}\.(#{tlds.join('|')})"
   end
@@ -151,14 +151,31 @@ class URLParser
   end
   def remove_scheme
-    url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)+?)/i, '')
+    url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)(\/\/)?)/i, '')
   end
   def remove_subdomain
-    url.gsub!(/(www|ssh|raw|git|wiki)+?\./i, '')
+    url.gsub!(/(www|ssh|raw|git|wiki|svn)+?\./i, '')
   end
   def remove_whitespace
     url.gsub!(/\s/, '')
   end
+  # This computation is memoized because it is expensive. Memoizing keeps it from slowing down use cases that call
+  # .try_all in a tight loop. However, if this class is required directly (without requiring any subparsers),
+  # this method will memoize an empty array. It is recommended to simply require librariesio-url-parser.rb directly.
+  # This is the default behavior when installing this gem.
+  private_class_method def self.descendants
+    @descendants ||=
+      begin
+        descendants = []
+        ObjectSpace.each_object(singleton_class) do |k|
+          next if k.singleton_class?
+          descendants.unshift k unless k == self
+        end
+        descendants
+      end
+  end
 end

data/librariesio-url-parser.gemspec CHANGED Viewed

@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rake", "~> 12.0"
   spec.add_development_dependency "rspec", "~> 3.0"
   spec.add_development_dependency "rspec_junit_formatter", "~> 0.5"
+  spec.add_development_dependency "pry", "~> 0.14.1"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: librariesio-url-parser
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 1.0.4
 platform: ruby
 authors:
 - Matt Pace
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-07-26 00:00:00.000000000 Z
+date: 2022-08-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -52,7 +52,21 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.5'
-description:
+- !ruby/object:Gem::Dependency
+  name: pry
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.14.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.14.1
+description:
 email:
 - matt.pace@tidelift.com
 executables: []
@@ -65,10 +79,18 @@ files:
 - Gemfile.lock
 - README.md
 - Rakefile
+- bin/console
+- lib/android_googlesource_url_parser.rb
+- lib/apache_git_wip_url_parser.rb
+- lib/apache_gitbox_url_parser.rb
+- lib/apache_svn_url_parser.rb
 - lib/bitbucket_url_parser.rb
+- lib/drupal_url_parser.rb
+- lib/eclipse_git_url_parser.rb
 - lib/github_url_parser.rb
 - lib/gitlab_url_parser.rb
 - lib/librariesio-url-parser.rb
+- lib/sourceforge_url_parser.rb
 - lib/url_parser.rb
 - librariesio-url-parser.gemspec
 homepage: https://github.com/librariesio/librariesio-url-parser
@@ -76,7 +98,7 @@ licenses:
 - AGPL-3.0
 metadata:
   rubygems_mfa_required: 'true'
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -91,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.3
-signing_key:
+rubygems_version: 3.0.9
+signing_key:
 specification_version: 4
 summary: Parse the URL for various repositories tracked by libraries.io
 test_files: []