RubyGems - librariesio-url-parser - Versions diffs - 1.0.0 → 1.0.3 - Mend

librariesio-url-parser 1.0.0 → 1.0.3

Files changed (19) hide show

checksums.yaml +4 -4
data/Gemfile +1 -1
data/Gemfile.lock +7 -1
data/README.md +1 -0
data/bin/console +14 -0
data/lib/android_googlesource_url_parser.rb +40 -0
data/lib/apache_git_wip_url_parser.rb +59 -0
data/lib/apache_gitbox_url_parser.rb +59 -0
data/lib/apache_svn_url_parser.rb +67 -0
data/lib/bitbucket_url_parser.rb +3 -1
data/lib/drupal_url_parser.rb +29 -0
data/lib/eclipse_git_url_parser.rb +25 -0
data/lib/github_url_parser.rb +3 -1
data/lib/gitlab_url_parser.rb +3 -1
data/lib/librariesio-url-parser.rb +8 -1
data/lib/sourceforge_url_parser.rb +36 -0
data/lib/url_parser.rb +24 -14
data/librariesio-url-parser.gemspec +1 -0
metadata +29 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4095a2003d55e594f33b981cb4fa104b37ab2ffba7f6f3988b905a901afb9785
-  data.tar.gz: 8dfbab7448f15692cd1064eb119cd55101d5fceab25dd806930780c48f3a65cd
+  metadata.gz: 1cbe610f7a8876b48f40ed8cb43058aff5e08abc55691dff82c7e3495dab9d20
+  data.tar.gz: bad609770a8779f49094c4f1d890051b51d0a85160cc586fa86312b5907cdbb3
 SHA512:
-  metadata.gz: 5aa7f74fcbaa9d1dc7774d61eedcbf349028a4e3bef26dc4a9e3b07e3856cdc2559feff970a36becf6e2c4e10eb4be77b68969470f814cd422a47e4f800e940e
-  data.tar.gz: f3dfc1047f3141e6a4f9ea39f7ac7514bbb607bd5f2577ac0809903614b3e008d7bb48916a01cb537886f5be0bad4fb793b9130e43d212d85fc0061bf1fb8161
+  metadata.gz: 88945a3e0dff0472969fdef9d09d5503420e41a539aeff5c46ce089c907190b8c8b1854af395f90a48ffb7248a63ac6df4ae285024678e1ac70dfb9c68f49474
+  data.tar.gz: 223accfcf45816de5185f3af8e2026f808810fc0371740a104f6654d5bca73f1fe2ff7ba8f374b6122e0126a86a2aacf1d3218004107c32552cf2c103b5c0885

data/Gemfile CHANGED Viewed

@@ -2,4 +2,4 @@ source "https://rubygems.org"
 ruby "2.6.5"
 # Specify your gem's dependencies in librariesio-url-parser.gemspec
-gemspec
+gemspec

data/Gemfile.lock CHANGED Viewed

@@ -1,12 +1,17 @@
 PATH
   remote: .
   specs:
-    librariesio-url-parser (1.0.0)
+    librariesio-url-parser (1.0.3)
 GEM
   remote: https://rubygems.org/
   specs:
+    coderay (1.1.3)
     diff-lcs (1.5.0)
+    method_source (1.0.0)
+    pry (0.14.1)
+      coderay (~> 1.1)
+      method_source (~> 1.0)
     rake (12.3.3)
     rspec (3.11.0)
       rspec-core (~> 3.11.0)
@@ -29,6 +34,7 @@ PLATFORMS
 DEPENDENCIES
   librariesio-url-parser!
+  pry (~> 0.14.1)
   rake (~> 12.0)
   rspec (~> 3.0)
   rspec_junit_formatter (~> 0.5)

data/README.md CHANGED Viewed

@@ -45,6 +45,7 @@ URLParser.try_all("git@bitbucket.org:tildeslash/monit.git") #=> "https://bitbuck
 - GitHub
 - GitLab
 - Bitbucket
+- Apache SVN
 ## Development

data/bin/console ADDED Viewed

@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "librariesio-url-parser"
+# Development console: loads the gem through Bundler and opens an interactive Pry session.
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier.
+# Pry is required unconditionally below, so it must be available in the bundle.
+require "pry"
+Pry.start

data/lib/android_googlesource_url_parser.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+# Parser for repositories hosted on android.googlesource.com.
+#
+# The private methods below override hooks of URLParser; the order in which
+# they are invoked is decided by the base class (not shown in this file).
+class AndroidGooglesourceUrlParser < URLParser
+  private
+  # Prefix joined with the parsed path to build the full repository URL.
+  def full_domain
+    'https://android.googlesource.com'
+  end
+  def tlds
+    %w(com)
+  end
+  def domain
+    'android.googlesource'
+  end
+  # Removes only the first occurrence of the host (and a trailing ":" or "/"),
+  # mutating url in place.
+  def remove_domain
+    url.sub!(/(android\.googlesource\.com)+?(:|\/)?/i, '')
+  end
+  # Replaces url with the Array of its non-blank "/"-separated segments.
+  def remove_extra_segments
+    self.url = url.split('/').reject{ |s| s.strip.empty? }
+  end
+  # Returns the repository path as a String, or nil when url was never
+  # reduced to a non-empty Array of segments.
+  def format_url
+    # if this is an Array then the url has gone through all the clean up steps
+    #
+    # if this is just a string then the url was not cleaned up and I have no idea how to format it
+    return nil unless url.is_a?(Array) && url.length.positive?
+    # links that point into specific branches of the repository start with + in the path
+    # for example https://android.googlesource.com/device/amlogic/yukawa/ is the top level repository
+    # but looking at the master branch is the url
+    # https://android.googlesource.com/device/amlogic/yukawa/+/refs/heads/master
+    # and the same applies for tags
+    # https://android.googlesource.com/device/amlogic/yukawa/+/refs/tags/android-12.1.0_r16
+    #
+    # partition always yields a String head, even when the path is nothing but "+"
+    # (split("+").first is nil in that case and the following chomp would raise)
+    self.url = url.join("/").partition("+").first.chomp("/")
+  end
+end

data/lib/apache_git_wip_url_parser.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+# Parser for repositories served from git-wip-us.apache.org/repos/asf.
+class ApacheGitWipUrlParser < URLParser
+  private
+  def full_domain
+    'https://git-wip-us.apache.org/repos/asf'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'git-wip-us.apache'
+  end
+  # Leaves the query string untouched: the repository name is commonly passed
+  # as a query parameter and is extracted by the later steps.
+  def remove_querystring
+    url
+  end
+  # Keeps everything up to and including the value of the p=<some_name>
+  # parameter and drops whatever follows it.
+  def remove_equals_sign
+    pieces = url.split('=')
+    name_key_at = pieces.index { |piece| piece.end_with?("?p", "&p") }
+    return unless name_key_at
+    kept = pieces.first(name_key_at + 2).join("=")
+    # cut at the first separator (";", "," or "&") that introduces the next parameter
+    self.url = kept.gsub(/[;,&].*/, '')
+  end
+  # Pattern source that only matches the repos/asf endpoint at the domain.
+  def domain_regex
+    host = domain.split("/").first
+    "#{host}\.(#{tlds.join('|')})\/repos/asf"
+  end
+  def remove_domain
+    url.sub!(/(git-wip-us\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
+  end
+  # At this point the url should be mostly pared down to the name; when the name
+  # arrived as a query parameter the leading "?p=" still has to be stripped.
+  def remove_extra_segments
+    return unless url.is_a?(String) && url.start_with?("?p=")
+    self.url = url.split("=").last
+  end
+  # Anything that is not a String at this point (for example an Array) is ignored.
+  def format_url
+    url if url.is_a?(String)
+  end
+end

data/lib/apache_gitbox_url_parser.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+# Parser for repositories served from gitbox.apache.org/repos/asf.
+class ApacheGitboxUrlParser < URLParser
+  private
+  def full_domain
+    'https://gitbox.apache.org/repos/asf'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'gitbox.apache'
+  end
+  # Leaves the query string untouched: the repository name is commonly passed
+  # as a query parameter and is extracted by the later steps.
+  def remove_querystring
+    url
+  end
+  # Keeps everything up to and including the value of the p=<some_name>
+  # parameter and drops whatever follows it.
+  def remove_equals_sign
+    pieces = url.split('=')
+    name_key_at = pieces.index { |piece| piece.end_with?("?p", "&p") }
+    return unless name_key_at
+    kept = pieces.first(name_key_at + 2).join("=")
+    # cut at the first separator (";", "," or "&") that introduces the next parameter
+    self.url = kept.gsub(/[;,&].*/, '')
+  end
+  # Pattern source that only matches the repos/asf endpoint at the domain.
+  def domain_regex
+    host = domain.split("/").first
+    "#{host}\.(#{tlds.join('|')})\/repos/asf"
+  end
+  def remove_domain
+    url.sub!(/(gitbox\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
+  end
+  # At this point the url should be mostly pared down to the name; when the name
+  # arrived as a query parameter the leading "?p=" still has to be stripped.
+  def remove_extra_segments
+    return unless url.is_a?(String) && url.start_with?("?p=")
+    self.url = url.split("=").last
+  end
+  # Anything that is not a String at this point (for example an Array) is ignored.
+  def format_url
+    url if url.is_a?(String)
+  end
+end

data/lib/apache_svn_url_parser.rb ADDED Viewed

@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+# Parser for Subversion projects hosted under svn.apache.org.
+class ApacheSvnUrlParser < URLParser
+  # directory names commonly used for branching inside an SVN repository
+  SUBDIR_NAMES = %w[trunk tags branches].freeze
+  # endpoint paths accepted after the host; written as pattern fragments
+  # because they are interpolated into domain_regex
+  VALID_PATHS = %w[viewvc viewcvs\.cgi repos\/asf].freeze
+  private
+  # Prefix joined with the parsed path to build the full repository URL.
+  def full_domain
+    'https://svn.apache.org/viewvc'
+  end
+  def tlds
+    %w(org)
+  end
+  def domain
+    'svn.apache'
+  end
+  def domain_regex
+    # match only the known endpoints (see VALID_PATHS) at the domain
+    # NOTE(review): inside a double-quoted string "\." is a plain ".", so if this
+    # value is compiled into a Regexp the dots match any character - confirm
+    # against the caller in URLParser before tightening it
+    "#{domain}\.(#{tlds.join('|')})\/(#{VALID_PATHS.join("|")})"
+  end
+  def remove_domain
+    # find the first apache.org endpoint match in the url string and remove only
+    # that match, in case a project has something like apache.org in its name
+    url.sub!(/(apache\.org\/(viewvc|repos\/asf|viewcvs\.cgi))+?(:|\/)?/i, '')
+  end
+  def extractable_early?
+    false
+  end
+  # Splits url into segments and truncates them at the first SVN layout
+  # directory (see SUBDIR_NAMES). Leaves the segments untouched when no such
+  # directory is present.
+  def remove_extra_segments
+    # split the url by / and remove any empty sections
+    self.url = url.split('/').reject{ |s| s.strip.empty? }
+    # check to see if any repository subdirectories are included in the segments
+    # this parser is parsing SVN projects, so any common folders used for branching should trip this
+    # truncate the array of segments to stop once we hit a top level sub directory typically seen in SVN repos
+    # and return everything up to that point
+    #
+    # for example apache.org/viewvc/myproject/subproject/tags/my-1.0.0-release should stop at myproject/subproject
+    # since the tags are just part of that repository
+    subdir_index = url.index{ |s| SUBDIR_NAMES.include?(s) }
+    return unless subdir_index
+    # it looks like the maven/pom directory on the Apache SVN server has a bunch of repositories stored under tags
+    # in this special case, grab the directory name under the subdirectory
+    # it looks like this is most likely to be the first directory under tags/
+    in_maven_pom_dir = url[0..1].join("/").downcase == "maven/pom"
+    self.url = if in_maven_pom_dir
+      url[0..subdir_index + 1]
+    else
+      # exclusive range: when the very first segment is trunk/tags/branches this
+      # yields an empty Array (0..-1 would have returned every segment instead)
+      url[0...subdir_index]
+    end
+  end
+  # Returns the segments joined with "/", or nil when url is not a non-empty Array.
+  def format_url
+    # if this is an Array then the url has gone through all the clean up steps
+    #
+    # if this is just a string then the url was not cleaned up and I have no idea how to format it
+    return nil unless url.is_a?(Array) && url.length.positive?
+    url.join("/")
+  end
+end

data/lib/bitbucket_url_parser.rb CHANGED Viewed

@@ -15,6 +15,8 @@ class BitbucketURLParser < URLParser
   end
   def remove_domain
-    url.gsub!(/(bitbucket.com|bitbucket.org)+?(:|\/)?/i, '')
+    # find the first Bitbucket domain match in the url string and replace only
+    # that match, in case a repo has something like bitbucket.org in its name
+    url.sub!(/(bitbucket\.com|bitbucket\.org)+?(:|\/)?/i, '')
   end
 end

data/lib/drupal_url_parser.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+# Parser for projects hosted on git.drupalcode.org/project.
+class DrupalUrlParser < URLParser
+  private
+  def full_domain
+    "https://git.drupalcode.org/project"
+  end
+  def tlds
+    ["org"]
+  end
+  def domain
+    "git.drupalcode"
+  end
+  # Removes only the first drupalcode.org/project match (and a trailing ":" or "/"),
+  # so a project whose name contains the host is left intact.
+  def remove_domain
+    url.sub!(/(drupalcode\.org\/project)+?(:|\/)?/i, '')
+  end
+  # An Array here means the url went through all the clean up steps;
+  # anything else, or an empty Array, yields nil.
+  def format_url
+    cleaned = url.is_a?(Array) && url.length.positive?
+    cleaned ? url.join("/") : nil
+  end
+end

data/lib/eclipse_git_url_parser.rb ADDED Viewed

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+# Parser for repositories hosted on git.eclipse.org/c.
+class EclipseGitUrlParser < URLParser
+  private
+  def full_domain
+    "https://git.eclipse.org/c"
+  end
+  def tlds
+    ["org"]
+  end
+  def domain
+    "git.eclipse"
+  end
+  # Deliberate no-op: repository names on the website all end in .git,
+  # so the extension must not be removed here.
+  def remove_git_extension
+    nil
+  end
+  # Removes only the first eclipse.org/c match (and a trailing ":" or "/").
+  def remove_domain
+    url.sub!(/(eclipse\.org\/c)+?(:|\/)?/i, '')
+  end
+end

data/lib/github_url_parser.rb CHANGED Viewed

@@ -15,6 +15,8 @@ class GithubURLParser < URLParser
   end
   def remove_domain
-    url.gsub!(/(github.io|github.com|github.org|raw.githubusercontent.com)+?(:|\/)?/i, '')
+    # find the first GitHub domain match in the url string and replace only
+    # that match, in case a repo has something like github.com in its name
+    url.sub!(/(github\.io|github\.com|github\.org|raw\.githubusercontent\.com)+?(:|\/)?/i, '')
   end
 end

data/lib/gitlab_url_parser.rb CHANGED Viewed

@@ -15,6 +15,8 @@ class GitlabURLParser < URLParser
   end
   def remove_domain
-    url.gsub!(/(gitlab.com)+?(:|\/)?/i, '')
+    # find the first GitLab domain match in the url string and replace only
+    # that match, in case a repo has something like gitlab.com in its name
+    url.sub!(/(gitlab\.com)+?(:|\/)?/i, '')
   end
 end

data/lib/librariesio-url-parser.rb CHANGED Viewed

@@ -4,7 +4,14 @@ require_relative "url_parser"
 require_relative "bitbucket_url_parser"
 require_relative "github_url_parser"
 require_relative "gitlab_url_parser"
+require_relative "apache_svn_url_parser"
+require_relative "apache_git_wip_url_parser"
+require_relative "apache_gitbox_url_parser"
+require_relative "drupal_url_parser"
+require_relative "eclipse_git_url_parser"
+require_relative "android_googlesource_url_parser"
+require_relative "sourceforge_url_parser"
 module LibrariesioURLParser
-  VERSION = "1.0.0"
+  VERSION = "1.0.3"
 end

data/lib/sourceforge_url_parser.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+# Parser for projects hosted on sourceforge.net.
+class SourceforgeUrlParser < URLParser
+  # path prefixes under which projects are addressed on the host
+  PROJECT_PATHS = %w[projects p].freeze
+  private
+  def full_domain
+    "https://sourceforge.net/projects"
+  end
+  def tlds
+    ["net"]
+  end
+  def domain
+    "sourceforge"
+  end
+  # Removes only the first host-plus-project-path match (and a trailing ":" or "/").
+  def remove_domain
+    url.sub!(/(sourceforge\.net\/(#{PROJECT_PATHS.join("|")}))+?(:|\/)?/i, '')
+  end
+  def extractable_early?
+    false
+  end
+  # Keeps only the first non-blank path segment (nil when there is none).
+  def remove_extra_segments
+    self.url = url.split('/').find { |segment| !segment.strip.empty? }
+  end
+  # By now the url should have been reduced to a single String holding the
+  # project name; any other type yields nil.
+  def format_url
+    url if url.is_a?(String)
+  end
+end

data/lib/url_parser.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 class URLParser
   def self.parse(url)
     new(url).parse
@@ -28,21 +29,27 @@ class URLParser
   end
   def self.try_all(url)
-    GithubURLParser.parse_to_full_url(url) ||
-    GitlabURLParser.parse_to_full_url(url) ||
-    BitbucketURLParser.parse_to_full_url(url)
+    # run through all the subclasses and try their parse method
+    # exit the reduce at the first non nil value and return that
+    descendants.reduce(nil) do |_, n|
+      r = n.parse_to_full_url(url)
+      break r if r
+    end
   end
   def parse_to_full_url
     path = parse
-    return nil unless path.present?
+    return nil if path.nil? || path.empty?
     [full_domain, path].join('/')
   end
   def parse_to_full_user_url
     return nil unless parseable?
     path = clean_url
     return nil unless path.length == 1
     [full_domain, path].join('/')
   end
@@ -68,6 +75,7 @@ class URLParser
   def format_url
     return nil unless url.length == 2
     url.join('/')
   end
@@ -83,14 +91,6 @@ class URLParser
     raise NotImplementedError
   end
-  def includes_domain?
-    raise NotImplementedError
-  end
-  def extractable_early?
-    raise NotImplementedError
-  end
   def domain_regex
     "#{domain}\.(#{tlds.join('|')})"
   end
@@ -151,14 +151,24 @@ class URLParser
   end
   def remove_scheme
-    url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)+?)/i, '')
+    url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)(\/\/)?)/i, '')
   end
   def remove_subdomain
-    url.gsub!(/(www|ssh|raw|git|wiki)+?\./i, '')
+    url.gsub!(/(www|ssh|raw|git|wiki|svn)+?\./i, '')
   end
   def remove_whitespace
     url.gsub!(/\s/, '')
   end
+  # Lists the classes found by walking ObjectSpace for instances of this
+  # class's singleton class, skipping singleton classes and the receiver
+  # itself. The result is in the reverse of ObjectSpace's iteration order.
+  private_class_method def self.descendants
+    ObjectSpace.each_object(singleton_class)
+               .reject { |klass| klass.singleton_class? || klass == self }
+               .reverse
+  end
 end

data/librariesio-url-parser.gemspec CHANGED Viewed

@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rake", "~> 12.0"
   spec.add_development_dependency "rspec", "~> 3.0"
   spec.add_development_dependency "rspec_junit_formatter", "~> 0.5"
+  spec.add_development_dependency "pry", "~> 0.14.1"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: librariesio-url-parser
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.3
 platform: ruby
 authors:
 - Matt Pace
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-06-13 00:00:00.000000000 Z
+date: 2022-08-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -52,7 +52,21 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.5'
-description:
+- !ruby/object:Gem::Dependency
+  name: pry
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.14.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.14.1
+description:
 email:
 - matt.pace@tidelift.com
 executables: []
@@ -65,10 +79,18 @@ files:
 - Gemfile.lock
 - README.md
 - Rakefile
+- bin/console
+- lib/android_googlesource_url_parser.rb
+- lib/apache_git_wip_url_parser.rb
+- lib/apache_gitbox_url_parser.rb
+- lib/apache_svn_url_parser.rb
 - lib/bitbucket_url_parser.rb
+- lib/drupal_url_parser.rb
+- lib/eclipse_git_url_parser.rb
 - lib/github_url_parser.rb
 - lib/gitlab_url_parser.rb
 - lib/librariesio-url-parser.rb
+- lib/sourceforge_url_parser.rb
 - lib/url_parser.rb
 - librariesio-url-parser.gemspec
 homepage: https://github.com/librariesio/librariesio-url-parser
@@ -76,7 +98,7 @@ licenses:
 - AGPL-3.0
 metadata:
   rubygems_mfa_required: 'true'
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -91,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.9
-signing_key:
+rubygems_version: 3.0.3
+signing_key:
 specification_version: 4
 summary: Parse the URL for various repositories tracked by libraries.io
 test_files: []