librariesio-url-parser 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39533f7a7c92f1b4bcccd5a9a46b3c2ae5d14f04da87b0d643339be188184dfc
4
- data.tar.gz: bcf4fd5128de3343a11f46b25fadf2cf0f0daf479a9f37824f2d0ac93534753b
3
+ metadata.gz: 1cbe610f7a8876b48f40ed8cb43058aff5e08abc55691dff82c7e3495dab9d20
4
+ data.tar.gz: bad609770a8779f49094c4f1d890051b51d0a85160cc586fa86312b5907cdbb3
5
5
  SHA512:
6
- metadata.gz: 27401dad229d48fd32ec999b91ddf171a708b847fdbfbbb21b283cf6b72e1f8280731ccd2f51a012bdfcf7d73f9087a7c00ab8b8db3109b55913a434be70dba1
7
- data.tar.gz: ed74167bc7f4b712209ecf9e0fddbafe7c5c9f77dcda9fcfb883b658d8e99ee8283d89a70f4ddecd2fd3a62912c5cb04c72358804dbedb44932e0adafae75594
6
+ metadata.gz: 88945a3e0dff0472969fdef9d09d5503420e41a539aeff5c46ce089c907190b8c8b1854af395f90a48ffb7248a63ac6df4ae285024678e1ac70dfb9c68f49474
7
+ data.tar.gz: 223accfcf45816de5185f3af8e2026f808810fc0371740a104f6654d5bca73f1fe2ff7ba8f374b6122e0126a86a2aacf1d3218004107c32552cf2c103b5c0885
data/Gemfile CHANGED
@@ -2,4 +2,4 @@ source "https://rubygems.org"
2
2
  ruby "2.6.5"
3
3
 
4
4
  # Specify your gem's dependencies in librariesio-url-parser.gemspec
5
- gemspec
5
+ gemspec
data/Gemfile.lock CHANGED
@@ -1,12 +1,17 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- librariesio-url-parser (1.0.2)
4
+ librariesio-url-parser (1.0.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ coderay (1.1.3)
9
10
  diff-lcs (1.5.0)
11
+ method_source (1.0.0)
12
+ pry (0.14.1)
13
+ coderay (~> 1.1)
14
+ method_source (~> 1.0)
10
15
  rake (12.3.3)
11
16
  rspec (3.11.0)
12
17
  rspec-core (~> 3.11.0)
@@ -29,6 +34,7 @@ PLATFORMS
29
34
 
30
35
  DEPENDENCIES
31
36
  librariesio-url-parser!
37
+ pry (~> 0.14.1)
32
38
  rake (~> 12.0)
33
39
  rspec (~> 3.0)
34
40
  rspec_junit_formatter (~> 0.5)
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "librariesio-url-parser"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "pry"
14
+ Pry.start
data/lib/android_googlesource_url_parser.rb ADDED
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+ class AndroidGooglesourceUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://android.googlesource.com'
7
+ end
8
+
9
+ def tlds
10
+ %w(com)
11
+ end
12
+
13
+ def domain
14
+ 'android.googlesource'
15
+ end
16
+
17
+ def remove_domain
18
+ url.sub!(/(android\.googlesource\.com)+?(:|\/)?/i, '')
19
+ end
20
+
21
+ def remove_extra_segments
22
+ self.url = url.split('/').reject{ |s| s.strip.empty? }
23
+ end
24
+
25
+ def format_url
26
+ # if this is an Array then the url has gone through all the clean up steps
27
+ #
28
+ # if this is just a string then the url was not cleaned up and I have no idea how to format it
29
+ return nil unless url.is_a?(Array) && url.length.positive?
30
+
31
+ # the links that code into specific branches of the repository start with + in the path
32
+ # for example https://android.googlesource.com/device/amlogic/yukawa/ is the top level repository
33
+ # but looking at the master branch is the url
34
+ # https://android.googlesource.com/device/amlogic/yukawa/+/refs/heads/master
35
+ # and the same applies for tags
36
+ # https://android.googlesource.com/device/amlogic/yukawa/+/refs/tags/android-12.1.0_r16
37
+
38
+ self.url = url.join("/").split("+").first.chomp("/")
39
+ end
40
+ end
data/lib/apache_git_wip_url_parser.rb ADDED
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+ class ApacheGitWipUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git-wip-us.apache.org/repos/asf'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git-wip-us.apache'
15
+ end
16
+
17
+ def remove_querystring
18
+ # it is common for the name to be passed in as a query parameter so we need to keep them in
19
+ # the url string for now and process them in later steps to pull the name out of the parameter
20
+ url
21
+ end
22
+
23
+ def remove_equals_sign
24
+ # we need to preserve the p=<some_name> query parameter
25
+ splits = url.split('=')
26
+ p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
27
+ if p_index
28
+ new_url = splits[0..p_index+1].join("=") if p_index
29
+ # remove separator characters present at the end of this string
30
+ # before the next parameter in the query parameter list
31
+ # ";"
32
+ new_url.gsub!(/[;,&].*/, '')
33
+
34
+ self.url = new_url
35
+ end
36
+ end
37
+
38
+ def domain_regex
39
+ # match only the repos/asf endpoint at the domain
40
+ "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
41
+ end
42
+
43
+ def remove_domain
44
+ url.sub!(/(git-wip-us\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
45
+ end
46
+
47
+ def remove_extra_segments
48
+ # by the time the URL gets here it should have been mostly pared down to the correct name
49
+ # however if the name was passed as a query parameter the ?p= is still at the front of the name
50
+ if url.is_a?(String) && url.start_with?("?p=")
51
+ self.url = url.split("=").last
52
+ end
53
+ end
54
+
55
+ def format_url
56
+ # ignore something if it comes in at as an Array at this point
57
+ url.is_a?(String) ? url : nil
58
+ end
59
+ end
data/lib/apache_gitbox_url_parser.rb ADDED
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+ class ApacheGitboxUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://gitbox.apache.org/repos/asf'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'gitbox.apache'
15
+ end
16
+
17
+ def remove_querystring
18
+ # it is common for the name to be passed in as a query parameter so we need to keep them in
19
+ # the url string for now and process them in later steps to pull the name out of the parameter
20
+ url
21
+ end
22
+
23
+ def remove_equals_sign
24
+ # we need to preserve the p=<some_name> query parameter
25
+ splits = url.split('=')
26
+ p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
27
+ if p_index
28
+ new_url = splits[0..p_index+1].join("=") if p_index
29
+ # remove separator characters present at the end of this string
30
+ # before the next parameter in the query parameter list
31
+ # ";"
32
+ new_url.gsub!(/[;,&].*/, '')
33
+
34
+ self.url = new_url
35
+ end
36
+ end
37
+
38
+ def domain_regex
39
+ # match only the repos/asf endpoint at the domain
40
+ "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
41
+ end
42
+
43
+ def remove_domain
44
+ url.sub!(/(gitbox\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
45
+ end
46
+
47
+ def remove_extra_segments
48
+ # by the time the URL gets here it should have been mostly pared down to the correct name
49
+ # however if the name was passed as a query parameter the ?p= is still at the front of the name
50
+ if url.is_a?(String) && url.start_with?("?p=")
51
+ self.url = url.split("=").last
52
+ end
53
+ end
54
+
55
+ def format_url
56
+ # ignore something if it comes in at as an Array at this point
57
+ url.is_a?(String) ? url : nil
58
+ end
59
+ end
data/lib/apache_svn_url_parser.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  class ApacheSvnUrlParser < URLParser
3
3
  SUBDIR_NAMES = %w[trunk tags branches].freeze
4
+ VALID_PATHS = %w[viewvc viewcvs\.cgi repos\/asf].freeze
4
5
  private
5
6
 
6
7
  def full_domain
@@ -17,13 +18,13 @@ class ApacheSvnUrlParser < URLParser
17
18
 
18
19
  def domain_regex
19
20
  # match only the viewvc endpoint at the domain
20
- "#{domain.split("/").first}\.(#{tlds.join('|')})\/viewvc"
21
+ "#{domain}\.(#{tlds.join('|')})\/(#{VALID_PATHS.join("|")})"
21
22
  end
22
23
 
23
24
  def remove_domain
24
25
  # find the matches for any github domain characters in the url string
25
26
  # and replace only the first match incase we find a repo with something like github.com as the name
26
- url.sub!(/(apache\.org\/(viewvc|repos))+?(:|\/)?/i, '')
27
+ url.sub!(/(apache\.org\/(viewvc|repos\/asf|viewcvs\.cgi))+?(:|\/)?/i, '')
27
28
  end
28
29
 
29
30
  def extractable_early?
data/lib/drupal_url_parser.rb ADDED
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+ class DrupalUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git.drupalcode.org/project'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git.drupalcode'
15
+ end
16
+
17
+ def remove_domain
18
+ # find the matches for any github domain characters in the url string
19
+ # and replace only the first match incase we find a repo with something like github.com as the name
20
+ url.sub!(/(drupalcode\.org\/project)+?(:|\/)?/i, '')
21
+ end
22
+
23
+ def format_url
24
+ # if this is an Array then the url has gone through all the clean up steps
25
+ return nil unless url.is_a?(Array) && url.length.positive?
26
+
27
+ url.join("/")
28
+ end
29
+ end
data/lib/eclipse_git_url_parser.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+ class EclipseGitUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git.eclipse.org/c'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git.eclipse'
15
+ end
16
+
17
+ def remove_git_extension
18
+ # the repository names all end in .git on the website, so don't remove it here
19
+ nil
20
+ end
21
+
22
+ def remove_domain
23
+ url.sub!(/(eclipse\.org\/c)+?(:|\/)?/i, '')
24
+ end
25
+ end
data/lib/librariesio-url-parser.rb CHANGED
@@ -5,7 +5,13 @@ require_relative "bitbucket_url_parser"
5
5
  require_relative "github_url_parser"
6
6
  require_relative "gitlab_url_parser"
7
7
  require_relative "apache_svn_url_parser"
8
+ require_relative "apache_git_wip_url_parser"
9
+ require_relative "apache_gitbox_url_parser"
10
+ require_relative "drupal_url_parser"
11
+ require_relative "eclipse_git_url_parser"
12
+ require_relative "android_googlesource_url_parser"
13
+ require_relative "sourceforge_url_parser"
8
14
 
9
15
  module LibrariesioURLParser
10
- VERSION = "1.0.2"
16
+ VERSION = "1.0.3"
11
17
  end
data/lib/sourceforge_url_parser.rb ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+ class SourceforgeUrlParser < URLParser
3
+ PROJECT_PATHS = %w[projects p].freeze
4
+ private
5
+
6
+ def full_domain
7
+ 'https://sourceforge.net/projects'
8
+ end
9
+
10
+ def tlds
11
+ %w(net)
12
+ end
13
+
14
+ def domain
15
+ 'sourceforge'
16
+ end
17
+
18
+ def remove_domain
19
+ url.sub!(/(sourceforge\.net\/(#{PROJECT_PATHS.join("|")}))+?(:|\/)?/i, '')
20
+ end
21
+
22
+ def extractable_early?
23
+ false
24
+ end
25
+
26
+ def remove_extra_segments
27
+ self.url = url.split('/').reject{ |s| s.strip.empty? }.first
28
+ end
29
+
30
+ def format_url
31
+ # the URL at this point should have been reduced down to a single string for the project name
32
+ return nil unless url.is_a?(String)
33
+
34
+ url
35
+ end
36
+ end
data/lib/url_parser.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  class URLParser
3
4
  def self.parse(url)
4
5
  new(url).parse
@@ -28,22 +29,27 @@ class URLParser
28
29
  end
29
30
 
30
31
  def self.try_all(url)
31
- GithubURLParser.parse_to_full_url(url) ||
32
- GitlabURLParser.parse_to_full_url(url) ||
33
- BitbucketURLParser.parse_to_full_url(url) ||
34
- ApacheSvnUrlParser.parse_to_full_url(url)
32
+ # run through all the subclasses and try their parse method
33
+ # exit the reduce at the first non nil value and return that
34
+ descendants.reduce(nil) do |_, n|
35
+ r = n.parse_to_full_url(url)
36
+ break r if r
37
+ end
35
38
  end
36
39
 
37
40
  def parse_to_full_url
38
41
  path = parse
39
42
  return nil if path.nil? || path.empty?
43
+
40
44
  [full_domain, path].join('/')
41
45
  end
42
46
 
43
47
  def parse_to_full_user_url
44
48
  return nil unless parseable?
49
+
45
50
  path = clean_url
46
51
  return nil unless path.length == 1
52
+
47
53
  [full_domain, path].join('/')
48
54
  end
49
55
 
@@ -69,6 +75,7 @@ class URLParser
69
75
 
70
76
  def format_url
71
77
  return nil unless url.length == 2
78
+
72
79
  url.join('/')
73
80
  end
74
81
 
@@ -84,14 +91,6 @@ class URLParser
84
91
  raise NotImplementedError
85
92
  end
86
93
 
87
- def includes_domain?
88
- raise NotImplementedError
89
- end
90
-
91
- def extractable_early?
92
- raise NotImplementedError
93
- end
94
-
95
94
  def domain_regex
96
95
  "#{domain}\.(#{tlds.join('|')})"
97
96
  end
@@ -162,4 +161,14 @@ class URLParser
162
161
  def remove_whitespace
163
162
  url.gsub!(/\s/, '')
164
163
  end
164
+
165
+ private_class_method def self.descendants
166
+ descendants = []
167
+ ObjectSpace.each_object(singleton_class) do |k|
168
+ next if k.singleton_class?
169
+
170
+ descendants.unshift k unless k == self
171
+ end
172
+ descendants
173
+ end
165
174
  end
data/librariesio-url-parser.gemspec CHANGED
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency "rake", "~> 12.0"
21
21
  spec.add_development_dependency "rspec", "~> 3.0"
22
22
  spec.add_development_dependency "rspec_junit_formatter", "~> 0.5"
23
+ spec.add_development_dependency "pry", "~> 0.14.1"
23
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: librariesio-url-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Pace
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-27 00:00:00.000000000 Z
11
+ date: 2022-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.5'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.14.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.14.1
55
69
  description:
56
70
  email:
57
71
  - matt.pace@tidelift.com
@@ -65,11 +79,18 @@ files:
65
79
  - Gemfile.lock
66
80
  - README.md
67
81
  - Rakefile
82
+ - bin/console
83
+ - lib/android_googlesource_url_parser.rb
84
+ - lib/apache_git_wip_url_parser.rb
85
+ - lib/apache_gitbox_url_parser.rb
68
86
  - lib/apache_svn_url_parser.rb
69
87
  - lib/bitbucket_url_parser.rb
88
+ - lib/drupal_url_parser.rb
89
+ - lib/eclipse_git_url_parser.rb
70
90
  - lib/github_url_parser.rb
71
91
  - lib/gitlab_url_parser.rb
72
92
  - lib/librariesio-url-parser.rb
93
+ - lib/sourceforge_url_parser.rb
73
94
  - lib/url_parser.rb
74
95
  - librariesio-url-parser.gemspec
75
96
  homepage: https://github.com/librariesio/librariesio-url-parser