librariesio-url-parser 1.0.2 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39533f7a7c92f1b4bcccd5a9a46b3c2ae5d14f04da87b0d643339be188184dfc
4
- data.tar.gz: bcf4fd5128de3343a11f46b25fadf2cf0f0daf479a9f37824f2d0ac93534753b
3
+ metadata.gz: b7678a02447f9ecb298aca33105ed4a0a686258b39e1dbbf6069cadcc8bb5ee9
4
+ data.tar.gz: f5c7d4fa5dca4858e46f7740b33f4e54f67230e2801a2e2cfb00bfae7f1aad6a
5
5
  SHA512:
6
- metadata.gz: 27401dad229d48fd32ec999b91ddf171a708b847fdbfbbb21b283cf6b72e1f8280731ccd2f51a012bdfcf7d73f9087a7c00ab8b8db3109b55913a434be70dba1
7
- data.tar.gz: ed74167bc7f4b712209ecf9e0fddbafe7c5c9f77dcda9fcfb883b658d8e99ee8283d89a70f4ddecd2fd3a62912c5cb04c72358804dbedb44932e0adafae75594
6
+ metadata.gz: 940d8eccc878fe614347f7e18f381825f531cf2f14a0f22834bbd342e8ebf6be13e4daeb5c07e2627446ba5d5df76af8b634285731479ce6fe4d38807fb29326
7
+ data.tar.gz: 928daf9d94eeed789e059e099dd6346e7c6d1b4cbaee1f788d8d36dc2420fdd232c74a7ed68f051aa91efa26d1d6636107d89deee06e082d0fd7c2f6398b90ca
data/Gemfile CHANGED
@@ -2,4 +2,4 @@ source "https://rubygems.org"
2
2
  ruby "2.6.5"
3
3
 
4
4
  # Specify your gem's dependencies in librariesio-url-parser.gemspec
5
- gemspec
5
+ gemspec
data/Gemfile.lock CHANGED
@@ -1,12 +1,17 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- librariesio-url-parser (1.0.2)
4
+ librariesio-url-parser (1.0.5)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ coderay (1.1.3)
9
10
  diff-lcs (1.5.0)
11
+ method_source (1.0.0)
12
+ pry (0.14.1)
13
+ coderay (~> 1.1)
14
+ method_source (~> 1.0)
10
15
  rake (12.3.3)
11
16
  rspec (3.11.0)
12
17
  rspec-core (~> 3.11.0)
@@ -29,6 +34,7 @@ PLATFORMS
29
34
 
30
35
  DEPENDENCIES
31
36
  librariesio-url-parser!
37
+ pry (~> 0.14.1)
32
38
  rake (~> 12.0)
33
39
  rspec (~> 3.0)
34
40
  rspec_junit_formatter (~> 0.5)
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "librariesio-url-parser"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "pry"
14
+ Pry.start
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+ class AndroidGooglesourceUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://android.googlesource.com'
7
+ end
8
+
9
+ def tlds
10
+ %w(com)
11
+ end
12
+
13
+ def domain
14
+ 'android.googlesource'
15
+ end
16
+
17
+ def remove_domain
18
+ url.sub!(/(android\.googlesource\.com)+?(:|\/)?/i, '')
19
+ end
20
+
21
+ def remove_extra_segments
22
+ self.url = url.split('/').reject{ |s| s.strip.empty? }
23
+ end
24
+
25
+ def format_url
26
+ # if this is an Array then the url has gone through all the clean up steps
27
+ #
28
+ # if this is just a string then the url was not cleaned up and I have no idea how to format it
29
+ return nil unless url.is_a?(Array) && url.length.positive?
30
+
31
+ # links that point into a specific branch or tag of the repository contain a + segment in the path
32
+ # for example https://android.googlesource.com/device/amlogic/yukawa/ is the top level repository
33
+ # but looking at the master branch is the url
34
+ # https://android.googlesource.com/device/amlogic/yukawa/+/refs/heads/master
35
+ # and the same applies for tags
36
+ # https://android.googlesource.com/device/amlogic/yukawa/+/refs/tags/android-12.1.0_r16
37
+
38
+ self.url = url.join("/").split("+").first.chomp("/")
39
+ end
40
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+ class ApacheGitWipUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git-wip-us.apache.org/repos/asf'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git-wip-us.apache'
15
+ end
16
+
17
+ def remove_querystring
18
+ # it is common for the name to be passed in as a query parameter so we need to keep them in
19
+ # the url string for now and process them in later steps to pull the name out of the parameter
20
+ url
21
+ end
22
+
23
+ def remove_equals_sign
24
+ # we need to preserve the p=<some_name> query parameter
25
+ splits = url.split('=')
26
+ p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
27
+ if p_index
28
+ new_url = splits[0..p_index+1].join("=") if p_index
29
+ # remove separator characters present at the end of this string
30
+ # before the next parameter in the query parameter list
31
+ # (";", "," or "&"), together with everything that follows them
32
+ new_url.gsub!(/[;,&].*/, '')
33
+
34
+ self.url = new_url
35
+ end
36
+ end
37
+
38
+ def domain_regex
39
+ # match only the repos/asf endpoint at the domain
40
+ "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
41
+ end
42
+
43
+ def remove_domain
44
+ url.sub!(/(git-wip-us\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
45
+ end
46
+
47
+ def remove_extra_segments
48
+ # by the time the URL gets here it should have been mostly pared down to the correct name
49
+ # however if the name was passed as a query parameter the ?p= is still at the front of the name
50
+ if url.is_a?(String) && url.start_with?("?p=")
51
+ self.url = url.split("=").last
52
+ end
53
+ end
54
+
55
+ def format_url
56
+ # return nil if the url is anything other than a String (e.g. an Array) at this point
57
+ url.is_a?(String) ? url : nil
58
+ end
59
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+ class ApacheGitboxUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://gitbox.apache.org/repos/asf'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'gitbox.apache'
15
+ end
16
+
17
+ def remove_querystring
18
+ # it is common for the name to be passed in as a query parameter so we need to keep them in
19
+ # the url string for now and process them in later steps to pull the name out of the parameter
20
+ url
21
+ end
22
+
23
+ def remove_equals_sign
24
+ # we need to preserve the p=<some_name> query parameter
25
+ splits = url.split('=')
26
+ p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
27
+ if p_index
28
+ new_url = splits[0..p_index+1].join("=") if p_index
29
+ # remove separator characters present at the end of this string
30
+ # before the next parameter in the query parameter list
31
+ # (";", "," or "&"), together with everything that follows them
32
+ new_url.gsub!(/[;,&].*/, '')
33
+
34
+ self.url = new_url
35
+ end
36
+ end
37
+
38
+ def domain_regex
39
+ # match only the repos/asf endpoint at the domain
40
+ "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
41
+ end
42
+
43
+ def remove_domain
44
+ url.sub!(/(gitbox\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
45
+ end
46
+
47
+ def remove_extra_segments
48
+ # by the time the URL gets here it should have been mostly pared down to the correct name
49
+ # however if the name was passed as a query parameter the ?p= is still at the front of the name
50
+ if url.is_a?(String) && url.start_with?("?p=")
51
+ self.url = url.split("=").last
52
+ end
53
+ end
54
+
55
+ def format_url
56
+ # return nil if the url is anything other than a String (e.g. an Array) at this point
57
+ url.is_a?(String) ? url : nil
58
+ end
59
+ end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
  class ApacheSvnUrlParser < URLParser
3
3
  SUBDIR_NAMES = %w[trunk tags branches].freeze
4
+ VALID_PATHS = %w[viewvc viewcvs\.cgi repos\/asf].freeze
4
5
  private
5
6
 
6
7
  def full_domain
@@ -17,13 +18,13 @@ class ApacheSvnUrlParser < URLParser
17
18
 
18
19
  def domain_regex
19
20
  # match only the supported endpoints (viewvc, viewcvs.cgi, repos/asf) at the domain
20
- "#{domain.split("/").first}\.(#{tlds.join('|')})\/viewvc"
21
+ "#{domain}\.(#{tlds.join('|')})\/(#{VALID_PATHS.join("|")})"
21
22
  end
22
23
 
23
24
  def remove_domain
24
25
  # find the matches for the apache.org domain and endpoint path in the url string
25
26
  # and replace only the first match in case we find a repo with something like apache.org as the name
26
- url.sub!(/(apache\.org\/(viewvc|repos))+?(:|\/)?/i, '')
27
+ url.sub!(/(apache\.org\/(viewvc|repos\/asf|viewcvs\.cgi))+?(:|\/)?/i, '')
27
28
  end
28
29
 
29
30
  def extractable_early?
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+ class DrupalUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git.drupalcode.org/project'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git.drupalcode'
15
+ end
16
+
17
+ def remove_domain
18
+ # find the matches for the drupalcode.org/project domain and path in the url string
19
+ # and replace only the first match in case we find a repo with something like drupalcode.org as the name
20
+ url.sub!(/(drupalcode\.org\/project)+?(:|\/)?/i, '')
21
+ end
22
+
23
+ def format_url
24
+ # if this is an Array then the url has gone through all the clean up steps
25
+ return nil unless url.is_a?(Array) && url.length.positive?
26
+
27
+ url.join("/")
28
+ end
29
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+ class EclipseGitUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git.eclipse.org/c'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git.eclipse'
15
+ end
16
+
17
+ def remove_git_extension
18
+ # the repository names all end in .git on the website, so don't remove it here
19
+ nil
20
+ end
21
+
22
+ def remove_domain
23
+ url.sub!(/(eclipse\.org\/c)+?(:|\/)?/i, '')
24
+ end
25
+ end
@@ -5,7 +5,13 @@ require_relative "bitbucket_url_parser"
5
5
  require_relative "github_url_parser"
6
6
  require_relative "gitlab_url_parser"
7
7
  require_relative "apache_svn_url_parser"
8
+ require_relative "apache_git_wip_url_parser"
9
+ require_relative "apache_gitbox_url_parser"
10
+ require_relative "drupal_url_parser"
11
+ require_relative "eclipse_git_url_parser"
12
+ require_relative "android_googlesource_url_parser"
13
+ require_relative "sourceforge_url_parser"
8
14
 
9
15
  module LibrariesioURLParser
10
- VERSION = "1.0.2"
16
+ VERSION = "1.0.5"
11
17
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+ class SourceforgeUrlParser < URLParser
3
+ PROJECT_PATHS = %w[projects p].freeze
4
+ private
5
+
6
+ def full_domain
7
+ 'https://sourceforge.net/projects'
8
+ end
9
+
10
+ def tlds
11
+ %w(net)
12
+ end
13
+
14
+ def domain
15
+ 'sourceforge'
16
+ end
17
+
18
+ def remove_domain
19
+ url.sub!(/(sourceforge\.net\/(#{PROJECT_PATHS.join("|")}))+?(:|\/)?/i, '')
20
+ end
21
+
22
+ def extractable_early?
23
+ false
24
+ end
25
+
26
+ def remove_extra_segments
27
+ self.url = url.split('/').reject{ |s| s.strip.empty? }.first
28
+ end
29
+
30
+ def format_url
31
+ # the URL at this point should have been reduced down to a single string for the project name
32
+ return nil unless url.is_a?(String)
33
+
34
+ url
35
+ end
36
+ end
data/lib/url_parser.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  class URLParser
3
4
  def self.parse(url)
4
5
  new(url).parse
@@ -28,22 +29,27 @@ class URLParser
28
29
  end
29
30
 
30
31
  def self.try_all(url)
31
- GithubURLParser.parse_to_full_url(url) ||
32
- GitlabURLParser.parse_to_full_url(url) ||
33
- BitbucketURLParser.parse_to_full_url(url) ||
34
- ApacheSvnUrlParser.parse_to_full_url(url)
32
+ # run through all the subclasses and try their parse method
33
+ # exit the reduce at the first non nil value and return that
34
+ descendants.reduce(nil) do |_, n|
35
+ r = n.parse_to_full_url(url)
36
+ break r if r
37
+ end
35
38
  end
36
39
 
37
40
  def parse_to_full_url
38
41
  path = parse
39
42
  return nil if path.nil? || path.empty?
43
+
40
44
  [full_domain, path].join('/')
41
45
  end
42
46
 
43
47
  def parse_to_full_user_url
44
48
  return nil unless parseable?
49
+
45
50
  path = clean_url
46
51
  return nil unless path.length == 1
52
+
47
53
  [full_domain, path].join('/')
48
54
  end
49
55
 
@@ -59,7 +65,12 @@ class URLParser
59
65
  remove_auth_user
60
66
  remove_equals_sign
61
67
  remove_scheme
62
- return nil unless includes_domain?
68
+
69
+ unless includes_domain?
70
+ self.url = nil
71
+ return nil
72
+ end
73
+
63
74
  remove_subdomain
64
75
  remove_domain
65
76
  remove_git_extension
@@ -68,7 +79,9 @@ class URLParser
68
79
  end
69
80
 
70
81
  def format_url
82
+ return nil if url.nil?
71
83
  return nil unless url.length == 2
84
+
72
85
  url.join('/')
73
86
  end
74
87
 
@@ -84,14 +97,6 @@ class URLParser
84
97
  raise NotImplementedError
85
98
  end
86
99
 
87
- def includes_domain?
88
- raise NotImplementedError
89
- end
90
-
91
- def extractable_early?
92
- raise NotImplementedError
93
- end
94
-
95
100
  def domain_regex
96
101
  "#{domain}\.(#{tlds.join('|')})"
97
102
  end
@@ -162,4 +167,21 @@ class URLParser
162
167
  def remove_whitespace
163
168
  url.gsub!(/\s/, '')
164
169
  end
170
+
171
+ # This computation is memoized because it is expensive; recomputing it each time would rule out use cases which require using
172
+ # .try_all in a tight loop. However, if this class is required directly (without requiring any subparsers),
173
+ # this method will memoize an empty array. It is recommended to simply require librariesio-url-parser.rb directly.
174
+ # This is the default behavior when installing this gem.
175
+ private_class_method def self.descendants
176
+ @descendants ||=
177
+ begin
178
+ descendants = []
179
+ ObjectSpace.each_object(singleton_class) do |k|
180
+ next if k.singleton_class?
181
+
182
+ descendants.unshift k unless k == self
183
+ end
184
+ descendants
185
+ end
186
+ end
165
187
  end
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency "rake", "~> 12.0"
21
21
  spec.add_development_dependency "rspec", "~> 3.0"
22
22
  spec.add_development_dependency "rspec_junit_formatter", "~> 0.5"
23
+ spec.add_development_dependency "pry", "~> 0.14.1"
23
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: librariesio-url-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Pace
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-27 00:00:00.000000000 Z
11
+ date: 2022-08-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -52,7 +52,21 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.5'
55
- description:
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.14.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.14.1
69
+ description:
56
70
  email:
57
71
  - matt.pace@tidelift.com
58
72
  executables: []
@@ -65,11 +79,18 @@ files:
65
79
  - Gemfile.lock
66
80
  - README.md
67
81
  - Rakefile
82
+ - bin/console
83
+ - lib/android_googlesource_url_parser.rb
84
+ - lib/apache_git_wip_url_parser.rb
85
+ - lib/apache_gitbox_url_parser.rb
68
86
  - lib/apache_svn_url_parser.rb
69
87
  - lib/bitbucket_url_parser.rb
88
+ - lib/drupal_url_parser.rb
89
+ - lib/eclipse_git_url_parser.rb
70
90
  - lib/github_url_parser.rb
71
91
  - lib/gitlab_url_parser.rb
72
92
  - lib/librariesio-url-parser.rb
93
+ - lib/sourceforge_url_parser.rb
73
94
  - lib/url_parser.rb
74
95
  - librariesio-url-parser.gemspec
75
96
  homepage: https://github.com/librariesio/librariesio-url-parser
@@ -77,7 +98,7 @@ licenses:
77
98
  - AGPL-3.0
78
99
  metadata:
79
100
  rubygems_mfa_required: 'true'
80
- post_install_message:
101
+ post_install_message:
81
102
  rdoc_options: []
82
103
  require_paths:
83
104
  - lib
@@ -92,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
92
113
  - !ruby/object:Gem::Version
93
114
  version: '0'
94
115
  requirements: []
95
- rubygems_version: 3.0.3
96
- signing_key:
116
+ rubygems_version: 3.0.9
117
+ signing_key:
97
118
  specification_version: 4
98
119
  summary: Parse the URL for various repositories tracked by libraries.io
99
120
  test_files: []