librariesio-url-parser 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4095a2003d55e594f33b981cb4fa104b37ab2ffba7f6f3988b905a901afb9785
4
- data.tar.gz: 8dfbab7448f15692cd1064eb119cd55101d5fceab25dd806930780c48f3a65cd
3
+ metadata.gz: 1cbe610f7a8876b48f40ed8cb43058aff5e08abc55691dff82c7e3495dab9d20
4
+ data.tar.gz: bad609770a8779f49094c4f1d890051b51d0a85160cc586fa86312b5907cdbb3
5
5
  SHA512:
6
- metadata.gz: 5aa7f74fcbaa9d1dc7774d61eedcbf349028a4e3bef26dc4a9e3b07e3856cdc2559feff970a36becf6e2c4e10eb4be77b68969470f814cd422a47e4f800e940e
7
- data.tar.gz: f3dfc1047f3141e6a4f9ea39f7ac7514bbb607bd5f2577ac0809903614b3e008d7bb48916a01cb537886f5be0bad4fb793b9130e43d212d85fc0061bf1fb8161
6
+ metadata.gz: 88945a3e0dff0472969fdef9d09d5503420e41a539aeff5c46ce089c907190b8c8b1854af395f90a48ffb7248a63ac6df4ae285024678e1ac70dfb9c68f49474
7
+ data.tar.gz: 223accfcf45816de5185f3af8e2026f808810fc0371740a104f6654d5bca73f1fe2ff7ba8f374b6122e0126a86a2aacf1d3218004107c32552cf2c103b5c0885
data/Gemfile CHANGED
@@ -2,4 +2,4 @@ source "https://rubygems.org"
2
2
  ruby "2.6.5"
3
3
 
4
4
  # Specify your gem's dependencies in librariesio-url-parser.gemspec
5
- gemspec
5
+ gemspec
data/Gemfile.lock CHANGED
@@ -1,12 +1,17 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- librariesio-url-parser (1.0.0)
4
+ librariesio-url-parser (1.0.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ coderay (1.1.3)
9
10
  diff-lcs (1.5.0)
11
+ method_source (1.0.0)
12
+ pry (0.14.1)
13
+ coderay (~> 1.1)
14
+ method_source (~> 1.0)
10
15
  rake (12.3.3)
11
16
  rspec (3.11.0)
12
17
  rspec-core (~> 3.11.0)
@@ -29,6 +34,7 @@ PLATFORMS
29
34
 
30
35
  DEPENDENCIES
31
36
  librariesio-url-parser!
37
+ pry (~> 0.14.1)
32
38
  rake (~> 12.0)
33
39
  rspec (~> 3.0)
34
40
  rspec_junit_formatter (~> 0.5)
data/README.md CHANGED
@@ -45,6 +45,7 @@ URLParser.try_all("git@bitbucket.org:tildeslash/monit.git") #=> "https://bitbuck
45
45
  - GitHub
46
46
  - GitLab
47
47
  - Bitbucket
48
+ - Apache SVN
48
49
 
49
50
  ## Development
50
51
 
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "librariesio-url-parser"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "pry"
14
+ Pry.start
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
# Parser for repositories hosted on android.googlesource.com.
#
# Reduces a URL to the repository path, dropping everything from the first
# path segment that starts with "+" (the marker that introduces a branch, tag
# or other view of the repository).
class AndroidGooglesourceUrlParser < URLParser
  private

  # Base URL that the parsed repository path is joined onto.
  def full_domain
    'https://android.googlesource.com'
  end

  # Top level domains accepted for this host.
  def tlds
    %w(com)
  end

  # Host name without its top level domain.
  def domain
    'android.googlesource'
  end

  # Strips the first occurrence of the host (and a trailing ":" or "/") in place.
  def remove_domain
    url.sub!(/(android\.googlesource\.com)+?(:|\/)?/i, '')
  end

  # Turns the remaining string into an Array of its non-blank path segments.
  def remove_extra_segments
    self.url = url.split('/').reject { |segment| segment.strip.empty? }
  end

  # Builds the repository path from the cleaned-up segments.
  #
  # Returns nil unless url is a non-empty Array: a String here means the
  # clean up steps did not run, so there is nothing reliable to format.
  #
  # Links into a specific branch or tag carry a "+" segment after the
  # repository path, for example
  #   https://android.googlesource.com/device/amlogic/yukawa/+/refs/heads/master
  #   https://android.googlesource.com/device/amlogic/yukawa/+/refs/tags/android-12.1.0_r16
  # both of which belong to the repository device/amlogic/yukawa.
  #
  # Only a segment that *starts with* "+" ends the repository path, so a "+"
  # inside a segment name is kept. A path that begins with the marker yields
  # an empty string instead of raising NoMethodError on nil.
  def format_url
    return nil unless url.is_a?(Array) && url.length.positive?

    repository_segments = url.take_while { |segment| !segment.start_with?("+") }
    self.url = repository_segments.join("/")
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
# Parser for git repositories served from git-wip-us.apache.org/repos/asf.
#
# The repository name is frequently passed as the p=<name> query parameter
# rather than as a path segment, so the query string handling of URLParser is
# overridden to keep that parameter until the name can be pulled out of it.
class ApacheGitWipUrlParser < URLParser
  private

  # Base URL that the parsed repository name is joined onto.
  def full_domain
    'https://git-wip-us.apache.org/repos/asf'
  end

  # Top level domains accepted for this host.
  def tlds
    %w(org)
  end

  # Host name without its top level domain.
  def domain
    'git-wip-us.apache'
  end

  # Intentionally leaves the query string in place: the repository name may
  # live in the p parameter and is extracted by later steps.
  def remove_querystring
    url
  end

  # Reduces the query string to just the p=<name> parameter.
  #
  # The p parameter is looked up among the "&"- or ";"-separated query
  # parameters, wherever it appears, and the URL is rebuilt as
  # "<everything before the ?>?p=<name>". The name stops at the first "," or
  # "=" it contains. When there is no query string or no p parameter the URL
  # is left untouched and nil is returned.
  def remove_equals_sign
    base, question_mark, query = url.partition("?")
    return if question_mark.empty?

    name_param = query.split(/[;&]/).find { |param| param.start_with?("p=") }
    return unless name_param

    name = name_param.delete_prefix("p=")[/\A[^,=]*/]
    self.url = "#{base}?p=#{name}"
  end

  # Pattern source limited to the repos/asf endpoint of the domain.
  #
  # NOTE(review): inside a double-quoted string "\." collapses to a bare "."
  # and "\/" to "/", so the dot before the TLD matches any character if this
  # string is used as a regular expression - confirm whether a literal dot
  # was intended.
  def domain_regex
    "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
  end

  # Strips the first occurrence of the host and its repos/asf endpoint
  # (and a trailing ":" or "/") in place.
  def remove_domain
    url.sub!(/(git-wip-us\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
  end

  # By this point the URL should be pared down to the repository name, but a
  # name that arrived as a query parameter still has "?p=" in front of it.
  def remove_extra_segments
    return unless url.is_a?(String) && url.start_with?("?p=")

    self.url = url.split("=").last
  end

  # Returns the name when it is a String; anything else (such as an Array)
  # is ignored and yields nil.
  def format_url
    url if url.is_a?(String)
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
# Parser for git repositories served from gitbox.apache.org/repos/asf.
#
# The repository name is frequently passed as the p=<name> query parameter
# rather than as a path segment, so the query string handling of URLParser is
# overridden to keep that parameter until the name can be pulled out of it.
class ApacheGitboxUrlParser < URLParser
  private

  # Base URL that the parsed repository name is joined onto.
  def full_domain
    'https://gitbox.apache.org/repos/asf'
  end

  # Top level domains accepted for this host.
  def tlds
    %w(org)
  end

  # Host name without its top level domain.
  def domain
    'gitbox.apache'
  end

  # Intentionally leaves the query string in place: the repository name may
  # live in the p parameter and is extracted by later steps.
  def remove_querystring
    url
  end

  # Reduces the query string to just the p=<name> parameter.
  #
  # The p parameter is looked up among the "&"- or ";"-separated query
  # parameters, wherever it appears, and the URL is rebuilt as
  # "<everything before the ?>?p=<name>". The name stops at the first "," or
  # "=" it contains. When there is no query string or no p parameter the URL
  # is left untouched and nil is returned.
  def remove_equals_sign
    base, question_mark, query = url.partition("?")
    return if question_mark.empty?

    name_param = query.split(/[;&]/).find { |param| param.start_with?("p=") }
    return unless name_param

    name = name_param.delete_prefix("p=")[/\A[^,=]*/]
    self.url = "#{base}?p=#{name}"
  end

  # Pattern source limited to the repos/asf endpoint of the domain.
  #
  # NOTE(review): inside a double-quoted string "\." collapses to a bare "."
  # and "\/" to "/", so the dot before the TLD matches any character if this
  # string is used as a regular expression - confirm whether a literal dot
  # was intended.
  def domain_regex
    "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
  end

  # Strips the first occurrence of the host and its repos/asf endpoint
  # (and a trailing ":" or "/") in place.
  def remove_domain
    url.sub!(/(gitbox\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
  end

  # By this point the URL should be pared down to the repository name, but a
  # name that arrived as a query parameter still has "?p=" in front of it.
  def remove_extra_segments
    return unless url.is_a?(String) && url.start_with?("?p=")

    self.url = url.split("=").last
  end

  # Returns the name when it is a String; anything else (such as an Array)
  # is ignored and yields nil.
  def format_url
    url if url.is_a?(String)
  end
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
# Parser for Subversion projects browsed through svn.apache.org
# (the viewvc, viewcvs.cgi and repos/asf endpoints).
class ApacheSvnUrlParser < URLParser
  # Directory names conventionally used for branching inside an SVN project.
  SUBDIR_NAMES = %w[trunk tags branches].freeze
  # Endpoint paths (as regular expression fragments) accepted after the host.
  VALID_PATHS = %w[viewvc viewcvs\.cgi repos\/asf].freeze

  private

  # Base URL that the parsed project path is joined onto.
  def full_domain
    'https://svn.apache.org/viewvc'
  end

  # Top level domains accepted for this host.
  def tlds
    %w(org)
  end

  # Host name without its top level domain.
  def domain
    'svn.apache'
  end

  # Pattern source limited to the known endpoints of the domain.
  #
  # NOTE(review): inside a double-quoted string "\." collapses to a bare "."
  # and "\/" to "/", so the dot before the TLD matches any character if this
  # string is used as a regular expression - confirm whether a literal dot
  # was intended.
  def domain_regex
    "#{domain}\.(#{tlds.join('|')})\/(#{VALID_PATHS.join("|")})"
  end

  # Strips the first occurrence of apache.org plus one of the known endpoints
  # (and a trailing ":" or "/") in place. Only the first match is removed so
  # that a later path segment that happens to look like the host survives.
  def remove_domain
    url.sub!(/(apache\.org\/(viewvc|repos\/asf|viewcvs\.cgi))+?(:|\/)?/i, '')
  end

  # Never short-circuits: the full clean up is always required for SVN URLs.
  def extractable_early?
    false
  end

  # Splits the URL into non-blank segments and cuts it at the first SVN
  # branching directory (trunk, tags or branches).
  #
  # For example apache.org/viewvc/myproject/subproject/tags/my-1.0.0-release
  # stops at myproject/subproject, since the tags are part of that project.
  #
  # Special case: under maven/pom the repositories themselves are stored
  # beneath tags, so the directory directly below the branching directory is
  # kept as well.
  #
  # When the branching directory is the very first segment there is no
  # project path in front of it, so url becomes an empty Array (which
  # format_url turns into nil) rather than the whole, untruncated segment list.
  def remove_extra_segments
    self.url = url.split('/').reject { |segment| segment.strip.empty? }

    subdir_index = url.index { |segment| SUBDIR_NAMES.include?(segment) }
    return unless subdir_index

    in_maven_pom_dir = url[0..1].join("/").downcase == "maven/pom"

    self.url = if in_maven_pom_dir
                 url[0..subdir_index + 1]
               else
                 url.first(subdir_index)
               end
  end

  # Joins the cleaned-up segments into the project path.
  #
  # Returns nil unless url is a non-empty Array: a String here means the
  # clean up steps did not run, so there is nothing reliable to format.
  def format_url
    return nil unless url.is_a?(Array) && url.length.positive?

    url.join("/")
  end
end
@@ -15,6 +15,8 @@ class BitbucketURLParser < URLParser
15
15
  end
16
16
 
17
17
  def remove_domain
18
- url.gsub!(/(bitbucket.com|bitbucket.org)+?(:|\/)?/i, '')
18
+ # find the first match of a bitbucket domain in the url string
19
+ # and remove only that first match in case a repo has something like bitbucket.org in its name
20
+ url.sub!(/(bitbucket\.com|bitbucket\.org)+?(:|\/)?/i, '')
19
21
  end
20
22
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
# Parser for projects hosted on git.drupalcode.org/project.
class DrupalUrlParser < URLParser
  private

  # Host name without its top level domain.
  def domain
    'git.drupalcode'
  end

  # Top level domains accepted for this host.
  def tlds
    %w[org]
  end

  # Base URL that the parsed project path is joined onto.
  def full_domain
    'https://git.drupalcode.org/project'
  end

  # Strips the first occurrence of drupalcode.org/project (and a trailing ":"
  # or "/") in place. Only the first match is removed so that a project whose
  # name happens to contain the host survives.
  def remove_domain
    url.sub!(/(drupalcode\.org\/project)+?(:|\/)?/i, '')
  end

  # Joins the segments into the project path. An Array means the URL went
  # through all the clean up steps; anything else, or an empty Array, gives nil.
  def format_url
    return nil unless url.is_a?(Array)
    return nil if url.empty?

    url.join("/")
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
# Parser for repositories hosted on git.eclipse.org/c.
class EclipseGitUrlParser < URLParser
  private

  # Host name without its top level domain.
  def domain
    'git.eclipse'
  end

  # Top level domains accepted for this host.
  def tlds
    %w[org]
  end

  # Base URL that the parsed repository path is joined onto.
  def full_domain
    'https://git.eclipse.org/c'
  end

  # Strips the first occurrence of eclipse.org/c (and a trailing ":" or "/")
  # in place.
  def remove_domain
    url.sub!(/(eclipse\.org\/c)+?(:|\/)?/i, '')
  end

  # Deliberate no-op returning nil: repository names on the website all end
  # in .git, so the extension must be kept.
  def remove_git_extension; end
end
@@ -15,6 +15,8 @@ class GithubURLParser < URLParser
15
15
  end
16
16
 
17
17
  def remove_domain
18
- url.gsub!(/(github.io|github.com|github.org|raw.githubusercontent.com)+?(:|\/)?/i, '')
18
+ # find the matches for any github domain characters in the url string
19
+ # and replace only the first match incase we find a repo with something like github.com as the name
20
+ url.sub!(/(github\.io|github\.com|github\.org|raw\.githubusercontent\.com)+?(:|\/)?/i, '')
19
21
  end
20
22
  end
@@ -15,6 +15,8 @@ class GitlabURLParser < URLParser
15
15
  end
16
16
 
17
17
  def remove_domain
18
- url.gsub!(/(gitlab.com)+?(:|\/)?/i, '')
18
+ # find the first match of the gitlab domain in the url string
19
+ # and remove only that first match in case a repo has something like gitlab.com in its name
20
+ url.sub!(/(gitlab\.com)+?(:|\/)?/i, '')
19
21
  end
20
22
  end
@@ -4,7 +4,14 @@ require_relative "url_parser"
4
4
  require_relative "bitbucket_url_parser"
5
5
  require_relative "github_url_parser"
6
6
  require_relative "gitlab_url_parser"
7
+ require_relative "apache_svn_url_parser"
8
+ require_relative "apache_git_wip_url_parser"
9
+ require_relative "apache_gitbox_url_parser"
10
+ require_relative "drupal_url_parser"
11
+ require_relative "eclipse_git_url_parser"
12
+ require_relative "android_googlesource_url_parser"
13
+ require_relative "sourceforge_url_parser"
7
14
 
8
15
  module LibrariesioURLParser
9
- VERSION = "1.0.0"
16
+ VERSION = "1.0.3"
10
17
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
# Parser for projects hosted on sourceforge.net, addressed either as
# /projects/<name> or /p/<name>.
class SourceforgeUrlParser < URLParser
  # Path prefixes that precede a project name.
  PROJECT_PATHS = %w[projects p].freeze

  private

  # Host name without its top level domain.
  def domain
    'sourceforge'
  end

  # Top level domains accepted for this host.
  def tlds
    %w[net]
  end

  # Base URL that the parsed project name is joined onto.
  def full_domain
    'https://sourceforge.net/projects'
  end

  # Never short-circuits: the full clean up is always required.
  def extractable_early?
    false
  end

  # Strips the first occurrence of the host plus a project path prefix
  # (and a trailing ":" or "/") in place.
  def remove_domain
    url.sub!(/(sourceforge\.net\/(#{PROJECT_PATHS.join("|")}))+?(:|\/)?/i, '')
  end

  # Keeps only the first non-blank path segment, i.e. the project name
  # (nil when no segment is left).
  def remove_extra_segments
    segments = url.split('/').reject { |segment| segment.strip.empty? }
    self.url = segments.first
  end

  # The URL should have been reduced to a single String holding the project
  # name by now; anything else gives nil.
  def format_url
    url if url.is_a?(String)
  end
end
data/lib/url_parser.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  class URLParser
3
4
  def self.parse(url)
4
5
  new(url).parse
@@ -28,21 +29,27 @@ class URLParser
28
29
  end
29
30
 
30
31
  def self.try_all(url)
31
- GithubURLParser.parse_to_full_url(url) ||
32
- GitlabURLParser.parse_to_full_url(url) ||
33
- BitbucketURLParser.parse_to_full_url(url)
32
+ # run through all the subclasses and try their parse method
33
+ # exit the reduce at the first non nil value and return that
34
+ descendants.reduce(nil) do |_, n|
35
+ r = n.parse_to_full_url(url)
36
+ break r if r
37
+ end
34
38
  end
35
39
 
36
40
  def parse_to_full_url
37
41
  path = parse
38
- return nil unless path.present?
42
+ return nil if path.nil? || path.empty?
43
+
39
44
  [full_domain, path].join('/')
40
45
  end
41
46
 
42
47
  def parse_to_full_user_url
43
48
  return nil unless parseable?
49
+
44
50
  path = clean_url
45
51
  return nil unless path.length == 1
52
+
46
53
  [full_domain, path].join('/')
47
54
  end
48
55
 
@@ -68,6 +75,7 @@ class URLParser
68
75
 
69
76
  def format_url
70
77
  return nil unless url.length == 2
78
+
71
79
  url.join('/')
72
80
  end
73
81
 
@@ -83,14 +91,6 @@ class URLParser
83
91
  raise NotImplementedError
84
92
  end
85
93
 
86
- def includes_domain?
87
- raise NotImplementedError
88
- end
89
-
90
- def extractable_early?
91
- raise NotImplementedError
92
- end
93
-
94
94
  def domain_regex
95
95
  "#{domain}\.(#{tlds.join('|')})"
96
96
  end
@@ -151,14 +151,24 @@ class URLParser
151
151
  end
152
152
 
153
153
  def remove_scheme
154
- url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)+?)/i, '')
154
+ url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)(\/\/)?)/i, '')
155
155
  end
156
156
 
157
157
  def remove_subdomain
158
- url.gsub!(/(www|ssh|raw|git|wiki)+?\./i, '')
158
+ url.gsub!(/(www|ssh|raw|git|wiki|svn)+?\./i, '')
159
159
  end
160
160
 
161
161
  def remove_whitespace
162
162
  url.gsub!(/\s/, '')
163
163
  end
164
+
165
# Collects every class currently known to ObjectSpace that is a kind of this
# class's singleton class, i.e. its subclasses, excluding the class itself
# and any singleton classes. Each hit is prepended, so the result is in the
# reverse of ObjectSpace's iteration order.
private_class_method def self.descendants
  subclasses = []
  ObjectSpace.each_object(singleton_class) do |klass|
    subclasses.unshift(klass) unless klass.singleton_class? || klass == self
  end
  subclasses
end
164
174
  end
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency "rake", "~> 12.0"
21
21
  spec.add_development_dependency "rspec", "~> 3.0"
22
22
  spec.add_development_dependency "rspec_junit_formatter", "~> 0.5"
23
+ spec.add_development_dependency "pry", "~> 0.14.1"
23
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: librariesio-url-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Pace
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-13 00:00:00.000000000 Z
11
+ date: 2022-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -52,7 +52,21 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.5'
55
- description:
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.14.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.14.1
69
+ description:
56
70
  email:
57
71
  - matt.pace@tidelift.com
58
72
  executables: []
@@ -65,10 +79,18 @@ files:
65
79
  - Gemfile.lock
66
80
  - README.md
67
81
  - Rakefile
82
+ - bin/console
83
+ - lib/android_googlesource_url_parser.rb
84
+ - lib/apache_git_wip_url_parser.rb
85
+ - lib/apache_gitbox_url_parser.rb
86
+ - lib/apache_svn_url_parser.rb
68
87
  - lib/bitbucket_url_parser.rb
88
+ - lib/drupal_url_parser.rb
89
+ - lib/eclipse_git_url_parser.rb
69
90
  - lib/github_url_parser.rb
70
91
  - lib/gitlab_url_parser.rb
71
92
  - lib/librariesio-url-parser.rb
93
+ - lib/sourceforge_url_parser.rb
72
94
  - lib/url_parser.rb
73
95
  - librariesio-url-parser.gemspec
74
96
  homepage: https://github.com/librariesio/librariesio-url-parser
@@ -76,7 +98,7 @@ licenses:
76
98
  - AGPL-3.0
77
99
  metadata:
78
100
  rubygems_mfa_required: 'true'
79
- post_install_message:
101
+ post_install_message:
80
102
  rdoc_options: []
81
103
  require_paths:
82
104
  - lib
@@ -91,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
91
113
  - !ruby/object:Gem::Version
92
114
  version: '0'
93
115
  requirements: []
94
- rubygems_version: 3.0.9
95
- signing_key:
116
+ rubygems_version: 3.0.3
117
+ signing_key:
96
118
  specification_version: 4
97
119
  summary: Parse the URL for various repositories tracked by libraries.io
98
120
  test_files: []