librariesio-url-parser 1.0.0 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4095a2003d55e594f33b981cb4fa104b37ab2ffba7f6f3988b905a901afb9785
4
- data.tar.gz: 8dfbab7448f15692cd1064eb119cd55101d5fceab25dd806930780c48f3a65cd
3
+ metadata.gz: 1cbe610f7a8876b48f40ed8cb43058aff5e08abc55691dff82c7e3495dab9d20
4
+ data.tar.gz: bad609770a8779f49094c4f1d890051b51d0a85160cc586fa86312b5907cdbb3
5
5
  SHA512:
6
- metadata.gz: 5aa7f74fcbaa9d1dc7774d61eedcbf349028a4e3bef26dc4a9e3b07e3856cdc2559feff970a36becf6e2c4e10eb4be77b68969470f814cd422a47e4f800e940e
7
- data.tar.gz: f3dfc1047f3141e6a4f9ea39f7ac7514bbb607bd5f2577ac0809903614b3e008d7bb48916a01cb537886f5be0bad4fb793b9130e43d212d85fc0061bf1fb8161
6
+ metadata.gz: 88945a3e0dff0472969fdef9d09d5503420e41a539aeff5c46ce089c907190b8c8b1854af395f90a48ffb7248a63ac6df4ae285024678e1ac70dfb9c68f49474
7
+ data.tar.gz: 223accfcf45816de5185f3af8e2026f808810fc0371740a104f6654d5bca73f1fe2ff7ba8f374b6122e0126a86a2aacf1d3218004107c32552cf2c103b5c0885
data/Gemfile CHANGED
@@ -2,4 +2,4 @@ source "https://rubygems.org"
2
2
  ruby "2.6.5"
3
3
 
4
4
  # Specify your gem's dependencies in librariesio-url-parser.gemspec
5
- gemspec
5
+ gemspec
data/Gemfile.lock CHANGED
@@ -1,12 +1,17 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- librariesio-url-parser (1.0.0)
4
+ librariesio-url-parser (1.0.3)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ coderay (1.1.3)
9
10
  diff-lcs (1.5.0)
11
+ method_source (1.0.0)
12
+ pry (0.14.1)
13
+ coderay (~> 1.1)
14
+ method_source (~> 1.0)
10
15
  rake (12.3.3)
11
16
  rspec (3.11.0)
12
17
  rspec-core (~> 3.11.0)
@@ -29,6 +34,7 @@ PLATFORMS
29
34
 
30
35
  DEPENDENCIES
31
36
  librariesio-url-parser!
37
+ pry (~> 0.14.1)
32
38
  rake (~> 12.0)
33
39
  rspec (~> 3.0)
34
40
  rspec_junit_formatter (~> 0.5)
data/README.md CHANGED
@@ -45,6 +45,7 @@ URLParser.try_all("git@bitbucket.org:tildeslash/monit.git") #=> "https://bitbuck
45
45
  - GitHub
46
46
  - GitLab
47
47
  - Bitbucket
48
+ - Apache SVN
48
49
 
49
50
  ## Development
50
51
 
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "librariesio-url-parser"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "pry"
14
+ Pry.start
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+ class AndroidGooglesourceUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://android.googlesource.com'
7
+ end
8
+
9
+ def tlds
10
+ %w(com)
11
+ end
12
+
13
+ def domain
14
+ 'android.googlesource'
15
+ end
16
+
17
+ def remove_domain
18
+ url.sub!(/(android\.googlesource\.com)+?(:|\/)?/i, '')
19
+ end
20
+
21
+ def remove_extra_segments
22
+ self.url = url.split('/').reject{ |s| s.strip.empty? }
23
+ end
24
+
25
+ def format_url
26
+ # if this is an Array then the url has gone through all the clean up steps
27
+ #
28
+ # if this is just a string then the url was not cleaned up and I have no idea how to format it
29
+ return nil unless url.is_a?(Array) && url.length.positive?
30
+
31
+ # links that point into specific branches of the repository have a path segment that starts with +
32
+ # for example https://android.googlesource.com/device/amlogic/yukawa/ is the top level repository
33
+ # but the url for looking at the master branch is
34
+ # https://android.googlesource.com/device/amlogic/yukawa/+/refs/heads/master
35
+ # and the same applies for tags
36
+ # https://android.googlesource.com/device/amlogic/yukawa/+/refs/tags/android-12.1.0_r16
37
+
38
+ self.url = url.join("/").split("+").first.chomp("/")
39
+ end
40
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+ class ApacheGitWipUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git-wip-us.apache.org/repos/asf'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git-wip-us.apache'
15
+ end
16
+
17
+ def remove_querystring
18
+ # it is common for the name to be passed in as a query parameter so we need to keep the query string in
19
+ # the url for now and process it in later steps to pull the name out of the parameter
20
+ url
21
+ end
22
+
23
+ def remove_equals_sign
24
+ # we need to preserve the p=<some_name> query parameter
25
+ splits = url.split('=')
26
+ p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
27
+ if p_index
28
+ new_url = splits[0..p_index+1].join("=") if p_index
29
+ # remove separator characters present at the end of this string
30
+ # before the next parameter in the query parameter list
31
+ # (one of ";", "," or "&")
32
+ new_url.gsub!(/[;,&].*/, '')
33
+
34
+ self.url = new_url
35
+ end
36
+ end
37
+
38
+ def domain_regex
39
+ # match only the repos/asf endpoint at the domain
40
+ "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
41
+ end
42
+
43
+ def remove_domain
44
+ url.sub!(/(git-wip-us\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
45
+ end
46
+
47
+ def remove_extra_segments
48
+ # by the time the URL gets here it should have been mostly pared down to the correct name
49
+ # however if the name was passed as a query parameter the ?p= is still at the front of the name
50
+ if url.is_a?(String) && url.start_with?("?p=")
51
+ self.url = url.split("=").last
52
+ end
53
+ end
54
+
55
+ def format_url
56
+ # ignore the url if it comes in as an Array at this point
57
+ url.is_a?(String) ? url : nil
58
+ end
59
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+ class ApacheGitboxUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://gitbox.apache.org/repos/asf'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'gitbox.apache'
15
+ end
16
+
17
+ def remove_querystring
18
+ # it is common for the name to be passed in as a query parameter so we need to keep the query string in
19
+ # the url for now and process it in later steps to pull the name out of the parameter
20
+ url
21
+ end
22
+
23
+ def remove_equals_sign
24
+ # we need to preserve the p=<some_name> query parameter
25
+ splits = url.split('=')
26
+ p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
27
+ if p_index
28
+ new_url = splits[0..p_index+1].join("=") if p_index
29
+ # remove separator characters present at the end of this string
30
+ # before the next parameter in the query parameter list
31
+ # (one of ";", "," or "&")
32
+ new_url.gsub!(/[;,&].*/, '')
33
+
34
+ self.url = new_url
35
+ end
36
+ end
37
+
38
+ def domain_regex
39
+ # match only the repos/asf endpoint at the domain
40
+ "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
41
+ end
42
+
43
+ def remove_domain
44
+ url.sub!(/(gitbox\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
45
+ end
46
+
47
+ def remove_extra_segments
48
+ # by the time the URL gets here it should have been mostly pared down to the correct name
49
+ # however if the name was passed as a query parameter the ?p= is still at the front of the name
50
+ if url.is_a?(String) && url.start_with?("?p=")
51
+ self.url = url.split("=").last
52
+ end
53
+ end
54
+
55
+ def format_url
56
+ # ignore the url if it comes in as an Array at this point
57
+ url.is_a?(String) ? url : nil
58
+ end
59
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+ class ApacheSvnUrlParser < URLParser
3
+ SUBDIR_NAMES = %w[trunk tags branches].freeze
4
+ VALID_PATHS = %w[viewvc viewcvs\.cgi repos\/asf].freeze
5
+ private
6
+
7
+ def full_domain
8
+ 'https://svn.apache.org/viewvc'
9
+ end
10
+
11
+ def tlds
12
+ %w(org)
13
+ end
14
+
15
+ def domain
16
+ 'svn.apache'
17
+ end
18
+
19
+ def domain_regex
20
+ # match only the viewvc, viewcvs.cgi and repos/asf endpoints at the domain
21
+ "#{domain}\.(#{tlds.join('|')})\/(#{VALID_PATHS.join("|")})"
22
+ end
23
+
24
+ def remove_domain
25
+ # find the matches for the Apache SVN domain and endpoint path in the url string
26
+ # and replace only the first match in case the same text also appears later in the path, e.g. as the repo name
27
+ url.sub!(/(apache\.org\/(viewvc|repos\/asf|viewcvs\.cgi))+?(:|\/)?/i, '')
28
+ end
29
+
30
+ def extractable_early?
31
+ false
32
+ end
33
+
34
+ def remove_extra_segments
35
+ # split the url by / and remove any empty sections
36
+ self.url = url.split('/').reject{ |s| s.strip.empty? }
37
+
38
+ # check to see if any repository subdirectories are included in the segments
39
+ # this parser is parsing SVN projects, so any common folders used for branching should trip this
40
+ # truncate the array of segments to stop once we hit a top level sub directory typically seen in SVN repos
41
+ # and return everything up to that point
42
+ #
43
+ # for example apache.org/viewvc/myproject/subproject/tags/my-1.0.0-release should stop at myproject/subproject
44
+ # since the tags are just part of that repository
45
+ subdir_index = url.index{ |s| SUBDIR_NAMES.include?(s) }
46
+
47
+ # it looks like the maven/pom directory on the Apache SVN server has a bunch of repositories stored under tags
48
+ # in this special case, grab the directory name under the subdirectory
49
+ # it looks like this is most likely to be the first directory under tags/
50
+ in_maven_pom_dir = url[0..1].join("/").downcase == "maven/pom"
51
+
52
+ if in_maven_pom_dir
53
+ self.url = url[0..subdir_index+1] if subdir_index
54
+ else
55
+ self.url = url[0..subdir_index-1] if subdir_index
56
+ end
57
+ end
58
+
59
+ def format_url
60
+ # if this is an Array then the url has gone through all the clean up steps
61
+ #
62
+ # if this is just a string then the url was not cleaned up and I have no idea how to format it
63
+ return nil unless url.is_a?(Array) && url.length.positive?
64
+
65
+ url.join("/")
66
+ end
67
+ end
@@ -15,6 +15,8 @@ class BitbucketURLParser < URLParser
15
15
  end
16
16
 
17
17
  def remove_domain
18
- url.gsub!(/(bitbucket.com|bitbucket.org)+?(:|\/)?/i, '')
18
+ # find the matches for any Bitbucket domain characters in the url string
19
+ # and replace only the first match in case we find a repo with something like bitbucket.org as the name
20
+ url.sub!(/(bitbucket\.com|bitbucket\.org)+?(:|\/)?/i, '')
19
21
  end
20
22
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+ class DrupalUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git.drupalcode.org/project'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git.drupalcode'
15
+ end
16
+
17
+ def remove_domain
18
+ # find the matches for the Drupal project domain and path in the url string
19
+ # and replace only the first match in case the same text also appears later in the path, e.g. as the repo name
20
+ url.sub!(/(drupalcode\.org\/project)+?(:|\/)?/i, '')
21
+ end
22
+
23
+ def format_url
24
+ # if this is an Array then the url has gone through all the clean up steps
25
+ return nil unless url.is_a?(Array) && url.length.positive?
26
+
27
+ url.join("/")
28
+ end
29
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+ class EclipseGitUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git.eclipse.org/c'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git.eclipse'
15
+ end
16
+
17
+ def remove_git_extension
18
+ # the repository names all end in .git on the website, so don't remove it here
19
+ nil
20
+ end
21
+
22
+ def remove_domain
23
+ url.sub!(/(eclipse\.org\/c)+?(:|\/)?/i, '')
24
+ end
25
+ end
@@ -15,6 +15,8 @@ class GithubURLParser < URLParser
15
15
  end
16
16
 
17
17
  def remove_domain
18
- url.gsub!(/(github.io|github.com|github.org|raw.githubusercontent.com)+?(:|\/)?/i, '')
18
+ # find the matches for any github domain characters in the url string
19
+ # and replace only the first match in case we find a repo with something like github.com as the name
20
+ url.sub!(/(github\.io|github\.com|github\.org|raw\.githubusercontent\.com)+?(:|\/)?/i, '')
19
21
  end
20
22
  end
@@ -15,6 +15,8 @@ class GitlabURLParser < URLParser
15
15
  end
16
16
 
17
17
  def remove_domain
18
- url.gsub!(/(gitlab.com)+?(:|\/)?/i, '')
18
+ # find the matches for any GitLab domain characters in the url string
19
+ # and replace only the first match in case we find a repo with something like gitlab.com as the name
20
+ url.sub!(/(gitlab\.com)+?(:|\/)?/i, '')
19
21
  end
20
22
  end
@@ -4,7 +4,14 @@ require_relative "url_parser"
4
4
  require_relative "bitbucket_url_parser"
5
5
  require_relative "github_url_parser"
6
6
  require_relative "gitlab_url_parser"
7
+ require_relative "apache_svn_url_parser"
8
+ require_relative "apache_git_wip_url_parser"
9
+ require_relative "apache_gitbox_url_parser"
10
+ require_relative "drupal_url_parser"
11
+ require_relative "eclipse_git_url_parser"
12
+ require_relative "android_googlesource_url_parser"
13
+ require_relative "sourceforge_url_parser"
7
14
 
8
15
  module LibrariesioURLParser
9
- VERSION = "1.0.0"
16
+ VERSION = "1.0.3"
10
17
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+ class SourceforgeUrlParser < URLParser
3
+ PROJECT_PATHS = %w[projects p].freeze
4
+ private
5
+
6
+ def full_domain
7
+ 'https://sourceforge.net/projects'
8
+ end
9
+
10
+ def tlds
11
+ %w(net)
12
+ end
13
+
14
+ def domain
15
+ 'sourceforge'
16
+ end
17
+
18
+ def remove_domain
19
+ url.sub!(/(sourceforge\.net\/(#{PROJECT_PATHS.join("|")}))+?(:|\/)?/i, '')
20
+ end
21
+
22
+ def extractable_early?
23
+ false
24
+ end
25
+
26
+ def remove_extra_segments
27
+ self.url = url.split('/').reject{ |s| s.strip.empty? }.first
28
+ end
29
+
30
+ def format_url
31
+ # the URL at this point should have been reduced down to a single string for the project name
32
+ return nil unless url.is_a?(String)
33
+
34
+ url
35
+ end
36
+ end
data/lib/url_parser.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  class URLParser
3
4
  def self.parse(url)
4
5
  new(url).parse
@@ -28,21 +29,27 @@ class URLParser
28
29
  end
29
30
 
30
31
  def self.try_all(url)
31
- GithubURLParser.parse_to_full_url(url) ||
32
- GitlabURLParser.parse_to_full_url(url) ||
33
- BitbucketURLParser.parse_to_full_url(url)
32
+ # run through all the subclasses and try their parse method
33
+ # exit the reduce at the first non nil value and return that
34
+ descendants.reduce(nil) do |_, n|
35
+ r = n.parse_to_full_url(url)
36
+ break r if r
37
+ end
34
38
  end
35
39
 
36
40
  def parse_to_full_url
37
41
  path = parse
38
- return nil unless path.present?
42
+ return nil if path.nil? || path.empty?
43
+
39
44
  [full_domain, path].join('/')
40
45
  end
41
46
 
42
47
  def parse_to_full_user_url
43
48
  return nil unless parseable?
49
+
44
50
  path = clean_url
45
51
  return nil unless path.length == 1
52
+
46
53
  [full_domain, path].join('/')
47
54
  end
48
55
 
@@ -68,6 +75,7 @@ class URLParser
68
75
 
69
76
  def format_url
70
77
  return nil unless url.length == 2
78
+
71
79
  url.join('/')
72
80
  end
73
81
 
@@ -83,14 +91,6 @@ class URLParser
83
91
  raise NotImplementedError
84
92
  end
85
93
 
86
- def includes_domain?
87
- raise NotImplementedError
88
- end
89
-
90
- def extractable_early?
91
- raise NotImplementedError
92
- end
93
-
94
94
  def domain_regex
95
95
  "#{domain}\.(#{tlds.join('|')})"
96
96
  end
@@ -151,14 +151,24 @@ class URLParser
151
151
  end
152
152
 
153
153
  def remove_scheme
154
- url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)+?)/i, '')
154
+ url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)(\/\/)?)/i, '')
155
155
  end
156
156
 
157
157
  def remove_subdomain
158
- url.gsub!(/(www|ssh|raw|git|wiki)+?\./i, '')
158
+ url.gsub!(/(www|ssh|raw|git|wiki|svn)+?\./i, '')
159
159
  end
160
160
 
161
161
  def remove_whitespace
162
162
  url.gsub!(/\s/, '')
163
163
  end
164
+
165
+ private_class_method def self.descendants
166
+ descendants = []
167
+ ObjectSpace.each_object(singleton_class) do |k|
168
+ next if k.singleton_class?
169
+
170
+ descendants.unshift k unless k == self
171
+ end
172
+ descendants
173
+ end
164
174
  end
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency "rake", "~> 12.0"
21
21
  spec.add_development_dependency "rspec", "~> 3.0"
22
22
  spec.add_development_dependency "rspec_junit_formatter", "~> 0.5"
23
+ spec.add_development_dependency "pry", "~> 0.14.1"
23
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: librariesio-url-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Pace
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-13 00:00:00.000000000 Z
11
+ date: 2022-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -52,7 +52,21 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.5'
55
- description:
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.14.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.14.1
69
+ description:
56
70
  email:
57
71
  - matt.pace@tidelift.com
58
72
  executables: []
@@ -65,10 +79,18 @@ files:
65
79
  - Gemfile.lock
66
80
  - README.md
67
81
  - Rakefile
82
+ - bin/console
83
+ - lib/android_googlesource_url_parser.rb
84
+ - lib/apache_git_wip_url_parser.rb
85
+ - lib/apache_gitbox_url_parser.rb
86
+ - lib/apache_svn_url_parser.rb
68
87
  - lib/bitbucket_url_parser.rb
88
+ - lib/drupal_url_parser.rb
89
+ - lib/eclipse_git_url_parser.rb
69
90
  - lib/github_url_parser.rb
70
91
  - lib/gitlab_url_parser.rb
71
92
  - lib/librariesio-url-parser.rb
93
+ - lib/sourceforge_url_parser.rb
72
94
  - lib/url_parser.rb
73
95
  - librariesio-url-parser.gemspec
74
96
  homepage: https://github.com/librariesio/librariesio-url-parser
@@ -76,7 +98,7 @@ licenses:
76
98
  - AGPL-3.0
77
99
  metadata:
78
100
  rubygems_mfa_required: 'true'
79
- post_install_message:
101
+ post_install_message:
80
102
  rdoc_options: []
81
103
  require_paths:
82
104
  - lib
@@ -91,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
91
113
  - !ruby/object:Gem::Version
92
114
  version: '0'
93
115
  requirements: []
94
- rubygems_version: 3.0.9
95
- signing_key:
116
+ rubygems_version: 3.0.3
117
+ signing_key:
96
118
  specification_version: 4
97
119
  summary: Parse the URL for various repositories tracked by libraries.io
98
120
  test_files: []