librariesio-url-parser 1.0.1 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df16d2958656fa866177551cb6c6275e6b8303c40d41d3baeb639aff567aad07
4
- data.tar.gz: ec4443d5e407716d312746c7157d698a21b9c10eaeea208f42101b42688f4b47
3
+ metadata.gz: 26ec3df36120630b6729b1d5a9b7d854acfeadede419ab34e895d0e00619fb7c
4
+ data.tar.gz: 7bf32628de41c3a243b08ee3b8aa8489e5dfbfef5fe673e7f68390a6f942dbd0
5
5
  SHA512:
6
- metadata.gz: 8ca1db0935515b6fe9d0f2dc5c27e8af36bb8cd4b652dc70d4f5194a27edd1cd4257144bd83b4b9c7fb4ae6dc333a37789021e852a1bc34cdf4000a61a27b86f
7
- data.tar.gz: f253083f04c93d8eff970002d9bcc02ce0ffb362167149371a6a5576dd4ed1ee9dbe2c760841695873487749cab40d121447fdf48f74d429b46be930322db683
6
+ metadata.gz: bbe57df347aafc5b82046f13a5e20d7c6deaf66cb67590540d122701d79bd516bb0b015b0b41f72d144e7c963f45e1ef7f67da61328aa819b8712c58d029b9af
7
+ data.tar.gz: c942042bb95db767107b91aa136c61cdd8408c11cfef763038abab07e0bdc515ee032cf8979e27040988aca8ce0b879214fae367511b1cf36a48a0f5e4415071
data/Gemfile CHANGED
@@ -2,6 +2,4 @@ source "https://rubygems.org"
2
2
  ruby "2.6.5"
3
3
 
4
4
  # Specify your gem's dependencies in librariesio-url-parser.gemspec
5
- gemspec
6
-
7
- gem "pry", "~> 0.14.1", :group => :development
5
+ gemspec
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- librariesio-url-parser (1.0.1)
4
+ librariesio-url-parser (1.0.4)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -45,6 +45,7 @@ URLParser.try_all("git@bitbucket.org:tildeslash/monit.git") #=> "https://bitbuck
45
45
  - GitHub
46
46
  - GitLab
47
47
  - Bitbucket
48
+ - Apache SVN
48
49
 
49
50
  ## Development
50
51
 
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "librariesio-url-parser"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "pry"
14
+ Pry.start
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+ class AndroidGooglesourceUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://android.googlesource.com'
7
+ end
8
+
9
+ def tlds
10
+ %w(com)
11
+ end
12
+
13
+ def domain
14
+ 'android.googlesource'
15
+ end
16
+
17
+ def remove_domain
18
+ url.sub!(/(android\.googlesource\.com)+?(:|\/)?/i, '')
19
+ end
20
+
21
+ def remove_extra_segments
22
+ self.url = url.split('/').reject{ |s| s.strip.empty? }
23
+ end
24
+
25
+ def format_url
26
+ # if this is an Array then the url has gone through all the clean up steps
27
+ #
28
+ # if this is just a string then the url was not cleaned up and I have no idea how to format it
29
+ return nil unless url.is_a?(Array) && url.length.positive?
30
+
31
+ # the links that point to specific branches of the repository contain a + segment in the path
32
+ # for example https://android.googlesource.com/device/amlogic/yukawa/ is the top level repository
33
+ # but looking at the master branch is the url
34
+ # https://android.googlesource.com/device/amlogic/yukawa/+/refs/heads/master
35
+ # and the same applies for tags
36
+ # https://android.googlesource.com/device/amlogic/yukawa/+/refs/tags/android-12.1.0_r16
37
+
38
+ self.url = url.join("/").split("+").first.chomp("/")
39
+ end
40
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+ class ApacheGitWipUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git-wip-us.apache.org/repos/asf'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git-wip-us.apache'
15
+ end
16
+
17
+ def remove_querystring
18
+ # it is common for the name to be passed in as a query parameter so we need to keep them in
19
+ # the url string for now and process them in later steps to pull the name out of the parameter
20
+ url
21
+ end
22
+
23
+ def remove_equals_sign
24
+ # we need to preserve the p=<some_name> query parameter
25
+ splits = url.split('=')
26
+ p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
27
+ if p_index
28
+ new_url = splits[0..p_index+1].join("=") if p_index
29
+ # remove separator characters present at the end of this string
30
+ # before the next parameter in the query parameter list
31
+ # ";"
32
+ new_url.gsub!(/[;,&].*/, '')
33
+
34
+ self.url = new_url
35
+ end
36
+ end
37
+
38
+ def domain_regex
39
+ # match only the repos/asf endpoint at the domain
40
+ "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
41
+ end
42
+
43
+ def remove_domain
44
+ url.sub!(/(git-wip-us\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
45
+ end
46
+
47
+ def remove_extra_segments
48
+ # by the time the URL gets here it should have been mostly pared down to the correct name
49
+ # however if the name was passed as a query parameter the ?p= is still at the front of the name
50
+ if url.is_a?(String) && url.start_with?("?p=")
51
+ self.url = url.split("=").last
52
+ end
53
+ end
54
+
55
+ def format_url
56
+ # ignore the url if it comes in as an Array at this point
57
+ url.is_a?(String) ? url : nil
58
+ end
59
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+ class ApacheGitboxUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://gitbox.apache.org/repos/asf'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'gitbox.apache'
15
+ end
16
+
17
+ def remove_querystring
18
+ # it is common for the name to be passed in as a query parameter so we need to keep them in
19
+ # the url string for now and process them in later steps to pull the name out of the parameter
20
+ url
21
+ end
22
+
23
+ def remove_equals_sign
24
+ # we need to preserve the p=<some_name> query parameter
25
+ splits = url.split('=')
26
+ p_index = splits.index{|s| s.end_with?("?p") || s.end_with?("&p")}
27
+ if p_index
28
+ new_url = splits[0..p_index+1].join("=") if p_index
29
+ # remove separator characters present at the end of this string
30
+ # before the next parameter in the query parameter list
31
+ # ";"
32
+ new_url.gsub!(/[;,&].*/, '')
33
+
34
+ self.url = new_url
35
+ end
36
+ end
37
+
38
+ def domain_regex
39
+ # match only the repos/asf endpoint at the domain
40
+ "#{domain.split("/").first}\.(#{tlds.join('|')})\/repos/asf"
41
+ end
42
+
43
+ def remove_domain
44
+ url.sub!(/(gitbox\.apache\.org\/(repos\/asf))+?(:|\/)?/i, '')
45
+ end
46
+
47
+ def remove_extra_segments
48
+ # by the time the URL gets here it should have been mostly pared down to the correct name
49
+ # however if the name was passed as a query parameter the ?p= is still at the front of the name
50
+ if url.is_a?(String) && url.start_with?("?p=")
51
+ self.url = url.split("=").last
52
+ end
53
+ end
54
+
55
+ def format_url
56
+ # ignore the url if it comes in as an Array at this point
57
+ url.is_a?(String) ? url : nil
58
+ end
59
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+ class ApacheSvnUrlParser < URLParser
3
+ SUBDIR_NAMES = %w[trunk tags branches].freeze
4
+ VALID_PATHS = %w[viewvc viewcvs\.cgi repos\/asf].freeze
5
+ private
6
+
7
+ def full_domain
8
+ 'https://svn.apache.org/viewvc'
9
+ end
10
+
11
+ def tlds
12
+ %w(org)
13
+ end
14
+
15
+ def domain
16
+ 'svn.apache'
17
+ end
18
+
19
+ def domain_regex
20
+ # match only the supported endpoints (viewvc, viewcvs.cgi, repos/asf) at the domain
21
+ "#{domain}\.(#{tlds.join('|')})\/(#{VALID_PATHS.join("|")})"
22
+ end
23
+
24
+ def remove_domain
25
+ # find the matches for the Apache SVN domain and endpoint in the url string
26
+ # and replace only the first match in case we find a repo with something like apache.org/viewvc in its path
27
+ url.sub!(/(apache\.org\/(viewvc|repos\/asf|viewcvs\.cgi))+?(:|\/)?/i, '')
28
+ end
29
+
30
+ def extractable_early?
31
+ false
32
+ end
33
+
34
+ def remove_extra_segments
35
+ # split the url by / and remove any empty sections
36
+ self.url = url.split('/').reject{ |s| s.strip.empty? }
37
+
38
+ # check to see if any repository subdirectories are included in the segments
39
+ # this parser is parsing SVN projects, so any common folders used for branching should trip this
40
+ # truncate the array of segments to stop once we hit a top level sub directory typically seen in SVN repos
41
+ # and return everything up to that point
42
+ #
43
+ # for example apache.org/viewvc/myproject/subproject/tags/my-1.0.0-release should stop at myproject/subproject
44
+ # since the tags are just part of that repository
45
+ subdir_index = url.index{ |s| SUBDIR_NAMES.include?(s) }
46
+
47
+ # it looks like the maven/pom directory on the Apache SVN server has a bunch of repositories stored under tags
48
+ # in this special case, grab the directory name under the subdirectory
49
+ # it looks like this is most likely to be the first directory under tags/
50
+ in_maven_pom_dir = url[0..1].join("/").downcase == "maven/pom"
51
+
52
+ if in_maven_pom_dir
53
+ self.url = url[0..subdir_index+1] if subdir_index
54
+ else
55
+ self.url = url[0..subdir_index-1] if subdir_index
56
+ end
57
+ end
58
+
59
+ def format_url
60
+ # if this is an Array then the url has gone through all the clean up steps
61
+ #
62
+ # if this is just a string then the url was not cleaned up and I have no idea how to format it
63
+ return nil unless url.is_a?(Array) && url.length.positive?
64
+
65
+ url.join("/")
66
+ end
67
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+ class DrupalUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git.drupalcode.org/project'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git.drupalcode'
15
+ end
16
+
17
+ def remove_domain
18
+ # find the matches for the drupalcode.org/project domain prefix in the url string
19
+ # and replace only the first match in case we find a repo with something like drupalcode.org/project in its name
20
+ url.sub!(/(drupalcode\.org\/project)+?(:|\/)?/i, '')
21
+ end
22
+
23
+ def format_url
24
+ # if this is an Array then the url has gone through all the clean up steps
25
+ return nil unless url.is_a?(Array) && url.length.positive?
26
+
27
+ url.join("/")
28
+ end
29
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+ class EclipseGitUrlParser < URLParser
3
+ private
4
+
5
+ def full_domain
6
+ 'https://git.eclipse.org/c'
7
+ end
8
+
9
+ def tlds
10
+ %w(org)
11
+ end
12
+
13
+ def domain
14
+ 'git.eclipse'
15
+ end
16
+
17
+ def remove_git_extension
18
+ # the repository names all end in .git on the website, so don't remove it here
19
+ nil
20
+ end
21
+
22
+ def remove_domain
23
+ url.sub!(/(eclipse\.org\/c)+?(:|\/)?/i, '')
24
+ end
25
+ end
@@ -4,7 +4,14 @@ require_relative "url_parser"
4
4
  require_relative "bitbucket_url_parser"
5
5
  require_relative "github_url_parser"
6
6
  require_relative "gitlab_url_parser"
7
+ require_relative "apache_svn_url_parser"
8
+ require_relative "apache_git_wip_url_parser"
9
+ require_relative "apache_gitbox_url_parser"
10
+ require_relative "drupal_url_parser"
11
+ require_relative "eclipse_git_url_parser"
12
+ require_relative "android_googlesource_url_parser"
13
+ require_relative "sourceforge_url_parser"
7
14
 
8
15
  module LibrariesioURLParser
9
- VERSION = "1.0.1"
16
+ VERSION = "1.0.4"
10
17
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+ class SourceforgeUrlParser < URLParser
3
+ PROJECT_PATHS = %w[projects p].freeze
4
+ private
5
+
6
+ def full_domain
7
+ 'https://sourceforge.net/projects'
8
+ end
9
+
10
+ def tlds
11
+ %w(net)
12
+ end
13
+
14
+ def domain
15
+ 'sourceforge'
16
+ end
17
+
18
+ def remove_domain
19
+ url.sub!(/(sourceforge\.net\/(#{PROJECT_PATHS.join("|")}))+?(:|\/)?/i, '')
20
+ end
21
+
22
+ def extractable_early?
23
+ false
24
+ end
25
+
26
+ def remove_extra_segments
27
+ self.url = url.split('/').reject{ |s| s.strip.empty? }.first
28
+ end
29
+
30
+ def format_url
31
+ # the URL at this point should have been reduced down to a single string for the project name
32
+ return nil unless url.is_a?(String)
33
+
34
+ url
35
+ end
36
+ end
data/lib/url_parser.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  class URLParser
3
4
  def self.parse(url)
4
5
  new(url).parse
@@ -28,21 +29,27 @@ class URLParser
28
29
  end
29
30
 
30
31
  def self.try_all(url)
31
- GithubURLParser.parse_to_full_url(url) ||
32
- GitlabURLParser.parse_to_full_url(url) ||
33
- BitbucketURLParser.parse_to_full_url(url)
32
+ # run through all the subclasses and try their parse method
33
+ # exit the reduce at the first non nil value and return that
34
+ descendants.reduce(nil) do |_, n|
35
+ r = n.parse_to_full_url(url)
36
+ break r if r
37
+ end
34
38
  end
35
39
 
36
40
  def parse_to_full_url
37
41
  path = parse
38
- return nil unless path.present?
42
+ return nil if path.nil? || path.empty?
43
+
39
44
  [full_domain, path].join('/')
40
45
  end
41
46
 
42
47
  def parse_to_full_user_url
43
48
  return nil unless parseable?
49
+
44
50
  path = clean_url
45
51
  return nil unless path.length == 1
52
+
46
53
  [full_domain, path].join('/')
47
54
  end
48
55
 
@@ -68,6 +75,7 @@ class URLParser
68
75
 
69
76
  def format_url
70
77
  return nil unless url.length == 2
78
+
71
79
  url.join('/')
72
80
  end
73
81
 
@@ -83,14 +91,6 @@ class URLParser
83
91
  raise NotImplementedError
84
92
  end
85
93
 
86
- def includes_domain?
87
- raise NotImplementedError
88
- end
89
-
90
- def extractable_early?
91
- raise NotImplementedError
92
- end
93
-
94
94
  def domain_regex
95
95
  "#{domain}\.(#{tlds.join('|')})"
96
96
  end
@@ -151,14 +151,31 @@ class URLParser
151
151
  end
152
152
 
153
153
  def remove_scheme
154
- url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)+?)/i, '')
154
+ url.gsub!(/(((git\+https|git|ssh|hg|svn|scm|http|https)+?:)(\/\/)?)/i, '')
155
155
  end
156
156
 
157
157
  def remove_subdomain
158
- url.gsub!(/(www|ssh|raw|git|wiki)+?\./i, '')
158
+ url.gsub!(/(www|ssh|raw|git|wiki|svn)+?\./i, '')
159
159
  end
160
160
 
161
161
  def remove_whitespace
162
162
  url.gsub!(/\s/, '')
163
163
  end
164
+
165
+ # This computation is memoized because it is expensive; recomputing it on every call would rule out use cases
166
+ # that call .try_all in a tight loop. However, if this class is required directly (without requiring any subparsers),
167
+ # this method will memoize an empty array. It is recommended to simply require librariesio-url-parser.rb directly.
168
+ # This is the default behavior when installing this gem.
169
+ private_class_method def self.descendants
170
+ @descendants ||=
171
+ begin
172
+ descendants = []
173
+ ObjectSpace.each_object(singleton_class) do |k|
174
+ next if k.singleton_class?
175
+
176
+ descendants.unshift k unless k == self
177
+ end
178
+ descendants
179
+ end
180
+ end
164
181
  end
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency "rake", "~> 12.0"
21
21
  spec.add_development_dependency "rspec", "~> 3.0"
22
22
  spec.add_development_dependency "rspec_junit_formatter", "~> 0.5"
23
+ spec.add_development_dependency "pry", "~> 0.14.1"
23
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: librariesio-url-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Pace
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-26 00:00:00.000000000 Z
11
+ date: 2022-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -52,7 +52,21 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.5'
55
- description:
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.14.1
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.14.1
69
+ description:
56
70
  email:
57
71
  - matt.pace@tidelift.com
58
72
  executables: []
@@ -65,10 +79,18 @@ files:
65
79
  - Gemfile.lock
66
80
  - README.md
67
81
  - Rakefile
82
+ - bin/console
83
+ - lib/android_googlesource_url_parser.rb
84
+ - lib/apache_git_wip_url_parser.rb
85
+ - lib/apache_gitbox_url_parser.rb
86
+ - lib/apache_svn_url_parser.rb
68
87
  - lib/bitbucket_url_parser.rb
88
+ - lib/drupal_url_parser.rb
89
+ - lib/eclipse_git_url_parser.rb
69
90
  - lib/github_url_parser.rb
70
91
  - lib/gitlab_url_parser.rb
71
92
  - lib/librariesio-url-parser.rb
93
+ - lib/sourceforge_url_parser.rb
72
94
  - lib/url_parser.rb
73
95
  - librariesio-url-parser.gemspec
74
96
  homepage: https://github.com/librariesio/librariesio-url-parser
@@ -76,7 +98,7 @@ licenses:
76
98
  - AGPL-3.0
77
99
  metadata:
78
100
  rubygems_mfa_required: 'true'
79
- post_install_message:
101
+ post_install_message:
80
102
  rdoc_options: []
81
103
  require_paths:
82
104
  - lib
@@ -91,8 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
91
113
  - !ruby/object:Gem::Version
92
114
  version: '0'
93
115
  requirements: []
94
- rubygems_version: 3.0.3
95
- signing_key:
116
+ rubygems_version: 3.0.9
117
+ signing_key:
96
118
  specification_version: 4
97
119
  summary: Parse the URL for various repositories tracked by libraries.io
98
120
  test_files: []