url_parser 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ require 'forwardable'
2
+ require 'resolv'
3
+
4
# Digest::SHA1 is used by UrlParser::URI#sha1; required here so this file
# does not depend on another file having loaded it first.
require 'digest/sha1'

module UrlParser
  # A parsed URI.
  #
  # The input is run through UrlParser::Parser (with the options produced by
  # UrlParser::OptionSetter) and the result is wrapped in a UrlParser::Model.
  # Every name listed in COMPONENTS is delegated to that model.
  class URI
    extend Forwardable

    # Matches the hostname "localhost" itself or any hostname ending in
    # ".localhost".
    LOCALHOST_REGEXP = /(\A|\.)localhost\z/

    # Reader methods delegated to the underlying model.
    COMPONENTS = [
      :scheme,              # Top level URI naming structure / protocol.
      :username,            # Username portion of the userinfo.
      :user,                # Alias for #username.
      :password,            # Password portion of the userinfo.
      :userinfo,            # URI username and password for authentication.
      :hostname,            # Fully qualified domain name or IP address.
      :naked_hostname,      # Hostname without any ww? prefix.
      :port,                # Port number.
      :host,                # Hostname and port.
      :www,                 # The ww? portion of the subdomain.
      :tld,                 # Returns the top level domain portion, aka the extension.
      :top_level_domain,    # Alias for #tld.
      :extension,           # Alias for #tld.
      :sld,                 # Returns the second level domain portion, aka the domain part.
      :second_level_domain, # Alias for #sld.
      :domain_name,         # Alias for #sld.
      :trd,                 # Returns the third level domain portion, aka the subdomain part.
      :third_level_domain,  # Alias for #trd.
      :subdomains,          # Alias for #trd.
      :naked_trd,           # Any non-ww? subdomains.
      :naked_subdomain,     # Alias for #naked_trd.
      :domain,              # The domain name with the tld.
      :subdomain,           # All subdomains, including ww?.
      :origin,              # Scheme and host.
      :authority,           # Userinfo and host.
      :site,                # Scheme, userinfo, and host.
      :path,                # Directory and segment.
      :segment,             # Last portion of the path.
      :directory,           # Any directories following the site within the URI.
      :filename,            # Segment if a file extension is present.
      :suffix,              # The file extension of the filename.
      :query,               # Params and values as a string.
      :query_values,        # A hash of params and values.
      :fragment,            # Fragment identifier.
      :resource,            # Path, query, and fragment.
      :location             # Directory and resource - everything after the site.
    ].freeze

    def_delegators :@model, *COMPONENTS

    def_delegator :@model, :parsed_domain
    def_delegator :parsed_domain, :labels

    attr_reader :input, :uri, :options

    # uri     - The value to parse; kept unchanged as #input.
    # options - Options handed to UrlParser::OptionSetter; the resulting hash
    #           always has :raw forced to false.
    # blk     - Optional block yielded the URI by the parser. When omitted, a
    #           default block driven by the options is used (see
    #           #block_builder).
    def initialize(uri, options = {}, &blk)
      @input   = uri
      @options = set_options(options, &blk)
      @block   = blk || block_builder
      @uri     = UrlParser::Parser.call(@input, @options, &@block)
      @model   = UrlParser::Model.new(@uri)
    end

    # True when the :unescape option is set.
    def unescaped?
      !!options[:unescape]
    end

    # Always true: an instance only exists after the parser has run.
    def parsed?
      true
    end

    # True when the :unembed option is set.
    def unembedded?
      !!options[:unembed]
    end

    # True when the :canonicalize option is set.
    def canonicalized?
      !!options[:canonicalize]
    end

    # True when the :normalize option is set.
    def normalized?
      !!options[:normalize]
    end

    # True when the :clean option is set, or when every individual step
    # (unescape, parse, unembed, canonicalize, normalize) was requested.
    def cleaned?
      !!options[:clean] || (
        unescaped? &&
        parsed? &&
        unembedded? &&
        canonicalized? &&
        normalized?
      )
    end

    # Returns the cleaned form of the URI. When this instance was already
    # built with cleaning enabled, that is simply #raw; otherwise the original
    # input is parsed again with `raw: true` and `clean!` applied.
    def clean
      if cleaned?
        raw
      else
        UrlParser::Parser.call(@input, raw: true) { |uri| uri.clean! }
      end
    end

    # Cleans and converts into a naked hostname
    #
    # Interpolation is used instead of `+` so that a nil hostname or location
    # (e.g. for a relative URI) does not raise NoMethodError/TypeError.
    def canonical
      opts = { raw: true }
      curi = "#{naked_hostname}#{location}"

      cleaned = UrlParser::Parser.call(curi, opts) do |uri|
        uri.clean!
      end

      # Replace any leading scheme with a scheme-relative "//".
      cleaned.sub(%r{\A[a-z]+://}i, '//')
    end

    # True when cleaning was requested, or when the current string form is
    # already identical to the cleaned form.
    def clean?
      cleaned? || to_s == clean
    end

    def relative?
      uri.relative?
    end

    def absolute?
      uri.absolute?
    end

    # True when the hostname matches LOCALHOST_REGEXP.
    def localhost?
      !!(hostname.to_s[LOCALHOST_REGEXP])
    end

    # Returns the hostname when it matches Resolv::IPv4::Regex, otherwise nil.
    def ipv4
      hostname.to_s[Resolv::IPv4::Regex]
    end

    def ipv4?
      !!ipv4
    end

    # Returns the host when it matches Resolv::IPv6::Regex, otherwise nil.
    #
    # NOTE(review): this reads #host ("hostname and port") whereas #ipv4 reads
    # #hostname; confirm against UrlParser::Model which one is intended.
    def ipv6
      host.to_s[Resolv::IPv6::Regex]
    end

    def ipv6?
      !!ipv6
    end

    def ip_address?
      ipv4? || ipv6?
    end

    # True when the host is not localhost and has no ww? prefix.
    def naked?
      !localhost? && www.nil?
    end

    # String form of the parsed URI.
    def raw
      uri.to_s
    end
    alias_method :to_s, :raw

    # SHA1 hex digest of #raw.
    def sha1
      Digest::SHA1.hexdigest(raw)
    end
    # NOTE(review): this replaces Object#hash with a String-returning method;
    # Ruby expects #hash to return an Integer when an object is used as a Hash
    # key. Kept for backward compatibility.
    alias_method :hash, :sha1

    # Two URIs are equal when their cleaned forms are identical.
    def ==(uri)
      clean == self.class.new(uri, clean: true).clean
    end

    # Two URIs match when their canonical forms are identical.
    def =~(uri)
      canonical == self.class.new(uri, clean: true).canonical
    end

    # Builds a new instance from +uri+, passing this URI's string form as the
    # :base_uri option and reusing this instance's options and block.
    def +(uri)
      self.class.new(uri.to_s, options.merge(base_uri: to_s), &@block)
    end
    alias_method :join, :+

    # False for nil input or a relative URI; true for IP addresses and
    # localhost; otherwise defers to the parsed domain's own validity.
    def valid?
      return false if input.nil? || relative?
      return true if ip_address? || localhost?
      parsed_domain.valid?
    end

    private

    # Normalizes the options through UrlParser::OptionSetter and forces
    # `raw: false`.
    def set_options(opts = {}, &blk)
      UrlParser::OptionSetter
        .new(opts, &blk)
        .to_hash
        .merge(raw: false)
    end

    # Default block handed to the parser: applies `clean!` when cleaning was
    # requested, otherwise only the individual steps enabled via options.
    def block_builder
      proc do |uri|
        if cleaned?
          uri.clean!
        else
          uri.unescape!     if unescaped?
          uri.parse!        if parsed?
          uri.unembed!      if unembedded?
          uri.canonicalize! if canonicalized?
          uri.normalize!    if normalized?
        end
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module UrlParser
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -1,4 +1,10 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+
3
+ require 'codeclimate-test-reporter'
4
+ CodeClimate::TestReporter.start
5
+
1
6
  require "rspec"
7
+
2
8
  begin
3
9
  require "pry"
4
10
  rescue LoadError; end
@@ -7,19 +13,90 @@ require "url_parser"
7
13
 
8
14
  # This file was generated by the `rspec --init` command. Conventionally, all
9
15
  # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
10
- # Require this file using `require "spec_helper"` to ensure that it is only
11
- # loaded once.
16
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
17
+ # file to always be loaded, without a need to explicitly require it in any files.
18
+ #
19
+ # Given that it is always loaded, you are encouraged to keep this file as
20
+ # light-weight as possible. Requiring heavyweight dependencies from this file
21
+ # will add to the boot time of your test suite on EVERY test run, even for an
22
+ # individual file that may not need all of that loaded. Instead, consider making
23
+ # a separate helper file that requires the additional dependencies and performs
24
+ # the additional setup, and require it from the spec files that actually need it.
25
+ #
26
+ # The `.rspec` file also contains a few flags that are not defaults but that
27
+ # users commonly want.
12
28
  #
13
29
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
14
30
  RSpec.configure do |config|
15
- config.run_all_when_everything_filtered = true
31
+
32
+ # rspec-expectations config goes here. You can use an alternate
33
+ # assertion/expectation library such as wrong or the stdlib/minitest
34
+ # assertions if you prefer.
35
+ config.expect_with :rspec do |expectations|
36
+ # This option will default to `true` in RSpec 4. It makes the `description`
37
+ # and `failure_message` of custom matchers include text for helper methods
38
+ # defined using `chain`, e.g.:
39
+ # be_bigger_than(2).and_smaller_than(4).description
40
+ # # => "be bigger than 2 and smaller than 4"
41
+ # ...rather than:
42
+ # # => "be bigger than 2"
43
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
44
+ end
45
+
46
+ # rspec-mocks config goes here. You can use an alternate test double
47
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
48
+ config.mock_with :rspec do |mocks|
49
+ # Prevents you from mocking or stubbing a method that does not exist on
50
+ # a real object. This is generally recommended, and will default to
51
+ # `true` in RSpec 4.
52
+ mocks.verify_partial_doubles = true
53
+ end
54
+
55
+ # These two settings work together to allow you to limit a spec run
56
+ # to individual examples or groups you care about by tagging them with
57
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
58
+ # get run.
16
59
  config.filter_run :focus
17
- config.raise_errors_for_deprecations!
60
+ config.run_all_when_everything_filtered = true
61
+
62
+ # Limits the available syntax to the non-monkey patched syntax that is recommended.
63
+ # For more details, see:
64
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
65
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
66
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
67
+ config.disable_monkey_patching!
68
+
69
+ # This setting enables warnings. It's recommended, but in some cases may
70
+ # be too noisy due to issues in dependencies.
71
+ # config.warnings = true
72
+
73
+ # Many RSpec users commonly either run the entire suite or an individual
74
+ # file, and it's useful to allow more verbose output when running an
75
+ # individual spec file.
76
+ # if config.files_to_run.one?
77
+ # Use the documentation formatter for detailed output,
78
+ # unless a formatter has already been configured
79
+ # (e.g. via a command-line flag).
80
+ # config.default_formatter = 'doc'
81
+ # end
82
+
83
+ # Print the 10 slowest examples and example groups at the
84
+ # end of the spec run, to help surface which specs are running
85
+ # particularly slow.
86
+ # config.profile_examples = 10
87
+
18
88
  # Run specs in random order to surface order dependencies. If you find an
19
89
  # order dependency and want to debug it, you can fix the order by providing
20
90
  # the seed, which is printed after each run.
21
91
  # --seed 1234
22
- config.order = 'random'
23
- end
92
+ config.order = :random
24
93
 
94
+ # Seed global randomization in this process using the `--seed` CLI option.
95
+ # Setting this allows you to use `--seed` to deterministically reproduce
96
+ # test failures related to randomization by passing the same `--seed` value
97
+ # as the one that triggered the failure.
98
+ Kernel.srand config.seed
99
+
100
+ end
25
101
 
102
+ Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each {|f| require f }
File without changes
@@ -0,0 +1,7 @@
1
RSpec.configure do |c|
  # Examples tagged with :disable_raise_error_warning run with RSpec's
  # "potential false positive" warnings turned off. The setting that was in
  # effect before the example is restored afterwards (even if the example call
  # raises), rather than being forced back to true.
  c.around(:each, :disable_raise_error_warning) do |example|
    expectations = RSpec::Expectations.configuration
    previous_setting = expectations.warn_about_potential_false_positives?

    expectations.warn_about_potential_false_positives = false
    begin
      example.call
    ensure
      expectations.warn_about_potential_false_positives = previous_setting
    end
  end
end
@@ -0,0 +1,163 @@
1
+ require 'spec_helper'
2
+
3
# Specs for UrlParser::Domain: input normalisation, label/suffix accessors and
# the validation rules (characters, suffix, label count, label length, total
# length).
RSpec.describe UrlParser::Domain do

  context ".new" do

    it "downcases the input" do
      instance = described_class.new('EXAMPLE.COM')
      expect(instance.original).to eq 'example.com'
    end

    it "removes the root label from absolute domains" do
      instance = described_class.new('example.com.')
      expect(instance.original).to eq 'example.com'
    end

    it "sets #original as the input string" do
      instance = described_class.new("💩.la")
      expect(instance.original).to eq "💩.la"
    end

    it "sets the name as a string containing only ASCII characters" do
      instance = described_class.new("💩.la")
      expect(instance.name).to eq "xn--ls8h.la"
    end

  end

  context "#labels" do

    it "returns an array of domain parts" do
      instance = described_class.new('www.my.example.com')
      expect(instance.labels).to eq(["com", "example", "my", "www"])
    end

  end

  context "#suffix" do

    it "when domain is valid, returns a PublicSuffix::Domain" do
      instance = described_class.new('my.example.com')
      expect(instance.suffix).to be_a PublicSuffix::Domain
    end

    it "with a PublicSuffix::Domain, a call to #to_s returns the domain" do
      instance = described_class.new('my.example.com')
      expect(instance.suffix.to_s).to eq 'my.example.com'
    end

    it "when domain is invalid, returns a OpenStruct" do
      instance = described_class.new('//')
      expect(instance.suffix).to be_a OpenStruct
    end

    it "when domain is invalid, a call to #to_s returns an empty string" do
      instance = described_class.new('//')
      expect(instance.suffix.to_s).to eq ''
    end

  end

  context "#tld" do

    it "when domain is valid, returns the top level domain" do
      instance = described_class.new('www.my.example.com')
      expect(instance.tld).to eq 'com'
    end

    it "when domain is invalid, returns nil" do
      instance = described_class.new('//')
      expect(instance.tld).to be_nil
    end

  end

  context "#sld" do

    it "when domain is valid, returns the second level domain" do
      instance = described_class.new('www.my.example.com')
      expect(instance.sld).to eq 'example'
    end

    it "when domain is invalid, returns nil" do
      instance = described_class.new('//')
      expect(instance.sld).to be_nil
    end

  end

  context "#trd" do

    it "when domain is valid, returns the third level domain" do
      instance = described_class.new('www.my.example.com')
      expect(instance.trd).to eq 'www.my'
    end

    it "when domain is invalid, returns nil" do
      instance = described_class.new('//')
      expect(instance.trd).to be_nil
    end

  end

  context "#valid?" do

    it "does not fail on an empty string" do
      instance = described_class.new("")
      expect(instance).not_to be_valid
    end

    it "is false when containing invalid characters" do
      instance = described_class.new('my&example.com')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "contains invalid characters"
    end

    it "is true with a valid suffix" do
      instance = described_class.new('example.co.uk')
      expect(instance).to be_valid
    end

    it "is false with an invalid suffix" do
      instance = described_class.new('//')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "'//' is not a valid domain"
    end

    it "is true with 127 labels or less" do
      instance = described_class.new('.'*126+'com')
      expect(instance).to be_valid
    end

    it "is false when exceeding 127 labels" do
      instance = described_class.new('.'*127+'com')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "exceeds 127 labels"
    end

    it "is true when no labels are greater than 63 characters" do
      instance = described_class.new('a'*63+'.com')
      expect(instance).to be_valid
    end

    it "is false with labels greater than 63 characters" do
      instance = described_class.new('a'*64+'.com')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "exceeds maximum label length of 63 characters"
    end

    # Five 49-character labels plus ".com" is 253 characters in total.
    it "is true with 253 ASCII characters or less" do
      instance = described_class.new('a'*49+'.'+'b'*49+'.'+'c'*49+'.'+'d'*49+'.'+'e'*49+'.com')
      expect(instance).to be_valid
    end

    # Same labels with ".aero" is 254 characters, one over the limit.
    it "is false when exceeding 253 ASCII characters" do
      instance = described_class.new('a'*49+'.'+'b'*49+'.'+'c'*49+'.'+'d'*49+'.'+'e'*49+'.aero')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "exceeds 253 ASCII characters"
    end

  end

end