url_parser 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,206 @@
require 'digest/sha1'
require 'forwardable'
require 'resolv'

module UrlParser
  # Public wrapper around a parsed URI.
  #
  # On construction the input is handed to UrlParser::Parser together with the
  # prepared options and a transformation block. The parser's result is kept
  # in #uri and wrapped in a UrlParser::Model, to which every reader listed in
  # COMPONENTS is delegated.
  class URI
    extend Forwardable

    # Matches the hostname "localhost" itself or any name ending in ".localhost".
    LOCALHOST_REGEXP = /(\A|\.)localhost\z/

    # Readers delegated to the model. Frozen so the list cannot be mutated
    # after the delegators have been generated from it.
    COMPONENTS = [
      :scheme,              # Top level URI naming structure / protocol.
      :username,            # Username portion of the userinfo.
      :user,                # Alias for #username.
      :password,            # Password portion of the userinfo.
      :userinfo,            # URI username and password for authentication.
      :hostname,            # Fully qualified domain name or IP address.
      :naked_hostname,      # Hostname without any ww? prefix.
      :port,                # Port number.
      :host,                # Hostname and port.
      :www,                 # The ww? portion of the subdomain.
      :tld,                 # Returns the top level domain portion, aka the extension.
      :top_level_domain,    # Alias for #tld.
      :extension,           # Alias for #tld.
      :sld,                 # Returns the second level domain portion, aka the domain part.
      :second_level_domain, # Alias for #sld.
      :domain_name,         # Alias for #sld.
      :trd,                 # Returns the third level domain portion, aka the subdomain part.
      :third_level_domain,  # Alias for #trd.
      :subdomains,          # Alias for #trd.
      :naked_trd,           # Any non-ww? subdomains.
      :naked_subdomain,     # Alias for #naked_trd.
      :domain,              # The domain name with the tld.
      :subdomain,           # All subdomains, include ww?.
      :origin,              # Scheme and host.
      :authority,           # Userinfo and host.
      :site,                # Scheme, userinfo, and host.
      :path,                # Directory and segment.
      :segment,             # Last portion of the path.
      :directory,           # Any directories following the site within the URI.
      :filename,            # Segment if a file extension is present.
      :suffix,              # The file extension of the filename.
      :query,               # Params and values as a string.
      :query_values,        # A hash of params and values.
      :fragment,            # Fragment identifier.
      :resource,            # Path, query, and fragment.
      :location             # Directory and resource - everything after the site.
    ].freeze

    def_delegators :@model, *COMPONENTS

    def_delegator :@model, :parsed_domain
    def_delegator :parsed_domain, :labels

    attr_reader :input, :uri, :options

    # uri     - the value to parse; stored untouched as #input.
    # options - passed through UrlParser::OptionSetter (see #set_options).
    # blk     - optional block forwarded to the option setter and then used as
    #           the transformation block given to the parser. Without one, the
    #           block from #block_builder is used.
    def initialize(uri, options = {}, &blk)
      @input   = uri
      @options = set_options(options, &blk)
      # Must be assigned after @options: the default block reads the options.
      @block   = blk || block_builder
      @uri     = UrlParser::Parser.call(@input, @options, &@block)
      @model   = UrlParser::Model.new(@uri)
    end

    # True when the :unescape option is set.
    def unescaped?
      !!options[:unescape]
    end

    # Always true.
    def parsed?
      true
    end

    # True when the :unembed option is set.
    def unembedded?
      !!options[:unembed]
    end

    # True when the :canonicalize option is set.
    def canonicalized?
      !!options[:canonicalize]
    end

    # True when the :normalize option is set.
    def normalized?
      !!options[:normalize]
    end

    # True when the :clean option is set, or when every individual step
    # (unescape, parse, unembed, canonicalize, normalize) is enabled.
    def cleaned?
      return true if options[:clean]

      unescaped? && parsed? && unembedded? && canonicalized? && normalized?
    end

    # Returns #raw when this instance is already cleaned; otherwise re-parses
    # the original input with `raw: true`, calling clean! on the yielded object.
    def clean
      return raw if cleaned?

      UrlParser::Parser.call(@input, raw: true) { |uri| uri.clean! }
    end

    # Cleans and converts into a naked hostname
    #
    # The naked hostname and location are concatenated, cleaned by the parser,
    # and any leading "scheme://" on the result is replaced with "//".
    def canonical
      curi = naked_hostname + location

      UrlParser::Parser
        .call(curi, raw: true) { |uri| uri.clean! }
        .sub(/\A[a-z]+:\/\//i, '//')
    end

    # True when the instance was built cleaned, or when its string form already
    # equals the result of #clean.
    def clean?
      cleaned? || to_s == clean
    end

    # Delegates to the parsed uri object.
    def relative?
      uri.relative?
    end

    # Delegates to the parsed uri object.
    def absolute?
      uri.absolute?
    end

    # True when the hostname matches LOCALHOST_REGEXP.
    def localhost?
      !!(hostname.to_s[LOCALHOST_REGEXP])
    end

    # The part of the hostname matching Resolv::IPv4::Regex, or nil.
    def ipv4
      hostname.to_s[Resolv::IPv4::Regex]
    end

    def ipv4?
      !!ipv4
    end

    # The part of the host matching Resolv::IPv6::Regex, or nil.
    #
    # NOTE(review): unlike #ipv4 this reads #host (described above as
    # "hostname and port") rather than #hostname - confirm this is intended.
    def ipv6
      host.to_s[Resolv::IPv6::Regex]
    end

    def ipv6?
      !!ipv6
    end

    def ip_address?
      ipv4? || ipv6?
    end

    # True when the host is not localhost and has no ww? prefix.
    def naked?
      !localhost? && www.nil?
    end

    # String form of the parsed uri object.
    def raw
      uri.to_s
    end
    alias_method :to_s, :raw

    # Hex SHA1 digest of #raw.
    def sha1
      Digest::SHA1.hexdigest(raw)
    end
    # NOTE(review): this replaces Object#hash with a method returning a hex
    # String, whereas Ruby expects #hash to return an Integer. Kept as-is for
    # API compatibility; presumably instances should not be used as Hash keys.
    alias_method :hash, :sha1

    # Two URIs are equal when their cleaned forms are equal.
    def ==(uri)
      clean == self.class.new(uri, clean: true).clean
    end

    # Two URIs match when their canonical forms are equal.
    def =~(uri)
      canonical == self.class.new(uri, clean: true).canonical
    end

    # Builds a new instance from +uri+, reusing this instance's options and
    # transformation block and passing this instance's string form as :base_uri.
    def +(uri)
      self.class.new(uri.to_s, options.merge(base_uri: to_s), &@block)
    end
    alias_method :join, :+

    # False for nil input or a relative URI; true for IP addresses and
    # localhost; otherwise defers to the parsed domain's own validity check.
    def valid?
      return false if input.nil? || relative?
      return true if ip_address? || localhost?

      parsed_domain.valid?
    end

    private

    # Runs the options (and optional block) through UrlParser::OptionSetter and
    # forces `raw: false` on the resulting hash.
    def set_options(opts = {}, &blk)
      UrlParser::OptionSetter
        .new(opts, &blk)
        .to_hash
        .merge(raw: false)
    end

    # Default transformation block: a single clean! when the instance is
    # cleaned, otherwise each enabled step in a fixed order.
    def block_builder
      proc do |uri|
        if cleaned?
          uri.clean!
        else
          uri.unescape!     if unescaped?
          uri.parse!        if parsed?
          uri.unembed!      if unembedded?
          uri.canonicalize! if canonicalized?
          uri.normalize!    if normalized?
        end
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module UrlParser
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -1,4 +1,10 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+
3
+ require 'codeclimate-test-reporter'
4
+ CodeClimate::TestReporter.start
5
+
1
6
  require "rspec"
7
+
2
8
  begin
3
9
  require "pry"
4
10
  rescue LoadError; end
@@ -7,19 +13,90 @@ require "url_parser"
7
13
 
8
14
  # This file was generated by the `rspec --init` command. Conventionally, all
9
15
  # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
10
- # Require this file using `require "spec_helper"` to ensure that it is only
11
- # loaded once.
16
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
17
+ # file to always be loaded, without a need to explicitly require it in any files.
18
+ #
19
+ # Given that it is always loaded, you are encouraged to keep this file as
20
+ # light-weight as possible. Requiring heavyweight dependencies from this file
21
+ # will add to the boot time of your test suite on EVERY test run, even for an
22
+ # individual file that may not need all of that loaded. Instead, consider making
23
+ # a separate helper file that requires the additional dependencies and performs
24
+ # the additional setup, and require it from the spec files that actually need it.
25
+ #
26
+ # The `.rspec` file also contains a few flags that are not defaults but that
27
+ # users commonly want.
12
28
  #
13
29
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
14
30
  RSpec.configure do |config|
15
- config.run_all_when_everything_filtered = true
31
+
32
+ # rspec-expectations config goes here. You can use an alternate
33
+ # assertion/expectation library such as wrong or the stdlib/minitest
34
+ # assertions if you prefer.
35
+ config.expect_with :rspec do |expectations|
36
+ # This option will default to `true` in RSpec 4. It makes the `description`
37
+ # and `failure_message` of custom matchers include text for helper methods
38
+ # defined using `chain`, e.g.:
39
+ # be_bigger_than(2).and_smaller_than(4).description
40
+ # # => "be bigger than 2 and smaller than 4"
41
+ # ...rather than:
42
+ # # => "be bigger than 2"
43
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
44
+ end
45
+
46
+ # rspec-mocks config goes here. You can use an alternate test double
47
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
48
+ config.mock_with :rspec do |mocks|
49
+ # Prevents you from mocking or stubbing a method that does not exist on
50
+ # a real object. This is generally recommended, and will default to
51
+ # `true` in RSpec 4.
52
+ mocks.verify_partial_doubles = true
53
+ end
54
+
55
+ # These two settings work together to allow you to limit a spec run
56
+ # to individual examples or groups you care about by tagging them with
57
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
58
+ # get run.
16
59
  config.filter_run :focus
17
- config.raise_errors_for_deprecations!
60
+ config.run_all_when_everything_filtered = true
61
+
62
+ # Limits the available syntax to the non-monkey patched syntax that is recommended.
63
+ # For more details, see:
64
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
65
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
66
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
67
+ config.disable_monkey_patching!
68
+
69
+ # This setting enables warnings. It's recommended, but in some cases may
70
+ # be too noisy due to issues in dependencies.
71
+ # config.warnings = true
72
+
73
+ # Many RSpec users commonly either run the entire suite or an individual
74
+ # file, and it's useful to allow more verbose output when running an
75
+ # individual spec file.
76
+ # if config.files_to_run.one?
77
+ # Use the documentation formatter for detailed output,
78
+ # unless a formatter has already been configured
79
+ # (e.g. via a command-line flag).
80
+ # config.default_formatter = 'doc'
81
+ # end
82
+
83
+ # Print the 10 slowest examples and example groups at the
84
+ # end of the spec run, to help surface which specs are running
85
+ # particularly slow.
86
+ # config.profile_examples = 10
87
+
18
88
  # Run specs in random order to surface order dependencies. If you find an
19
89
  # order dependency and want to debug it, you can fix the order by providing
20
90
  # the seed, which is printed after each run.
21
91
  # --seed 1234
22
- config.order = 'random'
23
- end
92
+ config.order = :random
24
93
 
94
+ # Seed global randomization in this process using the `--seed` CLI option.
95
+ # Setting this allows you to use `--seed` to deterministically reproduce
96
+ # test failures related to randomization by passing the same `--seed` value
97
+ # as the one that triggered the failure.
98
+ Kernel.srand config.seed
99
+
100
+ end
25
101
 
102
+ Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each {|f| require f }
File without changes
@@ -0,0 +1,7 @@
# Examples tagged with :disable_raise_error_warning run with rspec-expectations'
# "potential false positive" warnings switched off.
RSpec.configure do |c|
  c.around(:each, :disable_raise_error_warning) do |example|
    expectations = RSpec::Expectations.configuration
    previous = expectations.warn_about_potential_false_positives?

    begin
      expectations.warn_about_potential_false_positives = false
      example.call
    ensure
      # Put back whatever was configured before instead of forcing `true`,
      # so a suite that disabled the warning globally keeps it disabled.
      expectations.warn_about_potential_false_positives = previous
    end
  end
end
@@ -0,0 +1,163 @@
1
+ require 'spec_helper'
2
+
# Specs for UrlParser::Domain: input normalisation, label splitting, public
# suffix lookup (tld / sld / trd) and the length and character validations.
RSpec.describe UrlParser::Domain do

  context ".new" do

    it "downcases the input" do
      instance = described_class.new('EXAMPLE.COM')
      expect(instance.original).to eq 'example.com'
    end

    it "removes the root label from absolute domains" do
      instance = described_class.new('example.com.')
      expect(instance.original).to eq 'example.com'
    end

    it "sets #original as the input string" do
      instance = described_class.new("💩.la")
      expect(instance.original).to eq "💩.la"
    end

    it "sets the name as a string containing only ASCII characters" do
      instance = described_class.new("💩.la")
      expect(instance.name).to eq "xn--ls8h.la"
    end

  end

  context "#labels" do

    # Labels are expected in reverse order, top level domain first.
    it "returns an array of domain parts" do
      instance = described_class.new('www.my.example.com')
      expect(instance.labels).to eq(["com", "example", "my", "www"])
    end

  end

  context "#suffix" do

    it "when domain is valid, returns a PublicSuffix::Domain" do
      instance = described_class.new('my.example.com')
      expect(instance.suffix).to be_a PublicSuffix::Domain
    end

    it "with a PublicSuffix::Domain, a call to #to_s returns the domain" do
      instance = described_class.new('my.example.com')
      expect(instance.suffix.to_s).to eq 'my.example.com'
    end

    it "when domain is invalid, returns a OpenStruct" do
      instance = described_class.new('//')
      expect(instance.suffix).to be_a OpenStruct
    end

    it "when domain is invalid, a call to #to_s returns an empty string" do
      instance = described_class.new('//')
      expect(instance.suffix.to_s).to eq ''
    end

  end

  context "#tld" do

    it "when domain is valid, returns the top level domain" do
      instance = described_class.new('www.my.example.com')
      expect(instance.tld).to eq 'com'
    end

    it "when domain is invalid, returns nil" do
      instance = described_class.new('//')
      expect(instance.tld).to be_nil
    end

  end

  context "#sld" do

    it "when domain is valid, returns the second level domain" do
      instance = described_class.new('www.my.example.com')
      expect(instance.sld).to eq 'example'
    end

    it "when domain is invalid, returns nil" do
      instance = described_class.new('//')
      expect(instance.sld).to be_nil
    end

  end

  context "#trd" do

    it "when domain is valid, returns the third level domain" do
      instance = described_class.new('www.my.example.com')
      expect(instance.trd).to eq 'www.my'
    end

    it "when domain is invalid, returns nil" do
      instance = described_class.new('//')
      expect(instance.trd).to be_nil
    end

  end

  context "#valid?" do

    it "does not fail on an empty string" do
      instance = described_class.new("")
      expect(instance).not_to be_valid
    end

    it "is false when containing invalid characters" do
      instance = described_class.new('my&example.com')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "contains invalid characters"
    end

    it "is true with a valid suffix" do
      instance = described_class.new('example.co.uk')
      expect(instance).to be_valid
    end

    it "is false with an invalid suffix" do
      instance = described_class.new('//')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "'//' is not a valid domain"
    end

    it "is true with 127 labels or less" do
      instance = described_class.new('.'*126+'com')
      expect(instance).to be_valid
    end

    it "is false when exceeding 127 labels" do
      instance = described_class.new('.'*127+'com')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "exceeds 127 labels"
    end

    it "is true when no labels are greater than 63 characters" do
      instance = described_class.new('a'*63+'.com')
      expect(instance).to be_valid
    end

    it "is false with labels greater than 63 characters" do
      instance = described_class.new('a'*64+'.com')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "exceeds maximum label length of 63 characters"
    end

    # Five 49-character labels plus five dots is 250 characters; adding "com"
    # gives exactly 253.
    it "is true with 253 ASCII characters or less" do
      instance = described_class.new('a'*49+'.'+'b'*49+'.'+'c'*49+'.'+'d'*49+'.'+'e'*49+'.com')
      expect(instance).to be_valid
    end

    # Same 250-character prefix with "aero" gives 254 characters, one over the limit.
    it "is false when exceeding 253 ASCII characters" do
      instance = described_class.new('a'*49+'.'+'b'*49+'.'+'c'*49+'.'+'d'*49+'.'+'e'*49+'.aero')
      expect(instance).not_to be_valid
      expect(instance.errors).to include "exceeds 253 ASCII characters"
    end

  end

end