wgit 0.10.8 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
# frozen_string_literal: true

module Wgit
  # The RobotsParser class handles parsing and processing of a web server's
  # robots.txt file. Only the rules that apply to Wgit (the `wgit` and `*`
  # user agents) are recorded.
  class RobotsParser
    include Wgit::Assertable

    # Key representing the start of a comment.
    KEY_COMMENT = "#"
    # Key value separator used in robots.txt files.
    KEY_SEPARATOR = ":"
    # Key representing a user agent.
    KEY_USER_AGENT = "User-agent"
    # Key representing an allow URL rule.
    KEY_ALLOW = "Allow"
    # Key representing a disallow URL rule.
    KEY_DISALLOW = "Disallow"

    # Value representing the Wgit user agent.
    USER_AGENT_WGIT = :wgit
    # Value representing any user agent including Wgit.
    USER_AGENT_ANY = :*

    # Value representing any and all paths.
    PATHS_ALL = %w[/ *].freeze

    # Matches an inline comment: a comment key at the very start of a value
    # or preceded by whitespace, through to the end of the value.
    INLINE_COMMENT = /(?:\A|\s+)#{Regexp.escape(KEY_COMMENT)}.*\z/
    private_constant :INLINE_COMMENT

    # Hash containing the allow/disallow URL rules applying to Wgit. Each
    # value is a Set of path Strings. Looks like:
    #   allow_paths:    #<Set: {"/public"}>
    #   disallow_paths: #<Set: {"/accounts", ...}>
    attr_reader :rules

    # Initializes and returns a Wgit::RobotsParser instance having parsed the
    # robots.txt contents.
    #
    # @param contents [String, #to_s] The contents of the robots.txt file to be
    #   parsed.
    def initialize(contents)
      @rules = {
        allow_paths: Set.new,
        disallow_paths: Set.new
      }

      assert_respond_to(contents, :to_s)
      parse(contents.to_s)
    end

    # Overrides Object#inspect to shorten the printed output of a Parser.
    #
    # @return [String] A short textual representation of this Parser.
    def inspect
      "#<Wgit::RobotsParser has_rules=#{rules?} no_index=#{no_index?}>"
    end

    # Returns the allow paths/rules for this parser's robots.txt contents.
    #
    # @return [Array<String>] The allow paths/rules to follow.
    def allow_paths
      @rules[:allow_paths].to_a
    end

    # Returns the disallow paths/rules for this parser's robots.txt contents.
    #
    # @return [Array<String>] The disallow paths/rules to follow.
    def disallow_paths
      @rules[:disallow_paths].to_a
    end

    # Returns whether or not there are rules applying to Wgit.
    #
    # @return [Boolean] True if there are rules for Wgit to follow, false
    #   otherwise.
    def rules?
      allow_rules? || disallow_rules?
    end

    # Returns whether or not there are allow rules applying to Wgit.
    #
    # @return [Boolean] True if there are allow rules for Wgit to follow,
    #   false otherwise.
    def allow_rules?
      @rules[:allow_paths].any?
    end

    # Returns whether or not there are disallow rules applying to Wgit.
    #
    # @return [Boolean] True if there are disallow rules for Wgit to follow,
    #   false otherwise.
    def disallow_rules?
      @rules[:disallow_paths].any?
    end

    # Returns whether or not Wgit is banned from indexing this site, i.e. a
    # disallow rule covering all paths ("/" or "*") applies to Wgit.
    #
    # @return [Boolean] True if Wgit should not index this site, false
    #   otherwise.
    def no_index?
      @rules[:disallow_paths].any? { |path| PATHS_ALL.include?(path) }
    end

    # Declared above `private` so it is obvious these aliases are public.
    alias_method :paths, :rules
    alias_method :banned?, :no_index?

    private

    # Parses the file contents and sets @rules.
    #
    # A user agent block is denoted by N User-agent lines followed by N other
    # lines (Allow/Disallow rules or unsupported directives). The first
    # User-agent line seen after such lines starts a new block from scratch.
    def parse(contents)
      user_agents = []
      new_block   = false

      contents.each_line do |raw_line|
        line = raw_line.strip
        next if line.empty? || line.start_with?(KEY_COMMENT)

        key, value = split_line(line)

        if key == KEY_USER_AGENT.downcase
          user_agents = [] if new_block
          new_block = false
          user_agents << value.downcase.to_sym
          next
        end

        # Any non User-agent line closes the current run of user agents.
        new_block = true

        case key
        when KEY_ALLOW.downcase
          append_allow_rule(user_agents, value)
        when KEY_DISALLOW.downcase
          append_disallow_rule(user_agents, value)
        else
          Wgit.logger.debug("Skipping unsupported robots.txt line: #{line}")
        end
      end
    end

    # Splits a line on its FIRST separator only, so that values containing a
    # separator themselves (e.g. "/foo:bar") are kept intact.
    #
    # @param line [String] A stripped, non empty robots.txt line.
    # @return [Array<String>, Array<nil>] The downcased key and the value with
    #   surrounding whitespace and any inline comment removed; or [nil, nil]
    #   if the line contains no separator.
    def split_line(line)
      key, value = line.split(KEY_SEPARATOR, 2)
      return [nil, nil] unless value

      [key.strip.downcase, value.strip.sub(INLINE_COMMENT, "")]
    end

    # Records an allow rule if the current block applies to Wgit.
    #
    # Don't append * or / (or an empty path), as this means all paths, which
    # is the same as no allow_paths when passed to Wgit::Crawler.
    def append_allow_rule(user_agents, path)
      return unless wgit_user_agent?(user_agents)

      path = parse_special_syntax(path)
      return if path.empty? || PATHS_ALL.include?(path)

      @rules[:allow_paths] << path
    end

    # Records a disallow rule if the current block applies to Wgit.
    #
    # An empty Disallow value means that nothing is disallowed, so it must
    # not be recorded (and must never be treated as "disallow everything").
    def append_disallow_rule(user_agents, path)
      return unless wgit_user_agent?(user_agents)

      path = parse_special_syntax(path)
      return if path.empty?

      @rules[:disallow_paths] << path
    end

    # Returns true if any of the given user agents (downcased Symbols) is
    # either the Wgit user agent or the "any" user agent.
    def wgit_user_agent?(user_agents)
      user_agents.any? do |agent|
        [USER_AGENT_ANY, USER_AGENT_WGIT].include?(agent)
      end
    end

    # Removes the end-of-path marker $ e.g. "/blah$" becomes "/blah".
    def parse_special_syntax(path)
      path.delete("$")
    end
  end
end
data/lib/wgit/url.rb CHANGED
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'utils'
4
- require_relative 'assertable'
5
- require 'uri'
6
- require 'addressable/uri'
3
+ require_relative "utils"
4
+ require_relative "assertable"
5
+ require "uri"
6
+ require "addressable/uri"
7
7
 
8
8
  module Wgit
9
9
  # Class modeling/serialising a web based HTTP URL.
@@ -28,6 +28,9 @@ module Wgit
28
28
  # The duration of the crawl for this Url (in seconds).
29
29
  attr_accessor :crawl_duration
30
30
 
31
+ # Record the redirects from the initial Url to the final Url.
32
+ attr_reader :redirects
33
+
31
34
  # Initializes a new instance of Wgit::Url which models a web based
32
35
  # HTTP URL.
33
36
  #
@@ -53,16 +56,18 @@ module Wgit
53
56
  obj = url_or_obj
54
57
  assert_respond_to(obj, :fetch)
55
58
 
56
- url = obj.fetch('url') # Should always be present.
57
- crawled = obj.fetch('crawled', false)
58
- date_crawled = obj.fetch('date_crawled', nil)
59
- crawl_duration = obj.fetch('crawl_duration', nil)
59
+ url = obj.fetch("url") # Should always be present.
60
+ crawled = obj.fetch("crawled", false)
61
+ date_crawled = obj.fetch("date_crawled", nil)
62
+ crawl_duration = obj.fetch("crawl_duration", nil)
63
+ redirects = obj.fetch("redirects", {})
60
64
  end
61
65
 
62
66
  @uri = Addressable::URI.parse(url)
63
67
  @crawled = crawled
64
68
  @date_crawled = date_crawled
65
69
  @crawl_duration = crawl_duration
70
+ @redirects = redirects || {}
66
71
 
67
72
  super(url)
68
73
  end
@@ -84,7 +89,7 @@ module Wgit
84
89
  # @raise [StandardError] If obj.is_a?(String) is false.
85
90
  # @return [Wgit::Url] A Wgit::Url instance.
86
91
  def self.parse(obj)
87
- raise 'Can only parse if obj#is_a?(String)' unless obj.is_a?(String)
92
+ raise "Can only parse if obj#is_a?(String)" unless obj.is_a?(String)
88
93
 
89
94
  # Return a Wgit::Url as is to avoid losing state e.g. date_crawled etc.
90
95
  obj.is_a?(Wgit::Url) ? obj : new(obj)
@@ -107,16 +112,6 @@ Addressable::URI::InvalidURIError")
107
112
  nil
108
113
  end
109
114
 
110
- # Sets the @crawled instance var, also setting @date_crawled for
111
- # convenience.
112
- #
113
- # @param bool [Boolean] True if this Url has been crawled, false otherwise.
114
- # @return [Boolean] The value of bool having been set.
115
- def crawled=(bool)
116
- @crawled = bool
117
- @date_crawled = bool ? Wgit::Utils.time_stamp : nil
118
- end
119
-
120
115
  # Overrides String#inspect to distinguish this Url from a String.
121
116
  #
122
117
  # @return [String] A short textual representation of this Url.
@@ -134,6 +129,71 @@ Addressable::URI::InvalidURIError")
134
129
  super(new_url)
135
130
  end
136
131
 
132
+ # Overrides String#concat which oddly returns a Wgit::Url object, and
133
+ # instead returns a String. Therefore this method works the same as if
134
+ # you call String#concat, or its alias String#+, which is desired for
135
+ # this method. If you want to join two Urls, use Wgit::Url#join method.
136
+ #
137
+ # @param other [String] The String to concat onto this one.
138
+ # @return [String] The new concatted String, not a Wgit::Url.
139
+ def concat(other)
140
+ to_s.concat(other.to_s)
141
+ end
142
+
143
+ # Sets the @crawled instance var, also setting @date_crawled for
144
+ # convenience.
145
+ #
146
+ # @param bool [Boolean] True if this Url has been crawled, false otherwise.
147
+ # @return [Boolean] The value of bool having been set.
148
+ def crawled=(bool)
149
+ @crawled = bool
150
+ @date_crawled = bool ? Wgit::Utils.time_stamp : nil
151
+ end
152
+
153
+ # Sets the @redirects instance var, mapping any Strings into Wgit::Urls.
154
+ #
155
+ # @param redirects [Hash] The redirects Hash to set for this Url.
156
+ def redirects=(redirects)
157
+ assert_type(redirects, Hash)
158
+
159
+ map_to_url = proc do |url|
160
+ Wgit::Url.new(url.to_s, crawled: @crawled, date_crawled: @date_crawled)
161
+ end
162
+
163
+ @redirects = redirects
164
+ .map { |from, to| [map_to_url.call(from), map_to_url.call(to)] }
165
+ .to_h
166
+ end
167
+
168
+ # Returns the Wgit::Url's starting with the originally requested Url to be
169
+ # crawled, followed by each redirected to Url, finishing with the final
170
+ # crawled Url e.g.
171
+ #
172
+ # Example Url redirects journey (dictated by the webserver):
173
+ #
174
+ # ```
175
+ # http://example.com => 301 to https://example.com
176
+ # https://example.com => 301 to https://example.com/
177
+ # https://example.com/ => 200 OK (no more redirects, crawl complete)
178
+ # ```
179
+ #
180
+ # Would return an Array of Wgit::Url's in the form of:
181
+ #
182
+ # ```
183
+ # %w(
184
+ # http://example.com
185
+ # https://example.com
186
+ # https://example.com/
187
+ # )
188
+ # ```
189
+ #
190
+ # @return [Array<Wgit::Url>] Each redirected to Url's finishing with the
191
+ # final (successfully) crawled Url. If no redirects took place, then just
192
+ # the originally requested Url is returned inside the Array.
193
+ def redirects_journey
194
+ [redirects.keys, self].flatten
195
+ end
196
+
137
197
  # Returns true if self is a relative Url; false if absolute.
138
198
  #
139
199
  # An absolute URL must have a scheme prefix e.g.
@@ -167,10 +227,10 @@ Addressable::URI::InvalidURIError")
167
227
  def relative?(opts = {})
168
228
  defaults = { origin: nil, host: nil, domain: nil, brand: nil }
169
229
  opts = defaults.merge(opts)
170
- raise 'Url (self) cannot be empty' if empty?
230
+ raise "Url (self) cannot be empty" if empty?
171
231
 
172
232
  return false if scheme_relative?
173
- return true if @uri.relative?
233
+ return true if @uri.relative?
174
234
 
175
235
  # Self is absolute but may be relative to the opts param e.g. host.
176
236
  opts.select! { |_k, v| v }
@@ -226,22 +286,23 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
226
286
  !valid?
227
287
  end
228
288
 
229
- # Concats self and other together before returning a new Url. Self is not
230
- # modified.
289
+ # Joins self and other together before returning a new Url. Self is not
290
+ # modified. Some magic occurs depending on what is being joined, see
291
+ # the source code for more information.
231
292
  #
232
- # @param other [Wgit::Url, String] The other to concat to the end of self.
293
+ # @param other [Wgit::Url, String] The other (relative) Url to join to the
294
+ # end of self.
233
295
  # @return [Wgit::Url] self + separator + other, separator depends on other.
234
- def concat(other)
296
+ def join(other)
235
297
  other = Wgit::Url.new(other)
236
- raise 'other must be relative' unless other.relative?
298
+ raise "other must be relative" unless other.relative?
237
299
 
238
300
  other = other.omit_leading_slash
239
- separator = %w[# ? .].include?(other[0]) ? '' : '/'
240
-
241
- # We use to_s below to call String#+, not Wgit::Url#+ (alias for concat).
242
- concatted = omit_trailing_slash.to_s + separator.to_s + other.to_s
301
+ separator = %w[# ? .].include?(other[0]) ? "" : "/"
302
+ separator = "" if end_with?("/")
303
+ joined = self + separator + other
243
304
 
244
- Wgit::Url.new(concatted)
305
+ Wgit::Url.new(joined)
245
306
  end
246
307
 
247
308
  # Normalizes/escapes self and returns a new Wgit::Url. Self isn't modified.
@@ -257,7 +318,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
257
318
  #
258
319
  # If self is absolute then it's returned as is, making this method
259
320
  # idempotent. The doc's `<base>` element is used if present, otherwise
260
- # `doc.url` is used as the base; which is concatted with self.
321
+ # `doc.url` is used as the base; which is joined with self.
261
322
  #
262
323
  # Typically used to build an absolute link obtained from a document.
263
324
  #
@@ -267,19 +328,19 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
267
328
  #
268
329
  # link.make_absolute(doc) # => "http://example.com/favicon.png"
269
330
  #
270
- # @param doc [Wgit::Document] The doc whose base Url is concatted with
331
+ # @param doc [Wgit::Document] The doc whose base Url is joined with
271
332
  # self.
272
333
  # @raise [StandardError] If doc isn't a Wgit::Document or if `doc.base_url`
273
334
  # raises an Exception.
274
335
  # @return [Wgit::Url] Self in absolute form.
275
336
  def make_absolute(doc)
276
337
  assert_type(doc, Wgit::Document)
277
- raise 'Cannot make absolute when Document @url is not valid' \
338
+ raise "Cannot make absolute when Document @url is not valid" \
278
339
  unless doc.url.valid?
279
340
 
280
341
  return prefix_scheme(doc.url.to_scheme&.to_sym) if scheme_relative?
281
342
 
282
- absolute? ? self : doc.base_url(link: self).concat(self)
343
+ absolute? ? self : doc.base_url(link: self).join(self)
283
344
  end
284
345
 
285
346
  # Returns self having prefixed a scheme/protocol. Doesn't modify receiver.
@@ -294,7 +355,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
294
355
 
295
356
  return self if absolute? && !scheme_relative?
296
357
 
297
- separator = scheme_relative? ? '' : '//'
358
+ separator = scheme_relative? ? "" : "//"
298
359
  Wgit::Url.new("#{scheme}:#{separator}#{self}")
299
360
  end
300
361
 
@@ -303,8 +364,8 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
303
364
  #
304
365
  # @return [Hash] self's instance vars as a Hash.
305
366
  def to_h
306
- h = Wgit::Utils.to_h(self, ignore: ['@uri'])
307
- Hash[h.to_a.insert(0, ['url', self])] # Insert url at position 0.
367
+ h = Wgit::Utils.to_h(self, ignore: ["@uri"])
368
+ Hash[h.to_a.insert(0, ["url", to_s])] # Insert url at position 0.
308
369
  end
309
370
 
310
371
  # Returns a normalised URI object for this URL.
@@ -379,7 +440,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
379
440
  dot_domain = ".#{to_domain}"
380
441
  return nil unless include?(dot_domain)
381
442
 
382
- sub_domain = to_host.sub(dot_domain, '')
443
+ sub_domain = to_host.sub(dot_domain, "")
383
444
  Wgit::Url.new(sub_domain)
384
445
  end
385
446
 
@@ -389,7 +450,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
389
450
  # @return [Wgit::Url, nil] Containing just the brand or nil.
390
451
  def to_brand
391
452
  domain = to_domain
392
- domain ? Wgit::Url.new(domain.split('.').first) : nil
453
+ domain ? Wgit::Url.new(domain.split(".").first) : nil
393
454
  end
394
455
 
395
456
  # Returns only the base of this URL e.g. the protocol scheme and host
@@ -425,9 +486,9 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
425
486
  def to_path
426
487
  path = @uri.path
427
488
  return nil if path.nil? || path.empty?
428
- return Wgit::Url.new('/') if path == '/'
489
+ return Wgit::Url.new("/") if path == "/"
429
490
 
430
- Wgit::Url.new(path).omit_slashes
491
+ Wgit::Url.new(path).omit_leading_slash
431
492
  end
432
493
 
433
494
  # Returns the endpoint of this URL e.g. the bit after the host with any
@@ -439,7 +500,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
439
500
  # an endpoint, / is returned.
440
501
  def to_endpoint
441
502
  endpoint = @uri.path
442
- endpoint = '/' + endpoint unless endpoint.start_with?('/')
503
+ endpoint = "/#{endpoint}" unless endpoint.start_with?("/")
443
504
  Wgit::Url.new(endpoint)
444
505
  end
445
506
 
@@ -463,8 +524,8 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
463
524
  query_str = to_query
464
525
  return {} unless query_str
465
526
 
466
- query_str.split('&').each_with_object({}) do |param, hash|
467
- k, v = param.split('=')
527
+ query_str.split("&").each_with_object({}) do |param, hash|
528
+ k, v = param.split("=")
468
529
  k = k.to_sym if symbolize_keys
469
530
  hash[k] = v
470
531
  end
@@ -484,10 +545,10 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
484
545
  #
485
546
  # @return [Wgit::Url, nil] Containing just the extension string or nil.
486
547
  def to_extension
487
- path = to_path
548
+ path = to_path&.omit_trailing_slash
488
549
  return nil unless path
489
550
 
490
- segs = path.split('.')
551
+ segs = path.split(".")
491
552
  segs.length > 1 ? Wgit::Url.new(segs.last) : nil
492
553
  end
493
554
 
@@ -530,7 +591,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
530
591
  #
531
592
  # @return [Wgit::Url] Self without a leading slash.
532
593
  def omit_leading_slash
533
- start_with?('/') ? Wgit::Url.new(self[1..-1]) : self
594
+ start_with?("/") ? Wgit::Url.new(self[1..]) : self
534
595
  end
535
596
 
536
597
  # Returns a new Wgit::Url containing self without a trailing slash. Is
@@ -539,7 +600,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
539
600
  #
540
601
  # @return [Wgit::Url] Self without a trailing slash.
541
602
  def omit_trailing_slash
542
- end_with?('/') ? Wgit::Url.new(chop) : self
603
+ end_with?("/") ? Wgit::Url.new(chop) : self
543
604
  end
544
605
 
545
606
  # Returns a new Wgit::Url containing self without a leading or trailing
@@ -560,11 +621,11 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
560
621
  # @return [Wgit::Url] Self containing everything after the base.
561
622
  def omit_base
562
623
  base_url = to_base
563
- omit_base = base_url ? gsub(base_url, '') : self
624
+ omit_base = base_url ? gsub(base_url, "") : self
564
625
 
565
- return self if ['', '/'].include?(omit_base)
626
+ return self if ["", "/"].include?(omit_base)
566
627
 
567
- Wgit::Url.new(omit_base).omit_slashes
628
+ Wgit::Url.new(omit_base).omit_leading_slash
568
629
  end
569
630
 
570
631
  # Returns a new Wgit::Url with the origin (base + port) removed e.g. Given
@@ -575,11 +636,11 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
575
636
  # @return [Wgit::Url] Self containing everything after the origin.
576
637
  def omit_origin
577
638
  origin = to_origin
578
- omit_origin = origin ? gsub(origin, '') : self
639
+ omit_origin = origin ? gsub(origin, "") : self
579
640
 
580
- return self if ['', '/'].include?(omit_origin)
641
+ return self if ["", "/"].include?(omit_origin)
581
642
 
582
- Wgit::Url.new(omit_origin).omit_slashes
643
+ Wgit::Url.new(omit_origin).omit_leading_slash
583
644
  end
584
645
 
585
646
  # Returns a new Wgit::Url with the query string portion removed e.g. Given
@@ -591,7 +652,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
591
652
  # @return [Wgit::Url] Self with the query string portion removed.
592
653
  def omit_query
593
654
  query = to_query
594
- omit_query_string = query ? gsub("?#{query}", '') : self
655
+ omit_query_string = query ? gsub("?#{query}", "") : self
595
656
 
596
657
  Wgit::Url.new(omit_query_string)
597
658
  end
@@ -606,7 +667,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
606
667
  # @return [Wgit::Url] Self with the fragment portion removed.
607
668
  def omit_fragment
608
669
  fragment = to_fragment
609
- omit_fragment = fragment ? gsub("##{fragment}", '') : self
670
+ omit_fragment = fragment ? gsub("##{fragment}", "") : self
610
671
 
611
672
  Wgit::Url.new(omit_fragment)
612
673
  end
@@ -616,7 +677,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
616
677
  #
617
678
  # @return [Boolean] True if self is a query string, false otherwise.
618
679
  def query?
619
- start_with?('?')
680
+ start_with?("?")
620
681
  end
621
682
 
622
683
  # Returns true if self is a URL fragment e.g. #top etc. Note this
@@ -624,14 +685,14 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
624
685
  #
625
686
  # @return [Boolean] True if self is a fragment, false otherwise.
626
687
  def fragment?
627
- start_with?('#')
688
+ start_with?("#")
628
689
  end
629
690
 
630
691
  # Returns true if self equals '/' a.k.a. index.
631
692
  #
632
693
  # @return [Boolean] True if self equals '/', false otherwise.
633
694
  def index?
634
- self == '/'
695
+ self == "/"
635
696
  end
636
697
 
637
698
  # Returns true if self starts with '//' a.k.a a scheme/protocol relative
@@ -639,35 +700,34 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
639
700
  #
640
701
  # @return [Boolean] True if self starts with '//', false otherwise.
641
702
  def scheme_relative?
642
- start_with?('//')
643
- end
644
-
645
- alias + concat
646
- alias crawled? crawled
647
- alias is_relative? relative?
648
- alias is_absolute? absolute?
649
- alias is_valid? valid?
650
- alias is_query? query?
651
- alias is_fragment? fragment?
652
- alias is_index? index?
653
- alias is_scheme_relative? scheme_relative?
654
- alias uri to_uri
655
- alias url to_url
656
- alias scheme to_scheme
657
- alias host to_host
658
- alias port to_port
659
- alias domain to_domain
660
- alias brand to_brand
661
- alias base to_base
662
- alias origin to_origin
663
- alias path to_path
664
- alias endpoint to_endpoint
665
- alias query to_query
666
- alias query_hash to_query_hash
667
- alias fragment to_fragment
668
- alias extension to_extension
669
- alias user to_user
670
- alias password to_password
671
- alias sub_domain to_sub_domain
703
+ start_with?("//")
704
+ end
705
+
706
+ alias_method :crawled?, :crawled
707
+ alias_method :is_relative?, :relative?
708
+ alias_method :is_absolute?, :absolute?
709
+ alias_method :is_valid?, :valid?
710
+ alias_method :is_query?, :query?
711
+ alias_method :is_fragment?, :fragment?
712
+ alias_method :is_index?, :index?
713
+ alias_method :is_scheme_relative?, :scheme_relative?
714
+ alias_method :uri, :to_uri
715
+ alias_method :url, :to_url
716
+ alias_method :scheme, :to_scheme
717
+ alias_method :host, :to_host
718
+ alias_method :port, :to_port
719
+ alias_method :domain, :to_domain
720
+ alias_method :brand, :to_brand
721
+ alias_method :base, :to_base
722
+ alias_method :origin, :to_origin
723
+ alias_method :path, :to_path
724
+ alias_method :endpoint, :to_endpoint
725
+ alias_method :query, :to_query
726
+ alias_method :query_hash, :to_query_hash
727
+ alias_method :fragment, :to_fragment
728
+ alias_method :extension, :to_extension
729
+ alias_method :user, :to_user
730
+ alias_method :password, :to_password
731
+ alias_method :sub_domain, :to_sub_domain
672
732
  end
673
733
  end