wgit 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,8 +34,8 @@ Wgit::Document.define_extension(
34
34
  '//meta[@name="keywords"]/@content',
35
35
  singleton: true,
36
36
  text_content_only: true
37
- ) do |keywords, source|
38
- if keywords && (source == :html)
37
+ ) do |keywords, _source, type|
38
+ if keywords && (type == :document)
39
39
  keywords = keywords.split(',')
40
40
  Wgit::Utils.process_arr(keywords)
41
41
  end
data/lib/wgit/indexer.rb CHANGED
@@ -44,12 +44,16 @@ module Wgit
44
44
  # inserted into the database allowing for prior manipulation.
45
45
  # @return [Integer] The total number of pages crawled within the website.
46
46
  def self.index_site(
47
- url, connection_string: nil, insert_externals: true, &block
47
+ url, connection_string: nil, insert_externals: true,
48
+ allow_paths: nil, disallow_paths: nil, &block
48
49
  )
49
50
  url = Wgit::Url.parse(url)
50
51
  db = Wgit::Database.new(connection_string)
51
52
  indexer = Wgit::Indexer.new(db)
52
- indexer.index_site(url, insert_externals: insert_externals, &block)
53
+ indexer.index_site(
54
+ url, insert_externals: insert_externals,
55
+ allow_paths: allow_paths, disallow_paths: disallow_paths, &block
56
+ )
53
57
  end
54
58
 
55
59
  # Convenience method to index a single webpage using
@@ -215,10 +219,13 @@ the next iteration.")
215
219
  # nil or false from the block to prevent the document from being saved
216
220
  # into the database.
217
221
  # @return [Integer] The total number of webpages/documents indexed.
218
- def index_site(url, insert_externals: true)
222
+ def index_site(
223
+ url, insert_externals: true, allow_paths: nil, disallow_paths: nil
224
+ )
225
+ crawl_opts = { allow_paths: allow_paths, disallow_paths: disallow_paths }
219
226
  total_pages_indexed = 0
220
227
 
221
- ext_urls = @crawler.crawl_site(url) do |doc|
228
+ ext_urls = @crawler.crawl_site(url, crawl_opts) do |doc|
222
229
  result = true
223
230
  result = yield(doc) if block_given?
224
231
 
@@ -231,8 +238,8 @@ the next iteration.")
231
238
  @db.url?(url) ? @db.update(url) : @db.insert(url)
232
239
 
233
240
  if insert_externals && ext_urls
234
- write_urls_to_db(ext_urls)
235
- Wgit.logger.info("Found and saved #{ext_urls.length} external url(s)")
241
+ num_inserted_urls = write_urls_to_db(ext_urls)
242
+ Wgit.logger.info("Found and saved #{num_inserted_urls} external url(s)")
236
243
  end
237
244
 
238
245
  Wgit.logger.info("Crawled and saved #{total_pages_indexed} docs for the \
@@ -266,8 +273,8 @@ site: #{url}")
266
273
 
267
274
  ext_urls = document&.external_links
268
275
  if insert_externals && ext_urls
269
- write_urls_to_db(ext_urls)
270
- Wgit.logger.info("Found and saved #{ext_urls.length} external url(s)")
276
+ num_inserted_urls = write_urls_to_db(ext_urls)
277
+ Wgit.logger.info("Found and saved #{num_inserted_urls} external url(s)")
271
278
  end
272
279
 
273
280
  nil
@@ -315,14 +322,19 @@ site: #{url}")
315
322
  def write_urls_to_db(urls)
316
323
  count = 0
317
324
 
318
- if urls.respond_to?(:each)
319
- urls.each do |url|
320
- @db.insert(url)
321
- count += 1
322
- Wgit.logger.info("Inserted url: #{url}")
323
- rescue Mongo::Error::OperationFailure
324
- Wgit.logger.info("Url already exists: #{url}")
325
+ return count unless urls.respond_to?(:each)
326
+
327
+ urls.each do |url|
328
+ if url.invalid?
329
+ Wgit.logger.info("Ignoring invalid external url: #{url}")
330
+ next
325
331
  end
332
+
333
+ @db.insert(url)
334
+ count += 1
335
+ Wgit.logger.info("Inserted external url: #{url}")
336
+ rescue Mongo::Error::OperationFailure
337
+ Wgit.logger.info("External url already exists: #{url}")
326
338
  end
327
339
 
328
340
  count
@@ -0,0 +1,144 @@
1
+ module Wgit
2
+ # Response class representing a generic HTTP crawl response.
3
+ class Response
4
+ # The underlying HTTP adapter/library response object.
5
+ attr_accessor :adapter_response
6
+
7
+ # The HTML response body.
8
+ attr_reader :body
9
+
10
+ # The HTTP response headers.
11
+ attr_reader :headers
12
+
13
+ # The server's IP address.
14
+ attr_accessor :ip_address
15
+
16
+ # The redirections of the response.
17
+ attr_reader :redirections
18
+
19
+ # The number of redirections for the response.
20
+ attr_reader :redirect_count
21
+
22
+ # The HTTP response status code.
23
+ attr_reader :status
24
+
25
+ # The total crawl/network time for the response.
26
+ attr_reader :total_time
27
+
28
+ # The HTTP request URL.
29
+ attr_accessor :url
30
+
31
+ # Defaults some values and returns a "blank" Wgit::Response object.
32
+ def initialize
33
+ @body = ''
34
+ @headers = {}
35
+ @redirections = {}
36
+ @total_time = 0.0
37
+ end
38
+
39
+ # Adds time to @total_time (incrementally).
40
+ #
41
+ # @param time [Float] The time to add to @total_time.
42
+ # @return [Float] @total_time's new value.
43
+ def add_total_time(time)
44
+ @total_time += (time || 0.0)
45
+ end
46
+
47
+ # Sets the HTML response body.
48
+ #
49
+ # @param str [String] The new HTML body.
50
+ # @return [String] @body's new value.
51
+ def body=(str)
52
+ @body = (str || '')
53
+ end
54
+
55
+ # Returns the HTML response body or nil (if it's empty).
56
+ #
57
+ # @return [String, NilClass] The HTML body or nil if empty.
58
+ def body_or_nil
59
+ @body.empty? ? nil : @body
60
+ end
61
+
62
+ # Returns true if the response isn't a #success? or a #redirect?
63
+ #
64
+ # @return [Boolean] True if failed, false otherwise.
65
+ def failure?
66
+ !success? && !redirect?
67
+ end
68
+
69
+ # Sets the headers Hash to the given value. The header keys are mapped
70
+ # to snake_cased Symbols for consistency.
71
+ #
72
+ # @param headers [Hash] The new response headers.
73
+ # @return [Hash] @headers's new value.
74
+ def headers=(headers)
75
+ return @headers = {} unless headers
76
+
77
+ @headers = headers.map do |k, v|
78
+ k = k.downcase.gsub('-', '_').to_sym
79
+ [k, v]
80
+ end.to_h
81
+ end
82
+
83
+ # Returns whether or not the response is 404 Not Found.
84
+ #
85
+ # @return [Boolean] True if 404 Not Found, false otherwise.
86
+ def not_found?
87
+ @status == 404
88
+ end
89
+
90
+ # Returns whether or not the response is 200 OK.
91
+ #
92
+ # @return [Boolean] True if 200 OK, false otherwise.
93
+ def ok?
94
+ @status == 200
95
+ end
96
+
97
+ # Returns whether or not the response is a 3xx Redirect.
98
+ #
99
+ # @return [Boolean] True if 3xx Redirect, false otherwise.
100
+ def redirect?
101
+ return false unless @status
102
+
103
+ @status.between?(300, 399)
104
+ end
105
+
106
+ # Returns the number of redirects this response has had.
107
+ #
108
+ # @return [Integer] The number of response redirects.
109
+ def redirect_count
110
+ @redirections.size
111
+ end
112
+
113
+ # Returns the size of the response body.
114
+ #
115
+ # @return [Integer] The response body size in bytes.
116
+ def size
117
+ @body.size
118
+ end
119
+
120
+ # Sets the HTTP response status.
121
+ #
122
+ # @param int [Integer] The new response status.
123
+ # @return [Integer] @status' new value.
124
+ def status=(int)
125
+ @status = int.positive? ? int : nil
126
+ end
127
+
128
+ # Returns whether or not the response is a 2xx Success.
129
+ #
130
+ # @return [Boolean] True if 2xx Success, false otherwise.
131
+ def success?
132
+ return false unless @status
133
+
134
+ @status.between?(200, 299)
135
+ end
136
+
137
+ alias code status
138
+ alias content body
139
+ alias crawl_time total_time
140
+ alias to_s body
141
+ alias redirects redirections
142
+ alias length size
143
+ end
144
+ end
data/lib/wgit/url.rb CHANGED
@@ -8,15 +8,19 @@ require 'addressable/uri'
8
8
  module Wgit
9
9
  # Class modeling a web based HTTP URL.
10
10
  #
11
- # Can be an internal/relative link e.g. "about.html" or a full URL
11
+ # Can be an internal/relative link e.g. "about.html" or an absolute URL
12
12
  # e.g. "http://www.google.co.uk". Is a subclass of String and uses 'uri' and
13
13
  # 'addressable/uri' internally.
14
+ #
15
+ # Most of the methods in this class return new Wgit::Url instances making the
16
+ # method calls chainable e.g. url.omit_base.omit_fragment etc. The methods
17
+ # also try to be idempotent where possible.
14
18
  class Url < String
15
19
  include Assertable
16
20
 
17
21
  # Whether or not the Url has been crawled or not. A custom crawled= method
18
22
  # is provided by this class, overriding the default one.
19
- attr_accessor :crawled
23
+ attr_reader :crawled
20
24
 
21
25
  # The Time stamp of when this Url was crawled.
22
26
  attr_accessor :date_crawled
@@ -110,7 +114,7 @@ module Wgit
110
114
 
111
115
  # Returns true if self is a relative Url; false if absolute.
112
116
  #
113
- # All external links in a page are expected to have a protocol prefix e.g.
117
+ # All external links in a page are expected to have a scheme prefix e.g.
114
118
  # 'http://', otherwise the link is treated as an internal link (regardless
115
119
  # of whether it's valid or not). The only exception is if an opts arg is
116
120
  # provided and self is a page belonging to that arg type e.g. host; then
@@ -118,7 +122,7 @@ module Wgit
118
122
  #
119
123
  # @param opts [Hash] The options with which to check relativity. Only one
120
124
  # opts param should be provided. The provided opts param Url must be
121
- # absolute and be prefixed with a protocol. Consider using the output of
125
+ # absolute and be prefixed with a scheme. Consider using the output of
122
126
  # Wgit::Url#to_base which should work unless it's nil.
123
127
  # @option opts [Wgit::Url, String] :base The Url base e.g.
124
128
  # http://www.google.com/how which gives a base of
@@ -147,8 +151,10 @@ module Wgit
147
151
 
148
152
  type, url = opts.first
149
153
  url = Wgit::Url.new(url)
150
- raise "Invalid opts param value, Url must be absolute and contain \
151
- protocol: #{url}" unless url.to_base
154
+ unless url.to_base
155
+ raise "Invalid opts param value, Url must be absolute and contain \
156
+ protocol scheme: #{url}"
157
+ end
152
158
 
153
159
  case type
154
160
  when :base # http://www.google.com
@@ -182,19 +188,29 @@ protocol: #{url}" unless url.to_base
182
188
  true
183
189
  end
184
190
 
185
- # Concats self and path together before returning a new Url. Self is not
191
+ # Returns whether or not self is an invalid (relative) HTTP Url.
192
+ #
193
+ # @return [Boolean] True if invalid, otherwise false.
194
+ def invalid?
195
+ !valid?
196
+ end
197
+
198
+ # Concats self and other together before returning a new Url. Self is not
186
199
  # modified.
187
200
  #
188
- # @param path [Wgit::Url, String] The path to concat onto the end of self.
189
- # @return [Wgit::Url] self + separator + path, separator depends on path.
190
- def concat(path)
191
- path = Wgit::Url.new(path)
192
- raise 'path must be relative' unless path.relative?
201
+ # @param other [Wgit::Url, String] The other to concat to the end of self.
202
+ # @return [Wgit::Url] self + separator + other, separator depends on other.
203
+ def concat(other)
204
+ other = Wgit::Url.new(other)
205
+ raise 'other must be relative' unless other.relative?
206
+
207
+ other = other.omit_leading_slash
208
+ separator = other.start_with?('#') || other.start_with?('?') ? '' : '/'
193
209
 
194
- path = path.without_leading_slash
195
- separator = path.start_with?('#') || path.start_with?('?') ? '' : '/'
210
+ # We use to_s below to call String#+, not Wgit::Url#+ (alias for concat).
211
+ concatted = omit_trailing_slash.to_s + separator.to_s + other.to_s
196
212
 
197
- Wgit::Url.new(without_trailing_slash + separator + path)
213
+ Wgit::Url.new(concatted)
198
214
  end
199
215
 
200
216
  # Normalises/escapes self and returns a new Wgit::Url. Self isn't modified.
@@ -204,21 +220,47 @@ protocol: #{url}" unless url.to_base
204
220
  Wgit::Url.new(@uri.normalize.to_s)
205
221
  end
206
222
 
207
- # Modifies self by prefixing it with a protocol. Returns the url whether
208
- # its been modified or not. The default protocol prefix is http://.
223
+ # Returns an absolute form of self within the context of doc. Doesn't
224
+ # modify the receiver.
209
225
  #
210
- # @param protocol [Symbol] Either :http or :https.
211
- # @return [Wgit::Url] The url with protocol prefix (having been modified).
212
- def prefix_protocol(protocol: :http)
213
- unless %i[http https].include?(protocol)
214
- raise 'protocol must be :http or :https'
215
- end
226
+ # If self is absolute then it's returned as is, making this method
227
+ # idempotent. The doc's <base> element is used if present, otherwise
228
+ # doc.url is used as the base; which is concatted with self.
229
+ #
230
+ # Typically used to build an absolute link obtained from a document e.g.
231
+ #
232
+ # link = Wgit::Url.new('/favicon.png')
233
+ # doc = Wgit::Document.new('http://example.com')
234
+ #
235
+ # link.prefix_base(doc) # => "http://example.com/favicon.png"
236
+ #
237
+ # @param doc [Wgit::Document] The doc whose base Url is concatted with
238
+ # self.
239
+ # @raise [StandardError] If doc isn't a Wgit::Document or if `doc.base_url`
240
+ # raises an Exception.
241
+ # @return [Wgit::Url] Self in absolute form.
242
+ def prefix_base(doc)
243
+ assert_type(doc, Wgit::Document)
216
244
 
217
- unless start_with?('http://') || start_with?('https://')
218
- protocol == :http ? replace("http://#{url}") : replace("https://#{url}")
219
- end
245
+ absolute? ? self : doc.base_url(link: self).concat(self)
246
+ end
220
247
 
221
- self
248
+ # Returns self having prefixed a protocol scheme. Doesn't modify receiver.
249
+ # Returns self even if absolute (with scheme); therefore is idempotent.
250
+ #
251
+ # @param protocol [Symbol] Either :http or :https.
252
+ # @return [Wgit::Url] Self with a protocol scheme prefix.
253
+ def prefix_scheme(protocol: :http)
254
+ return self if absolute?
255
+
256
+ case protocol
257
+ when :http
258
+ Wgit::Url.new("http://#{url}")
259
+ when :https
260
+ Wgit::Url.new("https://#{url}")
261
+ else
262
+ raise "protocol must be :http or :https, not :#{protocol}"
263
+ end
222
264
  end
223
265
 
224
266
  # Returns a Hash containing this Url's instance vars excluding @uri.
@@ -238,6 +280,13 @@ protocol: #{url}" unless url.to_base
238
280
  URI(normalize)
239
281
  end
240
282
 
283
+ # Returns the Addressable::URI object for this URL.
284
+ #
285
+ # @return [Addressable::URI] The Addressable::URI object of self.
286
+ def to_addressable_uri
287
+ @uri
288
+ end
289
+
241
290
  # Returns self.
242
291
  #
243
292
  # @return [Wgit::Url] This (self) Url.
@@ -245,10 +294,10 @@ protocol: #{url}" unless url.to_base
245
294
  self
246
295
  end
247
296
 
248
- # Returns a new Wgit::Url containing just the scheme/protocol of this URL
297
+ # Returns a new Wgit::Url containing just the scheme of this URL
249
298
  # e.g. Given http://www.google.co.uk, http is returned.
250
299
  #
251
- # @return [Wgit::Url, nil] Containing just the scheme/protocol or nil.
300
+ # @return [Wgit::Url, nil] Containing just the scheme or nil.
252
301
  def to_scheme
253
302
  scheme = @uri.scheme
254
303
  scheme ? Wgit::Url.new(scheme) : nil
@@ -281,9 +330,11 @@ protocol: #{url}" unless url.to_base
281
330
  domain ? Wgit::Url.new(domain.split('.').first) : nil
282
331
  end
283
332
 
284
- # Returns only the base of this URL e.g. the protocol and host combined.
333
+ # Returns only the base of this URL e.g. the protocol scheme and host
334
+ # combined.
285
335
  #
286
- # @return [Wgit::Url, nil] Base of self e.g. http://www.google.co.uk or nil.
336
+ # @return [Wgit::Url, nil] The base of self e.g. http://www.google.co.uk or
337
+ # nil.
287
338
  def to_base
288
339
  return nil if @uri.scheme.nil? || @uri.host.nil?
289
340
 
@@ -302,7 +353,7 @@ protocol: #{url}" unless url.to_base
302
353
  return nil if path.nil? || path.empty?
303
354
  return Wgit::Url.new('/') if path == '/'
304
355
 
305
- Wgit::Url.new(path).without_slashes
356
+ Wgit::Url.new(path).omit_slashes
306
357
  end
307
358
 
308
359
  # Returns the endpoint of this URL e.g. the bit after the host with any
@@ -324,16 +375,16 @@ protocol: #{url}" unless url.to_base
324
375
  # @return [Wgit::Url, nil] Containing just the query string or nil.
325
376
  def to_query
326
377
  query = @uri.query
327
- query ? Wgit::Url.new("?#{query}") : nil
378
+ query ? Wgit::Url.new(query) : nil
328
379
  end
329
380
 
330
- # Returns a new Wgit::Url containing just the anchor string of this URL
381
+ # Returns a new Wgit::Url containing just the fragment string of this URL
331
382
  # e.g. Given http://google.com#about, about is returned (without the '#').
332
383
  #
333
- # @return [Wgit::Url, nil] Containing just the anchor string or nil.
334
- def to_anchor
335
- anchor = @uri.fragment
336
- anchor ? Wgit::Url.new("##{anchor}") : nil
384
+ # @return [Wgit::Url, nil] Containing just the fragment string or nil.
385
+ def to_fragment
386
+ fragment = @uri.fragment
387
+ fragment ? Wgit::Url.new(fragment) : nil
337
388
  end
338
389
 
339
390
  # Returns a new Wgit::Url containing just the file extension of this URL
@@ -348,12 +399,27 @@ protocol: #{url}" unless url.to_base
348
399
  segs.length > 1 ? Wgit::Url.new(segs.last) : nil
349
400
  end
350
401
 
402
+ # Omits the given URL components from self and returns a new Wgit::Url.
403
+ #
404
+ # Calls Addressable::URI#omit underneath and creates a new Wgit::Url from
405
+ # the output. See the Addressable::URI docs for more information.
406
+ #
407
+ # @param components [*Symbol] One or more Symbols representing the URL
408
+ # components to omit. The following components are supported: :scheme,
409
+ # :user, :password, :userinfo, :host, :port, :authority, :path, :query,
410
+ # :fragment.
411
+ # @return [Wgit::Url] Self's URL value with the given components omitted.
412
+ def omit(*components)
413
+ omitted = @uri.omit(*components)
414
+ Wgit::Url.new(omitted.to_s)
415
+ end
416
+
351
417
  # Returns a new Wgit::Url containing self without a leading slash. Is
352
418
  # idempotent meaning self will always be returned regardless of whether
353
419
  # there's a leading slash or not.
354
420
  #
355
421
  # @return [Wgit::Url] Self without a leading slash.
356
- def without_leading_slash
422
+ def omit_leading_slash
357
423
  start_with?('/') ? Wgit::Url.new(self[1..-1]) : self
358
424
  end
359
425
 
@@ -362,7 +428,7 @@ protocol: #{url}" unless url.to_base
362
428
  # there's a trailing slash or not.
363
429
  #
364
430
  # @return [Wgit::Url] Self without a trailing slash.
365
- def without_trailing_slash
431
+ def omit_trailing_slash
366
432
  end_with?('/') ? Wgit::Url.new(chop) : self
367
433
  end
368
434
 
@@ -371,9 +437,9 @@ protocol: #{url}" unless url.to_base
371
437
  # present or not.
372
438
  #
373
439
  # @return [Wgit::Url] Self without leading or trailing slashes.
374
- def without_slashes
375
- without_leading_slash
376
- .without_trailing_slash
440
+ def omit_slashes
441
+ omit_leading_slash
442
+ .omit_trailing_slash
377
443
  end
378
444
 
379
445
  # Returns a new Wgit::Url with the base (proto and host) removed e.g. Given
@@ -382,13 +448,13 @@ protocol: #{url}" unless url.to_base
382
448
  # Leading and trailing slashes are always stripped from the return value.
383
449
  #
384
450
  # @return [Wgit::Url] Self containing everything after the base.
385
- def without_base
451
+ def omit_base
386
452
  base_url = to_base
387
- without_base = base_url ? gsub(base_url, '') : self
453
+ omit_base = base_url ? gsub(base_url, '') : self
388
454
 
389
- return self if ['', '/'].include?(without_base)
455
+ return self if ['', '/'].include?(omit_base)
390
456
 
391
- Wgit::Url.new(without_base).without_slashes
457
+ Wgit::Url.new(omit_base).omit_slashes
392
458
  end
393
459
 
394
460
  # Returns a new Wgit::Url with the query string portion removed e.g. Given
@@ -398,26 +464,26 @@ protocol: #{url}" unless url.to_base
398
464
  # URL.
399
465
  #
400
466
  # @return [Wgit::Url] Self with the query string portion removed.
401
- def without_query
467
+ def omit_query
402
468
  query = to_query
403
- without_query_string = query ? gsub(query, '') : self
469
+ omit_query_string = query ? gsub("?#{query}", '') : self
404
470
 
405
- Wgit::Url.new(without_query_string)
471
+ Wgit::Url.new(omit_query_string)
406
472
  end
407
473
 
408
- # Returns a new Wgit::Url with the anchor portion removed e.g. Given
474
+ # Returns a new Wgit::Url with the fragment portion removed e.g. Given
409
475
  # http://google.com/search#about, http://google.com/search is
410
- # returned. Self is returned as is if no anchor is present. A URL
411
- # consisting of only an anchor e.g. '#about' will return an empty URL.
412
- # This method assumes that the anchor is correctly placed at the very end
476
+ # returned. Self is returned as is if no fragment is present. A URL
477
+ # consisting of only a fragment e.g. '#about' will return an empty URL.
478
+ # This method assumes that the fragment is correctly placed at the very end
413
479
  # of the URL.
414
480
  #
415
- # @return [Wgit::Url] Self with the anchor portion removed.
416
- def without_anchor
417
- anchor = to_anchor
418
- without_anchor = anchor ? gsub(anchor, '') : self
481
+ # @return [Wgit::Url] Self with the fragment portion removed.
482
+ def omit_fragment
483
+ fragment = to_fragment
484
+ omit_fragment = fragment ? gsub("##{fragment}", '') : self
419
485
 
420
- Wgit::Url.new(without_anchor)
486
+ Wgit::Url.new(omit_fragment)
421
487
  end
422
488
 
423
489
  # Returns true if self is a URL query string e.g. ?q=hello etc. Note this
@@ -428,35 +494,33 @@ protocol: #{url}" unless url.to_base
428
494
  start_with?('?')
429
495
  end
430
496
 
431
- # Returns true if self is a URL anchor/fragment e.g. #top etc. Note this
432
- # shouldn't be used to determine if self contains an anchor/fragment.
497
+ # Returns true if self is a URL fragment e.g. #top etc. Note this
498
+ # shouldn't be used to determine if self contains a fragment.
433
499
  #
434
- # @return [Boolean] True if self is a anchor/fragment, false otherwise.
435
- def anchor?
500
+ # @return [Boolean] True if self is a fragment, false otherwise.
501
+ def fragment?
436
502
  start_with?('#')
437
503
  end
438
504
 
439
- alias crawled? crawled
440
- alias is_relative? relative?
441
- alias is_absolute? absolute?
442
- alias is_valid? valid?
443
- alias normalise normalize
444
- alias uri to_uri
445
- alias url to_url
446
- alias scheme to_scheme
447
- alias host to_host
448
- alias domain to_domain
449
- alias brand to_brand
450
- alias base to_base
451
- alias path to_path
452
- alias endpoint to_endpoint
453
- alias query to_query
454
- alias anchor to_anchor
455
- alias fragment to_anchor
456
- alias extension to_extension
457
- alias without_fragment without_anchor
458
- alias is_query? query?
459
- alias is_anchor? anchor?
460
- alias fragment? anchor?
505
+ alias + concat
506
+ alias crawled? crawled
507
+ alias normalise normalize
508
+ alias is_relative? relative?
509
+ alias is_absolute? absolute?
510
+ alias is_valid? valid?
511
+ alias is_query? query?
512
+ alias is_fragment? fragment?
513
+ alias uri to_uri
514
+ alias url to_url
515
+ alias scheme to_scheme
516
+ alias host to_host
517
+ alias domain to_domain
518
+ alias brand to_brand
519
+ alias base to_base
520
+ alias path to_path
521
+ alias endpoint to_endpoint
522
+ alias query to_query
523
+ alias fragment to_fragment
524
+ alias extension to_extension
461
525
  end
462
526
  end