wgit 0.5.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +7 -0
- data/CHANGELOG.md +249 -0
- data/CODE_OF_CONDUCT.md +76 -0
- data/CONTRIBUTING.md +21 -0
- data/LICENSE.txt +21 -0
- data/README.md +232 -0
- data/bin/wgit +39 -0
- data/lib/wgit.rb +3 -1
- data/lib/wgit/assertable.rb +3 -3
- data/lib/wgit/base.rb +30 -0
- data/lib/wgit/core_ext.rb +1 -1
- data/lib/wgit/crawler.rb +304 -148
- data/lib/wgit/database/database.rb +310 -135
- data/lib/wgit/database/model.rb +10 -3
- data/lib/wgit/document.rb +241 -169
- data/lib/wgit/{document_extensions.rb → document_extractors.rb} +20 -10
- data/lib/wgit/dsl.rb +324 -0
- data/lib/wgit/indexer.rb +68 -156
- data/lib/wgit/response.rb +17 -14
- data/lib/wgit/url.rb +213 -73
- data/lib/wgit/utils.rb +32 -20
- data/lib/wgit/version.rb +3 -2
- metadata +38 -19
data/lib/wgit/response.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Wgit
|
2
|
-
# Response class
|
2
|
+
# Response class modeling a generic HTTP GET response.
|
3
3
|
class Response
|
4
4
|
# The underlying HTTP adapter/library response object.
|
5
5
|
attr_accessor :adapter_response
|
@@ -56,11 +56,11 @@ module Wgit
|
|
56
56
|
@body.empty? ? nil : @body
|
57
57
|
end
|
58
58
|
|
59
|
-
# Returns
|
59
|
+
# Returns whether or not a server response is absent.
|
60
60
|
#
|
61
|
-
# @return [Boolean] True if
|
61
|
+
# @return [Boolean] True if the status is nil or < 1, false otherwise.
|
62
62
|
def failure?
|
63
|
-
!success?
|
63
|
+
!success?
|
64
64
|
end
|
65
65
|
|
66
66
|
# Sets the headers Hash to the given value. The header keys are mapped
|
@@ -69,7 +69,10 @@ module Wgit
|
|
69
69
|
# @param headers [Hash] The new response headers.
|
70
70
|
# @return [Hash] @headers's new value.
|
71
71
|
def headers=(headers)
|
72
|
-
|
72
|
+
unless headers
|
73
|
+
@headers = {}
|
74
|
+
return
|
75
|
+
end
|
73
76
|
|
74
77
|
@headers = headers.map do |k, v|
|
75
78
|
k = k.downcase.gsub('-', '_').to_sym
|
@@ -122,20 +125,20 @@ module Wgit
|
|
122
125
|
@status = int.positive? ? int : nil
|
123
126
|
end
|
124
127
|
|
125
|
-
# Returns whether or not
|
128
|
+
# Returns whether or not a server response is present.
|
126
129
|
#
|
127
|
-
# @return [Boolean] True if
|
130
|
+
# @return [Boolean] True if the status is > 0, false otherwise.
|
128
131
|
def success?
|
129
132
|
return false unless @status
|
130
133
|
|
131
|
-
@status.
|
134
|
+
@status.positive?
|
132
135
|
end
|
133
136
|
|
134
|
-
alias code
|
135
|
-
alias content
|
136
|
-
alias
|
137
|
-
alias to_s
|
138
|
-
alias redirects
|
139
|
-
alias length
|
137
|
+
alias code status
|
138
|
+
alias content body
|
139
|
+
alias crawl_duration total_time
|
140
|
+
alias to_s body
|
141
|
+
alias redirects redirections
|
142
|
+
alias length size
|
140
143
|
end
|
141
144
|
end
|
data/lib/wgit/url.rb
CHANGED
@@ -6,20 +6,20 @@ require 'uri'
|
|
6
6
|
require 'addressable/uri'
|
7
7
|
|
8
8
|
module Wgit
|
9
|
-
# Class modeling a web based HTTP URL.
|
9
|
+
# Class modeling/serialising a web based HTTP URL.
|
10
10
|
#
|
11
11
|
# Can be an internal/relative link e.g. "about.html" or an absolute URL
|
12
|
-
# e.g. "http://www.google.co.uk". Is a subclass of String and uses
|
13
|
-
#
|
12
|
+
# e.g. "http://www.google.co.uk". Is a subclass of String and uses `URI` and
|
13
|
+
# `addressable/uri` internally for parsing.
|
14
14
|
#
|
15
|
-
# Most of the methods in this class return new Wgit::Url instances making
|
16
|
-
# method calls chainable e.g. url.omit_base.omit_fragment etc. The
|
17
|
-
# also try to be idempotent where possible.
|
15
|
+
# Most of the methods in this class return new `Wgit::Url` instances making
|
16
|
+
# the method calls chainable e.g. `url.omit_base.omit_fragment` etc. The
|
17
|
+
# methods also try to be idempotent where possible.
|
18
18
|
class Url < String
|
19
19
|
include Assertable
|
20
20
|
|
21
21
|
# Whether or not the Url has been crawled. A custom crawled= method
|
22
|
-
# is provided by this class
|
22
|
+
# is provided by this class.
|
23
23
|
attr_reader :crawled
|
24
24
|
|
25
25
|
# The Time stamp of when this Url was crawled.
|
@@ -28,10 +28,10 @@ module Wgit
|
|
28
28
|
# The duration of the crawl for this Url (in seconds).
|
29
29
|
attr_accessor :crawl_duration
|
30
30
|
|
31
|
-
# Initializes a new instance of Wgit::Url which
|
31
|
+
# Initializes a new instance of Wgit::Url which models a web based
|
32
32
|
# HTTP URL.
|
33
33
|
#
|
34
|
-
# @param url_or_obj [String, Wgit::Url,
|
34
|
+
# @param url_or_obj [String, Wgit::Url, #fetch#[]] Is either a String
|
35
35
|
# based URL or an object representing a Database record e.g. a MongoDB
|
36
36
|
# document/object.
|
37
37
|
# @param crawled [Boolean] Whether or not the HTML of the URL's web page
|
@@ -90,6 +90,23 @@ module Wgit
|
|
90
90
|
obj.is_a?(Wgit::Url) ? obj : new(obj)
|
91
91
|
end
|
92
92
|
|
93
|
+
# Returns a Wgit::Url instance from Wgit::Url.parse, or nil if obj cannot
|
94
|
+
# be parsed successfully e.g. the String is invalid.
|
95
|
+
#
|
96
|
+
# Use this method when you can't guarantee that obj is parsable as a URL.
|
97
|
+
# See Wgit::Url.parse for more information.
|
98
|
+
#
|
99
|
+
# @param obj [Object] The object to parse, which #is_a?(String).
|
100
|
+
# @raise [StandardError] If obj.is_a?(String) is false.
|
101
|
+
# @return [Wgit::Url, nil] A Wgit::Url instance or nil (if obj is invalid).
|
102
|
+
def self.parse?(obj)
|
103
|
+
parse(obj)
|
104
|
+
rescue Addressable::URI::InvalidURIError
|
105
|
+
Wgit.logger.debug("Wgit::Url.parse?('#{obj}') exception: \
|
106
|
+
Addressable::URI::InvalidURIError")
|
107
|
+
nil
|
108
|
+
end
|
109
|
+
|
93
110
|
# Sets the @crawled instance var, also setting @date_crawled for
|
94
111
|
# convenience.
|
95
112
|
#
|
@@ -98,8 +115,6 @@ module Wgit
|
|
98
115
|
def crawled=(bool)
|
99
116
|
@crawled = bool
|
100
117
|
@date_crawled = bool ? Wgit::Utils.time_stamp : nil
|
101
|
-
|
102
|
-
bool
|
103
118
|
end
|
104
119
|
|
105
120
|
# Overrides String#replace setting the new_url @uri and String value.
|
@@ -114,33 +129,40 @@ module Wgit
|
|
114
129
|
|
115
130
|
# Returns true if self is a relative Url; false if absolute.
|
116
131
|
#
|
117
|
-
#
|
118
|
-
# 'http://', otherwise the
|
132
|
+
# An absolute URL must have a scheme prefix e.g.
|
133
|
+
# 'http://', otherwise the URL is regarded as being relative (regardless
|
119
134
|
# of whether it's valid or not). The only exception is if an opts arg is
|
120
135
|
# provided and self is a page belonging to that arg type e.g. host; then
|
121
136
|
# the link is relative.
|
122
137
|
#
|
138
|
+
# @example
|
139
|
+
# url = Wgit::Url.new('http://example.com/about')
|
140
|
+
#
|
141
|
+
# url.relative? # => false
|
142
|
+
# url.relative?(host: 'http://example.com') # => true
|
143
|
+
#
|
123
144
|
# @param opts [Hash] The options with which to check relativity. Only one
|
124
145
|
# opts param should be provided. The provided opts param Url must be
|
125
146
|
# absolute and be prefixed with a scheme. Consider using the output of
|
126
|
-
# Wgit::Url#
|
127
|
-
# @option opts [Wgit::Url, String] :
|
128
|
-
# http://www.google.com/how which gives a
|
129
|
-
# 'http://www.google.com'.
|
147
|
+
# Wgit::Url#to_origin which should work (unless it's nil).
|
148
|
+
# @option opts [Wgit::Url, String] :origin The Url origin e.g.
|
149
|
+
# http://www.google.com:81/how which gives an origin of
|
150
|
+
# 'http://www.google.com:81'.
|
130
151
|
# @option opts [Wgit::Url, String] :host The Url host e.g.
|
131
152
|
# http://www.google.com/how which gives a host of 'www.google.com'.
|
132
153
|
# @option opts [Wgit::Url, String] :domain The Url domain e.g.
|
133
154
|
# http://www.google.com/how which gives a domain of 'google.com'.
|
134
155
|
# @option opts [Wgit::Url, String] :brand The Url brand e.g.
|
135
156
|
# http://www.google.com/how which gives a brand of 'google'.
|
136
|
-
# @raise [StandardError] If self is invalid e.g. empty or an invalid opts
|
157
|
+
# @raise [StandardError] If self is invalid (e.g. empty) or an invalid opts
|
137
158
|
# param has been provided.
|
138
159
|
# @return [Boolean] True if relative, false if absolute.
|
139
160
|
def relative?(opts = {})
|
140
|
-
defaults = {
|
161
|
+
defaults = { origin: nil, host: nil, domain: nil, brand: nil }
|
141
162
|
opts = defaults.merge(opts)
|
142
163
|
raise 'Url (self) cannot be empty' if empty?
|
143
164
|
|
165
|
+
return false if scheme_relative?
|
144
166
|
return true if @uri.relative?
|
145
167
|
|
146
168
|
# Self is absolute but may be relative to the opts param e.g. host.
|
@@ -151,14 +173,14 @@ module Wgit
|
|
151
173
|
|
152
174
|
type, url = opts.first
|
153
175
|
url = Wgit::Url.new(url)
|
154
|
-
|
155
|
-
raise "Invalid opts param value,
|
156
|
-
protocol scheme: #{url}"
|
176
|
+
if url.invalid?
|
177
|
+
raise "Invalid opts param value, it must be absolute, containing a \
|
178
|
+
protocol scheme and domain (e.g. http://example.com): #{url}"
|
157
179
|
end
|
158
180
|
|
159
181
|
case type
|
160
|
-
when :
|
161
|
-
|
182
|
+
when :origin # http://www.google.com:81
|
183
|
+
to_origin == url.to_origin
|
162
184
|
when :host # www.google.com
|
163
185
|
to_host == url.to_host
|
164
186
|
when :domain # google.com
|
@@ -177,18 +199,20 @@ protocol scheme: #{url}"
|
|
177
199
|
@uri.absolute?
|
178
200
|
end
|
179
201
|
|
180
|
-
# Returns if self is a valid and absolute HTTP
|
202
|
+
# Returns if self is a valid and absolute HTTP URL or not. Self should
|
203
|
+
# always be crawlable if this method returns true.
|
181
204
|
#
|
182
|
-
# @return [Boolean] True if valid and
|
205
|
+
# @return [Boolean] True if valid, absolute and crawlable, otherwise false.
|
183
206
|
def valid?
|
184
207
|
return false if relative?
|
185
|
-
return false unless
|
186
|
-
return false
|
208
|
+
return false unless to_origin && to_domain
|
209
|
+
return false unless URI::DEFAULT_PARSER.make_regexp.match(normalize)
|
187
210
|
|
188
211
|
true
|
189
212
|
end
|
190
213
|
|
191
|
-
# Returns if self is an invalid (relative) HTTP
|
214
|
+
# Returns if self is an invalid (e.g. relative) HTTP URL. See
|
215
|
+
# Wgit::Url#valid? for the inverse (and more information).
|
192
216
|
#
|
193
217
|
# @return [Boolean] True if invalid, otherwise false.
|
194
218
|
def invalid?
|
@@ -213,7 +237,8 @@ protocol scheme: #{url}"
|
|
213
237
|
Wgit::Url.new(concatted)
|
214
238
|
end
|
215
239
|
|
216
|
-
#
|
240
|
+
# Normalizes/escapes self and returns a new Wgit::Url. Self isn't modified.
|
241
|
+
# This should be used before GET'ing the url, in case it has IRI chars.
|
217
242
|
#
|
218
243
|
# @return [Wgit::Url] An escaped version of self.
|
219
244
|
def normalize
|
@@ -224,43 +249,46 @@ protocol scheme: #{url}"
|
|
224
249
|
# modify the receiver.
|
225
250
|
#
|
226
251
|
# If self is absolute then it's returned as is, making this method
|
227
|
-
# idempotent. The doc's
|
228
|
-
# doc.url is used as the base; which is concatted with self.
|
252
|
+
# idempotent. The doc's `<base>` element is used if present, otherwise
|
253
|
+
# `doc.url` is used as the base; which is concatted with self.
|
229
254
|
#
|
230
|
-
# Typically used to build an absolute link obtained from a document
|
255
|
+
# Typically used to build an absolute link obtained from a document.
|
231
256
|
#
|
257
|
+
# @example
|
232
258
|
# link = Wgit::Url.new('/favicon.png')
|
233
259
|
# doc = Wgit::Document.new('http://example.com')
|
234
260
|
#
|
235
|
-
# link.
|
261
|
+
# link.make_absolute(doc) # => "http://example.com/favicon.png"
|
236
262
|
#
|
237
263
|
# @param doc [Wgit::Document] The doc whose base Url is concatted with
|
238
264
|
# self.
|
239
265
|
# @raise [StandardError] If doc isn't a Wgit::Document or if `doc.base_url`
|
240
266
|
# raises an Exception.
|
241
267
|
# @return [Wgit::Url] Self in absolute form.
|
242
|
-
def
|
268
|
+
def make_absolute(doc)
|
243
269
|
assert_type(doc, Wgit::Document)
|
270
|
+
raise 'Cannot make absolute when Document @url is not valid' \
|
271
|
+
unless doc.url.valid?
|
272
|
+
|
273
|
+
return prefix_scheme(doc.url.to_scheme&.to_sym) if scheme_relative?
|
244
274
|
|
245
275
|
absolute? ? self : doc.base_url(link: self).concat(self)
|
246
276
|
end
|
247
277
|
|
248
|
-
# Returns self having prefixed a protocol
|
278
|
+
# Returns self having prefixed a scheme/protocol. Doesn't modify receiver.
|
249
279
|
# Returns self even if absolute (with scheme); therefore is idempotent.
|
250
280
|
#
|
251
|
-
# @param
|
252
|
-
# @return [Wgit::Url] Self with a
|
253
|
-
def prefix_scheme(
|
254
|
-
|
255
|
-
|
256
|
-
case protocol
|
257
|
-
when :http
|
258
|
-
Wgit::Url.new("http://#{url}")
|
259
|
-
when :https
|
260
|
-
Wgit::Url.new("https://#{url}")
|
261
|
-
else
|
262
|
-
raise "protocol must be :http or :https, not :#{protocol}"
|
281
|
+
# @param scheme [Symbol] Either :http or :https.
|
282
|
+
# @return [Wgit::Url] Self with a scheme prefix.
|
283
|
+
def prefix_scheme(scheme = :http)
|
284
|
+
unless %i[http https].include?(scheme)
|
285
|
+
raise "scheme must be :http or :https, not :#{scheme}"
|
263
286
|
end
|
287
|
+
|
288
|
+
return self if absolute? && !scheme_relative?
|
289
|
+
|
290
|
+
separator = scheme_relative? ? '' : '//'
|
291
|
+
Wgit::Url.new("#{scheme}:#{separator}#{self}")
|
264
292
|
end
|
265
293
|
|
266
294
|
# Returns a Hash containing this Url's instance vars excluding @uri.
|
@@ -268,8 +296,7 @@ protocol scheme: #{url}"
|
|
268
296
|
#
|
269
297
|
# @return [Hash] self's instance vars as a Hash.
|
270
298
|
def to_h
|
271
|
-
|
272
|
-
h = Wgit::Utils.to_h(self, ignore: ignore)
|
299
|
+
h = Wgit::Utils.to_h(self, ignore: ['@uri'])
|
273
300
|
Hash[h.to_a.insert(0, ['url', self])] # Insert url at position 0.
|
274
301
|
end
|
275
302
|
|
@@ -312,6 +339,20 @@ protocol scheme: #{url}"
|
|
312
339
|
host ? Wgit::Url.new(host) : nil
|
313
340
|
end
|
314
341
|
|
342
|
+
# Returns a new Wgit::Url containing just the port of this URL e.g.
|
343
|
+
# Given http://www.google.co.uk:443/about.html, '443' is returned.
|
344
|
+
#
|
345
|
+
# @return [Wgit::Url, nil] Containing just the port or nil.
|
346
|
+
def to_port
|
347
|
+
port = @uri.port
|
348
|
+
|
349
|
+
# @uri.port defaults port to 80/443 if missing, so we check for :#{port}.
|
350
|
+
return nil unless port
|
351
|
+
return nil unless include?(":#{port}")
|
352
|
+
|
353
|
+
Wgit::Url.new(port.to_s)
|
354
|
+
end
|
355
|
+
|
315
356
|
# Returns a new Wgit::Url containing just the domain of this URL e.g.
|
316
357
|
# Given http://www.google.co.uk/about.html, google.co.uk is returned.
|
317
358
|
#
|
@@ -321,6 +362,20 @@ protocol scheme: #{url}"
|
|
321
362
|
domain ? Wgit::Url.new(domain) : nil
|
322
363
|
end
|
323
364
|
|
365
|
+
# Returns a new Wgit::Url containing just the sub domain of this URL e.g.
|
366
|
+
# Given http://scripts.dev.google.com, scripts.dev is returned.
|
367
|
+
#
|
368
|
+
# @return [Wgit::Url, nil] Containing just the sub domain or nil.
|
369
|
+
def to_sub_domain
|
370
|
+
return nil unless to_host
|
371
|
+
|
372
|
+
dot_domain = ".#{to_domain}"
|
373
|
+
return nil unless include?(dot_domain)
|
374
|
+
|
375
|
+
sub_domain = to_host.sub(dot_domain, '')
|
376
|
+
Wgit::Url.new(sub_domain)
|
377
|
+
end
|
378
|
+
|
324
379
|
# Returns a new Wgit::Url containing just the brand of this URL e.g.
|
325
380
|
# Given http://www.google.co.uk/about.html, google is returned.
|
326
381
|
#
|
@@ -336,12 +391,24 @@ protocol scheme: #{url}"
|
|
336
391
|
# @return [Wgit::Url, nil] The base of self e.g. http://www.google.co.uk or
|
337
392
|
# nil.
|
338
393
|
def to_base
|
339
|
-
return nil
|
394
|
+
return nil unless @uri.scheme && @uri.host
|
340
395
|
|
341
396
|
base = "#{@uri.scheme}://#{@uri.host}"
|
342
397
|
Wgit::Url.new(base)
|
343
398
|
end
|
344
399
|
|
400
|
+
# Returns only the origin of this URL e.g. the protocol scheme, host and
|
401
|
+
# port combined. For http://localhost:3000/api, http://localhost:3000 gets
|
402
|
+
# returned. If there's no port present, then to_base is returned.
|
403
|
+
#
|
404
|
+
# @return [Wgit::Url, nil] The origin of self or nil.
|
405
|
+
def to_origin
|
406
|
+
return nil unless to_base
|
407
|
+
return to_base unless to_port
|
408
|
+
|
409
|
+
Wgit::Url.new("#{to_base}:#{to_port}")
|
410
|
+
end
|
411
|
+
|
345
412
|
# Returns the path of this URL e.g. the bit after the host without slashes.
|
346
413
|
# For example:
|
347
414
|
# Wgit::Url.new("http://www.google.co.uk/about.html/").to_path returns
|
@@ -370,7 +437,7 @@ protocol scheme: #{url}"
|
|
370
437
|
end
|
371
438
|
|
372
439
|
# Returns a new Wgit::Url containing just the query string of this URL
|
373
|
-
# e.g. Given http://google.com?q=
|
440
|
+
# e.g. Given http://google.com?q=foo&bar=1, 'q=foo&bar=1' is returned.
|
374
441
|
#
|
375
442
|
# @return [Wgit::Url, nil] Containing just the query string or nil.
|
376
443
|
def to_query
|
@@ -378,6 +445,24 @@ protocol scheme: #{url}"
|
|
378
445
|
query ? Wgit::Url.new(query) : nil
|
379
446
|
end
|
380
447
|
|
448
|
+
# Returns a Hash containing just the query string parameters of this URL
|
449
|
+
# e.g. Given http://google.com?q=ruby, "{ 'q' => 'ruby' }" is returned.
|
450
|
+
#
|
451
|
+
# @param symbolize_keys [Boolean] The returned Hash keys will be Symbols if
|
452
|
+
# true, Strings otherwise.
|
453
|
+
# @return [Hash<String | Symbol, String>] Containing the query string
|
454
|
+
# params or empty if the URL doesn't contain any query parameters.
|
455
|
+
def to_query_hash(symbolize_keys: false)
|
456
|
+
query_str = to_query
|
457
|
+
return {} unless query_str
|
458
|
+
|
459
|
+
query_str.split('&').each_with_object({}) do |param, hash|
|
460
|
+
k, v = param.split('=')
|
461
|
+
k = k.to_sym if symbolize_keys
|
462
|
+
hash[k] = v
|
463
|
+
end
|
464
|
+
end
|
465
|
+
|
381
466
|
# Returns a new Wgit::Url containing just the fragment string of this URL
|
382
467
|
# e.g. Given http://google.com#about, #about is returned.
|
383
468
|
#
|
@@ -399,6 +484,24 @@ protocol scheme: #{url}"
|
|
399
484
|
segs.length > 1 ? Wgit::Url.new(segs.last) : nil
|
400
485
|
end
|
401
486
|
|
487
|
+
# Returns a new Wgit::Url containing just the username string of this URL
|
488
|
+
# e.g. Given http://me:pass1@example.com, me is returned.
|
489
|
+
#
|
490
|
+
# @return [Wgit::Url, nil] Containing just the user string or nil.
|
491
|
+
def to_user
|
492
|
+
user = @uri.user
|
493
|
+
user ? Wgit::Url.new(user) : nil
|
494
|
+
end
|
495
|
+
|
496
|
+
# Returns a new Wgit::Url containing just the password string of this URL
|
497
|
+
# e.g. Given http://me:pass1@example.com, pass1 is returned.
|
498
|
+
#
|
499
|
+
# @return [Wgit::Url, nil] Containing just the password string or nil.
|
500
|
+
def to_password
|
501
|
+
password = @uri.password
|
502
|
+
password ? Wgit::Url.new(password) : nil
|
503
|
+
end
|
504
|
+
|
402
505
|
# Omits the given URL components from self and returns a new Wgit::Url.
|
403
506
|
#
|
404
507
|
# Calls Addressable::URI#omit underneath and creates a new Wgit::Url from
|
@@ -442,7 +545,7 @@ protocol scheme: #{url}"
|
|
442
545
|
.omit_trailing_slash
|
443
546
|
end
|
444
547
|
|
445
|
-
# Returns a new Wgit::Url with the base (
|
548
|
+
# Returns a new Wgit::Url with the base (scheme and host) removed e.g. Given
|
446
549
|
# http://google.com/search?q=something#about, search?q=something#about is
|
447
550
|
# returned. If relative and base isn't present then self is returned.
|
448
551
|
# Leading and trailing slashes are always stripped from the return value.
|
@@ -457,6 +560,21 @@ protocol scheme: #{url}"
|
|
457
560
|
Wgit::Url.new(omit_base).omit_slashes
|
458
561
|
end
|
459
562
|
|
563
|
+
# Returns a new Wgit::Url with the origin (base + port) removed e.g. Given
|
564
|
+
# http://google.com:81/search?q=something#about, search?q=something#about is
|
565
|
+
# returned. If relative and base isn't present then self is returned.
|
566
|
+
# Leading and trailing slashes are always stripped from the return value.
|
567
|
+
#
|
568
|
+
# @return [Wgit::Url] Self containing everything after the origin.
|
569
|
+
def omit_origin
|
570
|
+
origin = to_origin
|
571
|
+
omit_origin = origin ? gsub(origin, '') : self
|
572
|
+
|
573
|
+
return self if ['', '/'].include?(omit_origin)
|
574
|
+
|
575
|
+
Wgit::Url.new(omit_origin).omit_slashes
|
576
|
+
end
|
577
|
+
|
460
578
|
# Returns a new Wgit::Url with the query string portion removed e.g. Given
|
461
579
|
# http://google.com/search?q=hello, http://google.com/search is
|
462
580
|
# returned. Self is returned as is if no query string is present. A URL
|
@@ -502,25 +620,47 @@ protocol scheme: #{url}"
|
|
502
620
|
start_with?('#')
|
503
621
|
end
|
504
622
|
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
alias
|
521
|
-
alias
|
522
|
-
alias
|
523
|
-
alias
|
524
|
-
alias
|
623
|
+
# Returns true if self equals '/' a.k.a. index.
|
624
|
+
#
|
625
|
+
# @return [Boolean] True if self equals '/', false otherwise.
|
626
|
+
def index?
|
627
|
+
self == '/'
|
628
|
+
end
|
629
|
+
|
630
|
+
# Returns true if self starts with '//' a.k.a. a scheme/protocol relative
|
631
|
+
# path.
|
632
|
+
#
|
633
|
+
# @return [Boolean] True if self starts with '//', false otherwise.
|
634
|
+
def scheme_relative?
|
635
|
+
start_with?('//')
|
636
|
+
end
|
637
|
+
|
638
|
+
alias + concat
|
639
|
+
alias crawled? crawled
|
640
|
+
alias is_relative? relative?
|
641
|
+
alias is_absolute? absolute?
|
642
|
+
alias is_valid? valid?
|
643
|
+
alias is_query? query?
|
644
|
+
alias is_fragment? fragment?
|
645
|
+
alias is_index? index?
|
646
|
+
alias is_scheme_relative? scheme_relative?
|
647
|
+
alias uri to_uri
|
648
|
+
alias url to_url
|
649
|
+
alias scheme to_scheme
|
650
|
+
alias host to_host
|
651
|
+
alias port to_port
|
652
|
+
alias domain to_domain
|
653
|
+
alias brand to_brand
|
654
|
+
alias base to_base
|
655
|
+
alias origin to_origin
|
656
|
+
alias path to_path
|
657
|
+
alias endpoint to_endpoint
|
658
|
+
alias query to_query
|
659
|
+
alias query_hash to_query_hash
|
660
|
+
alias fragment to_fragment
|
661
|
+
alias extension to_extension
|
662
|
+
alias user to_user
|
663
|
+
alias password to_password
|
664
|
+
alias sub_domain to_sub_domain
|
525
665
|
end
|
526
666
|
end
|